Don't assert "alg != libcall" for -minline-stringops-dynamically
[official-gcc.git] / gcc / config / i386 / i386.c
blob aaf0b385c90da3f5a1570d97a3fb73b40af84e09
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "hash-map.h"
68 #include "is-a.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "dwarf2.h"
82 #include "df.h"
83 #include "tm-constrs.h"
84 #include "params.h"
85 #include "cselib.h"
86 #include "debug.h"
87 #include "sched-int.h"
88 #include "sbitmap.h"
89 #include "fibheap.h"
90 #include "opts.h"
91 #include "diagnostic.h"
92 #include "dumpfile.h"
93 #include "tree-pass.h"
94 #include "wide-int.h"
95 #include "context.h"
96 #include "pass_manager.h"
97 #include "target-globals.h"
98 #include "tree-vectorizer.h"
99 #include "shrink-wrap.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102 #include "tree-iterator.h"
103 #include "tree-chkp.h"
104 #include "rtl-chkp.h"
106 static rtx legitimize_dllimport_symbol (rtx, bool);
107 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
108 static rtx legitimize_pe_coff_symbol (rtx, bool);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
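/* For example, MODE_INDEX (QImode) == 0 and MODE_INDEX (SImode) == 2, so
   they select the QImode and SImode entries of the five-element multiply
   and divide cost arrays in the tables below; any mode not listed above
   maps to the trailing "other" slot (index 4).  */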
122 /* Processor costs (relative to an add) */
123 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
124 #define COSTS_N_BYTES(N) ((N) * 2)
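/* With these definitions COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1): a
   two-byte add has the same numeric cost whether we count bytes (when
   optimizing for size) or instructions, so the two scales stay
   comparable.  */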
126 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
128 static stringop_algs ix86_size_memcpy[2] = {
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
131 static stringop_algs ix86_size_memset[2] = {
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
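/* A minimal illustrative sketch (not part of GCC itself) of how the
   stringop_algs tables in this file are read.  The first member is the
   algorithm for blocks of unknown size; each following {max, alg, noalign}
   triple gives the algorithm to use for known sizes up to MAX bytes, and a
   max of -1 terminates the list and covers all larger sizes.  The field
   names used below (size[].max, size[].alg) and MAX_STRINGOP_ALGS are as
   assumed from i386.h; the real selection is done by decide_alg () later
   in this file, which also weighs options such as -minline-all-stringops
   and -minline-stringops-dynamically.  */

static enum stringop_alg ATTRIBUTE_UNUSED
stringop_alg_for_size_sketch (const stringop_algs *algs,
			      HOST_WIDE_INT nbytes)
{
  int i;

  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || nbytes <= algs->size[i].max)
      return algs->size[i].alg;

  /* Every table in this file ends with a -1 entry, so this point is not
     reached for well-formed tables.  */
  return libcall;
}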
135 const
136 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
137 COSTS_N_BYTES (2), /* cost of an add instruction */
138 COSTS_N_BYTES (3), /* cost of a lea instruction */
139 COSTS_N_BYTES (2), /* variable shift costs */
140 COSTS_N_BYTES (3), /* constant shift costs */
141 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
142 COSTS_N_BYTES (3), /* HI */
143 COSTS_N_BYTES (3), /* SI */
144 COSTS_N_BYTES (3), /* DI */
145 COSTS_N_BYTES (5)}, /* other */
146 0, /* cost of multiply per each bit set */
147 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
148 COSTS_N_BYTES (3), /* HI */
149 COSTS_N_BYTES (3), /* SI */
150 COSTS_N_BYTES (3), /* DI */
151 COSTS_N_BYTES (5)}, /* other */
152 COSTS_N_BYTES (3), /* cost of movsx */
153 COSTS_N_BYTES (3), /* cost of movzx */
154 0, /* "large" insn */
155 2, /* MOVE_RATIO */
156 2, /* cost for loading QImode using movzbl */
157 {2, 2, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 2}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {2, 2, 2}, /* cost of storing fp registers
165 in SFmode, DFmode and XFmode */
166 3, /* cost of moving MMX register */
167 {3, 3}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {3, 3}, /* cost of storing MMX registers
170 in SImode and DImode */
171 3, /* cost of moving SSE register */
172 {3, 3, 3}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {3, 3, 3}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3, /* MMX or SSE register to integer */
177 0, /* size of l1 cache */
178 0, /* size of l2 cache */
179 0, /* size of prefetch block */
180 0, /* number of parallel prefetches */
181 2, /* Branch cost */
182 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
183 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
184 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
185 COSTS_N_BYTES (2), /* cost of FABS instruction. */
186 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
187 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
188 ix86_size_memcpy,
189 ix86_size_memset,
190 1, /* scalar_stmt_cost. */
191 1, /* scalar load_cost. */
192 1, /* scalar_store_cost. */
193 1, /* vec_stmt_cost. */
194 1, /* vec_to_scalar_cost. */
195 1, /* scalar_to_vec_cost. */
196 1, /* vec_align_load_cost. */
197 1, /* vec_unalign_load_cost. */
198 1, /* vec_store_cost. */
199 1, /* cond_taken_branch_cost. */
200 1, /* cond_not_taken_branch_cost. */
201 };
203 /* Processor costs (relative to an add) */
204 static stringop_algs i386_memcpy[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207 static stringop_algs i386_memset[2] = {
208 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
209 DUMMY_STRINGOP_ALGS};
211 static const
212 struct processor_costs i386_cost = { /* 386 specific costs */
213 COSTS_N_INSNS (1), /* cost of an add instruction */
214 COSTS_N_INSNS (1), /* cost of a lea instruction */
215 COSTS_N_INSNS (3), /* variable shift costs */
216 COSTS_N_INSNS (2), /* constant shift costs */
217 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
218 COSTS_N_INSNS (6), /* HI */
219 COSTS_N_INSNS (6), /* SI */
220 COSTS_N_INSNS (6), /* DI */
221 COSTS_N_INSNS (6)}, /* other */
222 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
223 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
224 COSTS_N_INSNS (23), /* HI */
225 COSTS_N_INSNS (23), /* SI */
226 COSTS_N_INSNS (23), /* DI */
227 COSTS_N_INSNS (23)}, /* other */
228 COSTS_N_INSNS (3), /* cost of movsx */
229 COSTS_N_INSNS (2), /* cost of movzx */
230 15, /* "large" insn */
231 3, /* MOVE_RATIO */
232 4, /* cost for loading QImode using movzbl */
233 {2, 4, 2}, /* cost of loading integer registers
234 in QImode, HImode and SImode.
235 Relative to reg-reg move (2). */
236 {2, 4, 2}, /* cost of storing integer registers */
237 2, /* cost of reg,reg fld/fst */
238 {8, 8, 8}, /* cost of loading fp registers
239 in SFmode, DFmode and XFmode */
240 {8, 8, 8}, /* cost of storing fp registers
241 in SFmode, DFmode and XFmode */
242 2, /* cost of moving MMX register */
243 {4, 8}, /* cost of loading MMX registers
244 in SImode and DImode */
245 {4, 8}, /* cost of storing MMX registers
246 in SImode and DImode */
247 2, /* cost of moving SSE register */
248 {4, 8, 16}, /* cost of loading SSE registers
249 in SImode, DImode and TImode */
250 {4, 8, 16}, /* cost of storing SSE registers
251 in SImode, DImode and TImode */
252 3, /* MMX or SSE register to integer */
253 0, /* size of l1 cache */
254 0, /* size of l2 cache */
255 0, /* size of prefetch block */
256 0, /* number of parallel prefetches */
257 1, /* Branch cost */
258 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
259 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
260 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
261 COSTS_N_INSNS (22), /* cost of FABS instruction. */
262 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
263 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
264 i386_memcpy,
265 i386_memset,
266 1, /* scalar_stmt_cost. */
267 1, /* scalar load_cost. */
268 1, /* scalar_store_cost. */
269 1, /* vec_stmt_cost. */
270 1, /* vec_to_scalar_cost. */
271 1, /* scalar_to_vec_cost. */
272 1, /* vec_align_load_cost. */
273 2, /* vec_unalign_load_cost. */
274 1, /* vec_store_cost. */
275 3, /* cond_taken_branch_cost. */
276 1, /* cond_not_taken_branch_cost. */
277 };
279 static stringop_algs i486_memcpy[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282 static stringop_algs i486_memset[2] = {
283 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
284 DUMMY_STRINGOP_ALGS};
286 static const
287 struct processor_costs i486_cost = { /* 486 specific costs */
288 COSTS_N_INSNS (1), /* cost of an add instruction */
289 COSTS_N_INSNS (1), /* cost of a lea instruction */
290 COSTS_N_INSNS (3), /* variable shift costs */
291 COSTS_N_INSNS (2), /* constant shift costs */
292 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
293 COSTS_N_INSNS (12), /* HI */
294 COSTS_N_INSNS (12), /* SI */
295 COSTS_N_INSNS (12), /* DI */
296 COSTS_N_INSNS (12)}, /* other */
297 1, /* cost of multiply per each bit set */
298 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
299 COSTS_N_INSNS (40), /* HI */
300 COSTS_N_INSNS (40), /* SI */
301 COSTS_N_INSNS (40), /* DI */
302 COSTS_N_INSNS (40)}, /* other */
303 COSTS_N_INSNS (3), /* cost of movsx */
304 COSTS_N_INSNS (2), /* cost of movzx */
305 15, /* "large" insn */
306 3, /* MOVE_RATIO */
307 4, /* cost for loading QImode using movzbl */
308 {2, 4, 2}, /* cost of loading integer registers
309 in QImode, HImode and SImode.
310 Relative to reg-reg move (2). */
311 {2, 4, 2}, /* cost of storing integer registers */
312 2, /* cost of reg,reg fld/fst */
313 {8, 8, 8}, /* cost of loading fp registers
314 in SFmode, DFmode and XFmode */
315 {8, 8, 8}, /* cost of storing fp registers
316 in SFmode, DFmode and XFmode */
317 2, /* cost of moving MMX register */
318 {4, 8}, /* cost of loading MMX registers
319 in SImode and DImode */
320 {4, 8}, /* cost of storing MMX registers
321 in SImode and DImode */
322 2, /* cost of moving SSE register */
323 {4, 8, 16}, /* cost of loading SSE registers
324 in SImode, DImode and TImode */
325 {4, 8, 16}, /* cost of storing SSE registers
326 in SImode, DImode and TImode */
327 3, /* MMX or SSE register to integer */
328 4, /* size of l1 cache. 486 has 8kB cache
329 shared for code and data, so 4kB is
330 not really precise. */
331 4, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 1, /* Branch cost */
335 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (3), /* cost of FABS instruction. */
339 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
341 i486_memcpy,
342 i486_memset,
343 1, /* scalar_stmt_cost. */
344 1, /* scalar load_cost. */
345 1, /* scalar_store_cost. */
346 1, /* vec_stmt_cost. */
347 1, /* vec_to_scalar_cost. */
348 1, /* scalar_to_vec_cost. */
349 1, /* vec_align_load_cost. */
350 2, /* vec_unalign_load_cost. */
351 1, /* vec_store_cost. */
352 3, /* cond_taken_branch_cost. */
353 1, /* cond_not_taken_branch_cost. */
354 };
356 static stringop_algs pentium_memcpy[2] = {
357 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
358 DUMMY_STRINGOP_ALGS};
359 static stringop_algs pentium_memset[2] = {
360 {libcall, {{-1, rep_prefix_4_byte, false}}},
361 DUMMY_STRINGOP_ALGS};
363 static const
364 struct processor_costs pentium_cost = {
365 COSTS_N_INSNS (1), /* cost of an add instruction */
366 COSTS_N_INSNS (1), /* cost of a lea instruction */
367 COSTS_N_INSNS (4), /* variable shift costs */
368 COSTS_N_INSNS (1), /* constant shift costs */
369 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
370 COSTS_N_INSNS (11), /* HI */
371 COSTS_N_INSNS (11), /* SI */
372 COSTS_N_INSNS (11), /* DI */
373 COSTS_N_INSNS (11)}, /* other */
374 0, /* cost of multiply per each bit set */
375 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
376 COSTS_N_INSNS (25), /* HI */
377 COSTS_N_INSNS (25), /* SI */
378 COSTS_N_INSNS (25), /* DI */
379 COSTS_N_INSNS (25)}, /* other */
380 COSTS_N_INSNS (3), /* cost of movsx */
381 COSTS_N_INSNS (2), /* cost of movzx */
382 8, /* "large" insn */
383 6, /* MOVE_RATIO */
384 6, /* cost for loading QImode using movzbl */
385 {2, 4, 2}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {2, 4, 2}, /* cost of storing integer registers */
389 2, /* cost of reg,reg fld/fst */
390 {2, 2, 6}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {4, 4, 6}, /* cost of storing fp registers
393 in SFmode, DFmode and XFmode */
394 8, /* cost of moving MMX register */
395 {8, 8}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {8, 8}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 8, 16}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 8, 16}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 3, /* MMX or SSE register to integer */
405 8, /* size of l1 cache. */
406 8, /* size of l2 cache */
407 0, /* size of prefetch block */
408 0, /* number of parallel prefetches */
409 2, /* Branch cost */
410 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
411 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
412 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
413 COSTS_N_INSNS (1), /* cost of FABS instruction. */
414 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
415 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
416 pentium_memcpy,
417 pentium_memset,
418 1, /* scalar_stmt_cost. */
419 1, /* scalar load_cost. */
420 1, /* scalar_store_cost. */
421 1, /* vec_stmt_cost. */
422 1, /* vec_to_scalar_cost. */
423 1, /* scalar_to_vec_cost. */
424 1, /* vec_align_load_cost. */
425 2, /* vec_unalign_load_cost. */
426 1, /* vec_store_cost. */
427 3, /* cond_taken_branch_cost. */
428 1, /* cond_not_taken_branch_cost. */
429 };
431 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
432 (we ensure the alignment). For small blocks inline loop is still a
433 noticeable win, for bigger blocks either rep movsl or rep movsb is
434 the way to go. Rep movsb apparently has a more expensive startup time in the CPU,
435 but after 4K the difference is down in the noise. */
436 static stringop_algs pentiumpro_memcpy[2] = {
437 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
438 {8192, rep_prefix_4_byte, false},
439 {-1, rep_prefix_1_byte, false}}},
440 DUMMY_STRINGOP_ALGS};
441 static stringop_algs pentiumpro_memset[2] = {
442 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
443 {8192, rep_prefix_4_byte, false},
444 {-1, libcall, false}}},
445 DUMMY_STRINGOP_ALGS};
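/* Reading the memcpy table above with the sketch near the top of the file:
   a known copy of 64 bytes uses loop, 512 bytes uses unrolled_loop, 4096
   bytes uses rep_prefix_4_byte, and anything larger uses rep_prefix_1_byte;
   a copy of unknown size falls back to rep_prefix_4_byte.  */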
446 static const
447 struct processor_costs pentiumpro_cost = {
448 COSTS_N_INSNS (1), /* cost of an add instruction */
449 COSTS_N_INSNS (1), /* cost of a lea instruction */
450 COSTS_N_INSNS (1), /* variable shift costs */
451 COSTS_N_INSNS (1), /* constant shift costs */
452 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
453 COSTS_N_INSNS (4), /* HI */
454 COSTS_N_INSNS (4), /* SI */
455 COSTS_N_INSNS (4), /* DI */
456 COSTS_N_INSNS (4)}, /* other */
457 0, /* cost of multiply per each bit set */
458 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
459 COSTS_N_INSNS (17), /* HI */
460 COSTS_N_INSNS (17), /* SI */
461 COSTS_N_INSNS (17), /* DI */
462 COSTS_N_INSNS (17)}, /* other */
463 COSTS_N_INSNS (1), /* cost of movsx */
464 COSTS_N_INSNS (1), /* cost of movzx */
465 8, /* "large" insn */
466 6, /* MOVE_RATIO */
467 2, /* cost for loading QImode using movzbl */
468 {4, 4, 4}, /* cost of loading integer registers
469 in QImode, HImode and SImode.
470 Relative to reg-reg move (2). */
471 {2, 2, 2}, /* cost of storing integer registers */
472 2, /* cost of reg,reg fld/fst */
473 {2, 2, 6}, /* cost of loading fp registers
474 in SFmode, DFmode and XFmode */
475 {4, 4, 6}, /* cost of storing fp registers
476 in SFmode, DFmode and XFmode */
477 2, /* cost of moving MMX register */
478 {2, 2}, /* cost of loading MMX registers
479 in SImode and DImode */
480 {2, 2}, /* cost of storing MMX registers
481 in SImode and DImode */
482 2, /* cost of moving SSE register */
483 {2, 2, 8}, /* cost of loading SSE registers
484 in SImode, DImode and TImode */
485 {2, 2, 8}, /* cost of storing SSE registers
486 in SImode, DImode and TImode */
487 3, /* MMX or SSE register to integer */
488 8, /* size of l1 cache. */
489 256, /* size of l2 cache */
490 32, /* size of prefetch block */
491 6, /* number of parallel prefetches */
492 2, /* Branch cost */
493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
495 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
496 COSTS_N_INSNS (2), /* cost of FABS instruction. */
497 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
498 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
499 pentiumpro_memcpy,
500 pentiumpro_memset,
501 1, /* scalar_stmt_cost. */
502 1, /* scalar load_cost. */
503 1, /* scalar_store_cost. */
504 1, /* vec_stmt_cost. */
505 1, /* vec_to_scalar_cost. */
506 1, /* scalar_to_vec_cost. */
507 1, /* vec_align_load_cost. */
508 2, /* vec_unalign_load_cost. */
509 1, /* vec_store_cost. */
510 3, /* cond_taken_branch_cost. */
511 1, /* cond_not_taken_branch_cost. */
512 };
514 static stringop_algs geode_memcpy[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static stringop_algs geode_memset[2] = {
518 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
519 DUMMY_STRINGOP_ALGS};
520 static const
521 struct processor_costs geode_cost = {
522 COSTS_N_INSNS (1), /* cost of an add instruction */
523 COSTS_N_INSNS (1), /* cost of a lea instruction */
524 COSTS_N_INSNS (2), /* variable shift costs */
525 COSTS_N_INSNS (1), /* constant shift costs */
526 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
527 COSTS_N_INSNS (4), /* HI */
528 COSTS_N_INSNS (7), /* SI */
529 COSTS_N_INSNS (7), /* DI */
530 COSTS_N_INSNS (7)}, /* other */
531 0, /* cost of multiply per each bit set */
532 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
533 COSTS_N_INSNS (23), /* HI */
534 COSTS_N_INSNS (39), /* SI */
535 COSTS_N_INSNS (39), /* DI */
536 COSTS_N_INSNS (39)}, /* other */
537 COSTS_N_INSNS (1), /* cost of movsx */
538 COSTS_N_INSNS (1), /* cost of movzx */
539 8, /* "large" insn */
540 4, /* MOVE_RATIO */
541 1, /* cost for loading QImode using movzbl */
542 {1, 1, 1}, /* cost of loading integer registers
543 in QImode, HImode and SImode.
544 Relative to reg-reg move (2). */
545 {1, 1, 1}, /* cost of storing integer registers */
546 1, /* cost of reg,reg fld/fst */
547 {1, 1, 1}, /* cost of loading fp registers
548 in SFmode, DFmode and XFmode */
549 {4, 6, 6}, /* cost of storing fp registers
550 in SFmode, DFmode and XFmode */
552 1, /* cost of moving MMX register */
553 {1, 1}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {1, 1}, /* cost of storing MMX registers
556 in SImode and DImode */
557 1, /* cost of moving SSE register */
558 {1, 1, 1}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {1, 1, 1}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 1, /* MMX or SSE register to integer */
563 64, /* size of l1 cache. */
564 128, /* size of l2 cache. */
565 32, /* size of prefetch block */
566 1, /* number of parallel prefetches */
567 1, /* Branch cost */
568 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
569 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
570 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
571 COSTS_N_INSNS (1), /* cost of FABS instruction. */
572 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
573 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
574 geode_memcpy,
575 geode_memset,
576 1, /* scalar_stmt_cost. */
577 1, /* scalar load_cost. */
578 1, /* scalar_store_cost. */
579 1, /* vec_stmt_cost. */
580 1, /* vec_to_scalar_cost. */
581 1, /* scalar_to_vec_cost. */
582 1, /* vec_align_load_cost. */
583 2, /* vec_unalign_load_cost. */
584 1, /* vec_store_cost. */
585 3, /* cond_taken_branch_cost. */
586 1, /* cond_not_taken_branch_cost. */
587 };
589 static stringop_algs k6_memcpy[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static stringop_algs k6_memset[2] = {
593 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
594 DUMMY_STRINGOP_ALGS};
595 static const
596 struct processor_costs k6_cost = {
597 COSTS_N_INSNS (1), /* cost of an add instruction */
598 COSTS_N_INSNS (2), /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (3), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (3), /* DI */
605 COSTS_N_INSNS (3)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (18), /* HI */
609 COSTS_N_INSNS (18), /* SI */
610 COSTS_N_INSNS (18), /* DI */
611 COSTS_N_INSNS (18)}, /* other */
612 COSTS_N_INSNS (2), /* cost of movsx */
613 COSTS_N_INSNS (2), /* cost of movzx */
614 8, /* "large" insn */
615 4, /* MOVE_RATIO */
616 3, /* cost for loading QImode using movzbl */
617 {4, 5, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {2, 3, 2}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {6, 6, 6}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {4, 4, 4}, /* cost of storing fp registers
625 in SFmode, DFmode and XFmode */
626 2, /* cost of moving MMX register */
627 {2, 2}, /* cost of loading MMX registers
628 in SImode and DImode */
629 {2, 2}, /* cost of storing MMX registers
630 in SImode and DImode */
631 2, /* cost of moving SSE register */
632 {2, 2, 8}, /* cost of loading SSE registers
633 in SImode, DImode and TImode */
634 {2, 2, 8}, /* cost of storing SSE registers
635 in SImode, DImode and TImode */
636 6, /* MMX or SSE register to integer */
637 32, /* size of l1 cache. */
638 32, /* size of l2 cache. Some models
639 have integrated l2 cache, but
640 optimizing for k6 is not important
641 enough to worry about that. */
642 32, /* size of prefetch block */
643 1, /* number of parallel prefetches */
644 1, /* Branch cost */
645 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
646 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
647 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
648 COSTS_N_INSNS (2), /* cost of FABS instruction. */
649 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
650 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
651 k6_memcpy,
652 k6_memset,
653 1, /* scalar_stmt_cost. */
654 1, /* scalar load_cost. */
655 1, /* scalar_store_cost. */
656 1, /* vec_stmt_cost. */
657 1, /* vec_to_scalar_cost. */
658 1, /* scalar_to_vec_cost. */
659 1, /* vec_align_load_cost. */
660 2, /* vec_unalign_load_cost. */
661 1, /* vec_store_cost. */
662 3, /* cond_taken_branch_cost. */
663 1, /* cond_not_taken_branch_cost. */
664 };
666 /* For some reason, Athlon deals better with REP prefix (relative to loops)
667 compared to K8. Alignment becomes important after 8 bytes for memcpy and
668 128 bytes for memset. */
669 static stringop_algs athlon_memcpy[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static stringop_algs athlon_memset[2] = {
673 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
674 DUMMY_STRINGOP_ALGS};
675 static const
676 struct processor_costs athlon_cost = {
677 COSTS_N_INSNS (1), /* cost of an add instruction */
678 COSTS_N_INSNS (2), /* cost of a lea instruction */
679 COSTS_N_INSNS (1), /* variable shift costs */
680 COSTS_N_INSNS (1), /* constant shift costs */
681 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
682 COSTS_N_INSNS (5), /* HI */
683 COSTS_N_INSNS (5), /* SI */
684 COSTS_N_INSNS (5), /* DI */
685 COSTS_N_INSNS (5)}, /* other */
686 0, /* cost of multiply per each bit set */
687 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
688 COSTS_N_INSNS (26), /* HI */
689 COSTS_N_INSNS (42), /* SI */
690 COSTS_N_INSNS (74), /* DI */
691 COSTS_N_INSNS (74)}, /* other */
692 COSTS_N_INSNS (1), /* cost of movsx */
693 COSTS_N_INSNS (1), /* cost of movzx */
694 8, /* "large" insn */
695 9, /* MOVE_RATIO */
696 4, /* cost for loading QImode using movzbl */
697 {3, 4, 3}, /* cost of loading integer registers
698 in QImode, HImode and SImode.
699 Relative to reg-reg move (2). */
700 {3, 4, 3}, /* cost of storing integer registers */
701 4, /* cost of reg,reg fld/fst */
702 {4, 4, 12}, /* cost of loading fp registers
703 in SFmode, DFmode and XFmode */
704 {6, 6, 8}, /* cost of storing fp registers
705 in SFmode, DFmode and XFmode */
706 2, /* cost of moving MMX register */
707 {4, 4}, /* cost of loading MMX registers
708 in SImode and DImode */
709 {4, 4}, /* cost of storing MMX registers
710 in SImode and DImode */
711 2, /* cost of moving SSE register */
712 {4, 4, 6}, /* cost of loading SSE registers
713 in SImode, DImode and TImode */
714 {4, 4, 5}, /* cost of storing SSE registers
715 in SImode, DImode and TImode */
716 5, /* MMX or SSE register to integer */
717 64, /* size of l1 cache. */
718 256, /* size of l2 cache. */
719 64, /* size of prefetch block */
720 6, /* number of parallel prefetches */
721 5, /* Branch cost */
722 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
723 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
724 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
725 COSTS_N_INSNS (2), /* cost of FABS instruction. */
726 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
727 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
728 athlon_memcpy,
729 athlon_memset,
730 1, /* scalar_stmt_cost. */
731 1, /* scalar load_cost. */
732 1, /* scalar_store_cost. */
733 1, /* vec_stmt_cost. */
734 1, /* vec_to_scalar_cost. */
735 1, /* scalar_to_vec_cost. */
736 1, /* vec_align_load_cost. */
737 2, /* vec_unalign_load_cost. */
738 1, /* vec_store_cost. */
739 3, /* cond_taken_branch_cost. */
740 1, /* cond_not_taken_branch_cost. */
741 };
743 /* K8 has optimized REP instruction for medium sized blocks, but for very
744 small blocks it is better to use loop. For large blocks, libcall can
745 do non-temporal accesses and beat inline considerably. */
746 static stringop_algs k8_memcpy[2] = {
747 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
748 {-1, rep_prefix_4_byte, false}}},
749 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
750 {-1, libcall, false}}}};
751 static stringop_algs k8_memset[2] = {
752 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 {libcall, {{48, unrolled_loop, false},
755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
756 static const
757 struct processor_costs k8_cost = {
758 COSTS_N_INSNS (1), /* cost of an add instruction */
759 COSTS_N_INSNS (2), /* cost of a lea instruction */
760 COSTS_N_INSNS (1), /* variable shift costs */
761 COSTS_N_INSNS (1), /* constant shift costs */
762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
763 COSTS_N_INSNS (4), /* HI */
764 COSTS_N_INSNS (3), /* SI */
765 COSTS_N_INSNS (4), /* DI */
766 COSTS_N_INSNS (5)}, /* other */
767 0, /* cost of multiply per each bit set */
768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
769 COSTS_N_INSNS (26), /* HI */
770 COSTS_N_INSNS (42), /* SI */
771 COSTS_N_INSNS (74), /* DI */
772 COSTS_N_INSNS (74)}, /* other */
773 COSTS_N_INSNS (1), /* cost of movsx */
774 COSTS_N_INSNS (1), /* cost of movzx */
775 8, /* "large" insn */
776 9, /* MOVE_RATIO */
777 4, /* cost for loading QImode using movzbl */
778 {3, 4, 3}, /* cost of loading integer registers
779 in QImode, HImode and SImode.
780 Relative to reg-reg move (2). */
781 {3, 4, 3}, /* cost of storing integer registers */
782 4, /* cost of reg,reg fld/fst */
783 {4, 4, 12}, /* cost of loading fp registers
784 in SFmode, DFmode and XFmode */
785 {6, 6, 8}, /* cost of storing fp registers
786 in SFmode, DFmode and XFmode */
787 2, /* cost of moving MMX register */
788 {3, 3}, /* cost of loading MMX registers
789 in SImode and DImode */
790 {4, 4}, /* cost of storing MMX registers
791 in SImode and DImode */
792 2, /* cost of moving SSE register */
793 {4, 3, 6}, /* cost of loading SSE registers
794 in SImode, DImode and TImode */
795 {4, 4, 5}, /* cost of storing SSE registers
796 in SImode, DImode and TImode */
797 5, /* MMX or SSE register to integer */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
801 /* New AMD processors never drop prefetches; if they cannot be performed
802 immediately, they are queued. We set number of simultaneous prefetches
803 to a large constant to reflect this (it probably is not a good idea not
804 to limit number of prefetches at all, as their execution also takes some
805 time). */
806 100, /* number of parallel prefetches */
807 3, /* Branch cost */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
815 k8_memcpy,
816 k8_memset,
817 4, /* scalar_stmt_cost. */
818 2, /* scalar load_cost. */
819 2, /* scalar_store_cost. */
820 5, /* vec_stmt_cost. */
821 0, /* vec_to_scalar_cost. */
822 2, /* scalar_to_vec_cost. */
823 2, /* vec_align_load_cost. */
824 3, /* vec_unalign_load_cost. */
825 3, /* vec_store_cost. */
826 3, /* cond_taken_branch_cost. */
827 2, /* cond_not_taken_branch_cost. */
828 };
830 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
831 very small blocks it is better to use loop. For large blocks, libcall can
832 do non-temporal accesses and beat inline considerably. */
833 static stringop_algs amdfam10_memcpy[2] = {
834 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
835 {-1, rep_prefix_4_byte, false}}},
836 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
837 {-1, libcall, false}}}};
838 static stringop_algs amdfam10_memset[2] = {
839 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
840 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
841 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
842 {-1, libcall, false}}}};
843 struct processor_costs amdfam10_cost = {
844 COSTS_N_INSNS (1), /* cost of an add instruction */
845 COSTS_N_INSNS (2), /* cost of a lea instruction */
846 COSTS_N_INSNS (1), /* variable shift costs */
847 COSTS_N_INSNS (1), /* constant shift costs */
848 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
849 COSTS_N_INSNS (4), /* HI */
850 COSTS_N_INSNS (3), /* SI */
851 COSTS_N_INSNS (4), /* DI */
852 COSTS_N_INSNS (5)}, /* other */
853 0, /* cost of multiply per each bit set */
854 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
855 COSTS_N_INSNS (35), /* HI */
856 COSTS_N_INSNS (51), /* SI */
857 COSTS_N_INSNS (83), /* DI */
858 COSTS_N_INSNS (83)}, /* other */
859 COSTS_N_INSNS (1), /* cost of movsx */
860 COSTS_N_INSNS (1), /* cost of movzx */
861 8, /* "large" insn */
862 9, /* MOVE_RATIO */
863 4, /* cost for loading QImode using movzbl */
864 {3, 4, 3}, /* cost of loading integer registers
865 in QImode, HImode and SImode.
866 Relative to reg-reg move (2). */
867 {3, 4, 3}, /* cost of storing integer registers */
868 4, /* cost of reg,reg fld/fst */
869 {4, 4, 12}, /* cost of loading fp registers
870 in SFmode, DFmode and XFmode */
871 {6, 6, 8}, /* cost of storing fp registers
872 in SFmode, DFmode and XFmode */
873 2, /* cost of moving MMX register */
874 {3, 3}, /* cost of loading MMX registers
875 in SImode and DImode */
876 {4, 4}, /* cost of storing MMX registers
877 in SImode and DImode */
878 2, /* cost of moving SSE register */
879 {4, 4, 3}, /* cost of loading SSE registers
880 in SImode, DImode and TImode */
881 {4, 4, 5}, /* cost of storing SSE registers
882 in SImode, DImode and TImode */
883 3, /* MMX or SSE register to integer */
884 /* On K8:
885 MOVD reg64, xmmreg Double FSTORE 4
886 MOVD reg32, xmmreg Double FSTORE 4
887 On AMDFAM10:
888 MOVD reg64, xmmreg Double FADD 3
889 1/1 1/1
890 MOVD reg32, xmmreg Double FADD 3
891 1/1 1/1 */
892 64, /* size of l1 cache. */
893 512, /* size of l2 cache. */
894 64, /* size of prefetch block */
895 /* New AMD processors never drop prefetches; if they cannot be performed
896 immediately, they are queued. We set number of simultaneous prefetches
897 to a large constant to reflect this (it probably is not a good idea not
898 to limit number of prefetches at all, as their execution also takes some
899 time). */
900 100, /* number of parallel prefetches */
901 2, /* Branch cost */
902 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
903 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
904 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
905 COSTS_N_INSNS (2), /* cost of FABS instruction. */
906 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
907 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
909 amdfam10_memcpy,
910 amdfam10_memset,
911 4, /* scalar_stmt_cost. */
912 2, /* scalar load_cost. */
913 2, /* scalar_store_cost. */
914 6, /* vec_stmt_cost. */
915 0, /* vec_to_scalar_cost. */
916 2, /* scalar_to_vec_cost. */
917 2, /* vec_align_load_cost. */
918 2, /* vec_unalign_load_cost. */
919 2, /* vec_store_cost. */
920 2, /* cond_taken_branch_cost. */
921 1, /* cond_not_taken_branch_cost. */
922 };
924 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
925 very small blocks it is better to use loop. For large blocks, libcall
926 can do non-temporal accesses and beat inline considerably. */
927 static stringop_algs bdver1_memcpy[2] = {
928 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
929 {-1, rep_prefix_4_byte, false}}},
930 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
931 {-1, libcall, false}}}};
932 static stringop_algs bdver1_memset[2] = {
933 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
934 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
935 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
936 {-1, libcall, false}}}};
938 const struct processor_costs bdver1_cost = {
939 COSTS_N_INSNS (1), /* cost of an add instruction */
940 COSTS_N_INSNS (1), /* cost of a lea instruction */
941 COSTS_N_INSNS (1), /* variable shift costs */
942 COSTS_N_INSNS (1), /* constant shift costs */
943 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
944 COSTS_N_INSNS (4), /* HI */
945 COSTS_N_INSNS (4), /* SI */
946 COSTS_N_INSNS (6), /* DI */
947 COSTS_N_INSNS (6)}, /* other */
948 0, /* cost of multiply per each bit set */
949 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
950 COSTS_N_INSNS (35), /* HI */
951 COSTS_N_INSNS (51), /* SI */
952 COSTS_N_INSNS (83), /* DI */
953 COSTS_N_INSNS (83)}, /* other */
954 COSTS_N_INSNS (1), /* cost of movsx */
955 COSTS_N_INSNS (1), /* cost of movzx */
956 8, /* "large" insn */
957 9, /* MOVE_RATIO */
958 4, /* cost for loading QImode using movzbl */
959 {5, 5, 4}, /* cost of loading integer registers
960 in QImode, HImode and SImode.
961 Relative to reg-reg move (2). */
962 {4, 4, 4}, /* cost of storing integer registers */
963 2, /* cost of reg,reg fld/fst */
964 {5, 5, 12}, /* cost of loading fp registers
965 in SFmode, DFmode and XFmode */
966 {4, 4, 8}, /* cost of storing fp registers
967 in SFmode, DFmode and XFmode */
968 2, /* cost of moving MMX register */
969 {4, 4}, /* cost of loading MMX registers
970 in SImode and DImode */
971 {4, 4}, /* cost of storing MMX registers
972 in SImode and DImode */
973 2, /* cost of moving SSE register */
974 {4, 4, 4}, /* cost of loading SSE registers
975 in SImode, DImode and TImode */
976 {4, 4, 4}, /* cost of storing SSE registers
977 in SImode, DImode and TImode */
978 2, /* MMX or SSE register to integer */
979 /* On K8:
980 MOVD reg64, xmmreg Double FSTORE 4
981 MOVD reg32, xmmreg Double FSTORE 4
982 On AMDFAM10:
983 MOVD reg64, xmmreg Double FADD 3
984 1/1 1/1
985 MOVD reg32, xmmreg Double FADD 3
986 1/1 1/1 */
987 16, /* size of l1 cache. */
988 2048, /* size of l2 cache. */
989 64, /* size of prefetch block */
990 /* New AMD processors never drop prefetches; if they cannot be performed
991 immediately, they are queued. We set number of simultaneous prefetches
992 to a large constant to reflect this (it probably is not a good idea not
993 to limit number of prefetches at all, as their execution also takes some
994 time). */
995 100, /* number of parallel prefetches */
996 2, /* Branch cost */
997 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
998 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
999 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1000 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1001 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1002 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1004 bdver1_memcpy,
1005 bdver1_memset,
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
1016 1, /* cond_not_taken_branch_cost. */
1017 };
1019 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1020 very small blocks it is better to use loop. For large blocks, libcall
1021 can do non-temporal accesses and beat inline considerably. */
1023 static stringop_algs bdver2_memcpy[2] = {
1024 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1025 {-1, rep_prefix_4_byte, false}}},
1026 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1027 {-1, libcall, false}}}};
1028 static stringop_algs bdver2_memset[2] = {
1029 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1030 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1031 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1032 {-1, libcall, false}}}};
1034 const struct processor_costs bdver2_cost = {
1035 COSTS_N_INSNS (1), /* cost of an add instruction */
1036 COSTS_N_INSNS (1), /* cost of a lea instruction */
1037 COSTS_N_INSNS (1), /* variable shift costs */
1038 COSTS_N_INSNS (1), /* constant shift costs */
1039 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1040 COSTS_N_INSNS (4), /* HI */
1041 COSTS_N_INSNS (4), /* SI */
1042 COSTS_N_INSNS (6), /* DI */
1043 COSTS_N_INSNS (6)}, /* other */
1044 0, /* cost of multiply per each bit set */
1045 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1046 COSTS_N_INSNS (35), /* HI */
1047 COSTS_N_INSNS (51), /* SI */
1048 COSTS_N_INSNS (83), /* DI */
1049 COSTS_N_INSNS (83)}, /* other */
1050 COSTS_N_INSNS (1), /* cost of movsx */
1051 COSTS_N_INSNS (1), /* cost of movzx */
1052 8, /* "large" insn */
1053 9, /* MOVE_RATIO */
1054 4, /* cost for loading QImode using movzbl */
1055 {5, 5, 4}, /* cost of loading integer registers
1056 in QImode, HImode and SImode.
1057 Relative to reg-reg move (2). */
1058 {4, 4, 4}, /* cost of storing integer registers */
1059 2, /* cost of reg,reg fld/fst */
1060 {5, 5, 12}, /* cost of loading fp registers
1061 in SFmode, DFmode and XFmode */
1062 {4, 4, 8}, /* cost of storing fp registers
1063 in SFmode, DFmode and XFmode */
1064 2, /* cost of moving MMX register */
1065 {4, 4}, /* cost of loading MMX registers
1066 in SImode and DImode */
1067 {4, 4}, /* cost of storing MMX registers
1068 in SImode and DImode */
1069 2, /* cost of moving SSE register */
1070 {4, 4, 4}, /* cost of loading SSE registers
1071 in SImode, DImode and TImode */
1072 {4, 4, 4}, /* cost of storing SSE registers
1073 in SImode, DImode and TImode */
1074 2, /* MMX or SSE register to integer */
1075 /* On K8:
1076 MOVD reg64, xmmreg Double FSTORE 4
1077 MOVD reg32, xmmreg Double FSTORE 4
1078 On AMDFAM10:
1079 MOVD reg64, xmmreg Double FADD 3
1080 1/1 1/1
1081 MOVD reg32, xmmreg Double FADD 3
1082 1/1 1/1 */
1083 16, /* size of l1 cache. */
1084 2048, /* size of l2 cache. */
1085 64, /* size of prefetch block */
1086 /* New AMD processors never drop prefetches; if they cannot be performed
1087 immediately, they are queued. We set number of simultaneous prefetches
1088 to a large constant to reflect this (it probably is not a good idea not
1089 to limit number of prefetches at all, as their execution also takes some
1090 time). */
1091 100, /* number of parallel prefetches */
1092 2, /* Branch cost */
1093 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1100 bdver2_memcpy,
1101 bdver2_memset,
1102 6, /* scalar_stmt_cost. */
1103 4, /* scalar load_cost. */
1104 4, /* scalar_store_cost. */
1105 6, /* vec_stmt_cost. */
1106 0, /* vec_to_scalar_cost. */
1107 2, /* scalar_to_vec_cost. */
1108 4, /* vec_align_load_cost. */
1109 4, /* vec_unalign_load_cost. */
1110 4, /* vec_store_cost. */
1111 2, /* cond_taken_branch_cost. */
1112 1, /* cond_not_taken_branch_cost. */
1113 };
1116 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1117 very small blocks it is better to use loop. For large blocks, libcall
1118 can do non-temporal accesses and beat inline considerably. */
1119 static stringop_algs bdver3_memcpy[2] = {
1120 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1121 {-1, rep_prefix_4_byte, false}}},
1122 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1123 {-1, libcall, false}}}};
1124 static stringop_algs bdver3_memset[2] = {
1125 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1126 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1127 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1128 {-1, libcall, false}}}};
1129 struct processor_costs bdver3_cost = {
1130 COSTS_N_INSNS (1), /* cost of an add instruction */
1131 COSTS_N_INSNS (1), /* cost of a lea instruction */
1132 COSTS_N_INSNS (1), /* variable shift costs */
1133 COSTS_N_INSNS (1), /* constant shift costs */
1134 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1135 COSTS_N_INSNS (4), /* HI */
1136 COSTS_N_INSNS (4), /* SI */
1137 COSTS_N_INSNS (6), /* DI */
1138 COSTS_N_INSNS (6)}, /* other */
1139 0, /* cost of multiply per each bit set */
1140 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1141 COSTS_N_INSNS (35), /* HI */
1142 COSTS_N_INSNS (51), /* SI */
1143 COSTS_N_INSNS (83), /* DI */
1144 COSTS_N_INSNS (83)}, /* other */
1145 COSTS_N_INSNS (1), /* cost of movsx */
1146 COSTS_N_INSNS (1), /* cost of movzx */
1147 8, /* "large" insn */
1148 9, /* MOVE_RATIO */
1149 4, /* cost for loading QImode using movzbl */
1150 {5, 5, 4}, /* cost of loading integer registers
1151 in QImode, HImode and SImode.
1152 Relative to reg-reg move (2). */
1153 {4, 4, 4}, /* cost of storing integer registers */
1154 2, /* cost of reg,reg fld/fst */
1155 {5, 5, 12}, /* cost of loading fp registers
1156 in SFmode, DFmode and XFmode */
1157 {4, 4, 8}, /* cost of storing fp registers
1158 in SFmode, DFmode and XFmode */
1159 2, /* cost of moving MMX register */
1160 {4, 4}, /* cost of loading MMX registers
1161 in SImode and DImode */
1162 {4, 4}, /* cost of storing MMX registers
1163 in SImode and DImode */
1164 2, /* cost of moving SSE register */
1165 {4, 4, 4}, /* cost of loading SSE registers
1166 in SImode, DImode and TImode */
1167 {4, 4, 4}, /* cost of storing SSE registers
1168 in SImode, DImode and TImode */
1169 2, /* MMX or SSE register to integer */
1170 16, /* size of l1 cache. */
1171 2048, /* size of l2 cache. */
1172 64, /* size of prefetch block */
1173 /* New AMD processors never drop prefetches; if they cannot be performed
1174 immediately, they are queued. We set number of simultaneous prefetches
1175 to a large constant to reflect this (it probably is not a good idea not
1176 to limit number of prefetches at all, as their execution also takes some
1177 time). */
1178 100, /* number of parallel prefetches */
1179 2, /* Branch cost */
1180 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1181 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1182 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1183 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1184 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1185 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1187 bdver3_memcpy,
1188 bdver3_memset,
1189 6, /* scalar_stmt_cost. */
1190 4, /* scalar load_cost. */
1191 4, /* scalar_store_cost. */
1192 6, /* vec_stmt_cost. */
1193 0, /* vec_to_scalar_cost. */
1194 2, /* scalar_to_vec_cost. */
1195 4, /* vec_align_load_cost. */
1196 4, /* vec_unalign_load_cost. */
1197 4, /* vec_store_cost. */
1198 2, /* cond_taken_branch_cost. */
1199 1, /* cond_not_taken_branch_cost. */
1200 };
1202 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1203 very small blocks it is better to use loop. For large blocks, libcall
1204 can do non-temporal accesses and beat inline considerably. */
1205 static stringop_algs bdver4_memcpy[2] = {
1206 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1207 {-1, rep_prefix_4_byte, false}}},
1208 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1209 {-1, libcall, false}}}};
1210 static stringop_algs bdver4_memset[2] = {
1211 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1212 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1213 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1214 {-1, libcall, false}}}};
1215 struct processor_costs bdver4_cost = {
1216 COSTS_N_INSNS (1), /* cost of an add instruction */
1217 COSTS_N_INSNS (1), /* cost of a lea instruction */
1218 COSTS_N_INSNS (1), /* variable shift costs */
1219 COSTS_N_INSNS (1), /* constant shift costs */
1220 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1221 COSTS_N_INSNS (4), /* HI */
1222 COSTS_N_INSNS (4), /* SI */
1223 COSTS_N_INSNS (6), /* DI */
1224 COSTS_N_INSNS (6)}, /* other */
1225 0, /* cost of multiply per each bit set */
1226 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1227 COSTS_N_INSNS (35), /* HI */
1228 COSTS_N_INSNS (51), /* SI */
1229 COSTS_N_INSNS (83), /* DI */
1230 COSTS_N_INSNS (83)}, /* other */
1231 COSTS_N_INSNS (1), /* cost of movsx */
1232 COSTS_N_INSNS (1), /* cost of movzx */
1233 8, /* "large" insn */
1234 9, /* MOVE_RATIO */
1235 4, /* cost for loading QImode using movzbl */
1236 {5, 5, 4}, /* cost of loading integer registers
1237 in QImode, HImode and SImode.
1238 Relative to reg-reg move (2). */
1239 {4, 4, 4}, /* cost of storing integer registers */
1240 2, /* cost of reg,reg fld/fst */
1241 {5, 5, 12}, /* cost of loading fp registers
1242 in SFmode, DFmode and XFmode */
1243 {4, 4, 8}, /* cost of storing fp registers
1244 in SFmode, DFmode and XFmode */
1245 2, /* cost of moving MMX register */
1246 {4, 4}, /* cost of loading MMX registers
1247 in SImode and DImode */
1248 {4, 4}, /* cost of storing MMX registers
1249 in SImode and DImode */
1250 2, /* cost of moving SSE register */
1251 {4, 4, 4}, /* cost of loading SSE registers
1252 in SImode, DImode and TImode */
1253 {4, 4, 4}, /* cost of storing SSE registers
1254 in SImode, DImode and TImode */
1255 2, /* MMX or SSE register to integer */
1256 16, /* size of l1 cache. */
1257 2048, /* size of l2 cache. */
1258 64, /* size of prefetch block */
1259 /* New AMD processors never drop prefetches; if they cannot be performed
1260 immediately, they are queued. We set number of simultaneous prefetches
1261 to a large constant to reflect this (it probably is not a good idea not
1262 to limit number of prefetches at all, as their execution also takes some
1263 time). */
1264 100, /* number of parallel prefetches */
1265 2, /* Branch cost */
1266 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1267 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1268 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1269 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1270 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1271 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1273 bdver4_memcpy,
1274 bdver4_memset,
1275 6, /* scalar_stmt_cost. */
1276 4, /* scalar load_cost. */
1277 4, /* scalar_store_cost. */
1278 6, /* vec_stmt_cost. */
1279 0, /* vec_to_scalar_cost. */
1280 2, /* scalar_to_vec_cost. */
1281 4, /* vec_align_load_cost. */
1282 4, /* vec_unalign_load_cost. */
1283 4, /* vec_store_cost. */
1284 2, /* cond_taken_branch_cost. */
1285 1, /* cond_not_taken_branch_cost. */
1286 };
1288 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1289 very small blocks it is better to use loop. For large blocks, libcall can
1290 do non-temporal accesses and beat inline considerably. */
1291 static stringop_algs btver1_memcpy[2] = {
1292 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1293 {-1, rep_prefix_4_byte, false}}},
1294 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1295 {-1, libcall, false}}}};
1296 static stringop_algs btver1_memset[2] = {
1297 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1298 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1299 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1300 {-1, libcall, false}}}};
1301 const struct processor_costs btver1_cost = {
1302 COSTS_N_INSNS (1), /* cost of an add instruction */
1303 COSTS_N_INSNS (2), /* cost of a lea instruction */
1304 COSTS_N_INSNS (1), /* variable shift costs */
1305 COSTS_N_INSNS (1), /* constant shift costs */
1306 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1307 COSTS_N_INSNS (4), /* HI */
1308 COSTS_N_INSNS (3), /* SI */
1309 COSTS_N_INSNS (4), /* DI */
1310 COSTS_N_INSNS (5)}, /* other */
1311 0, /* cost of multiply per each bit set */
1312 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1313 COSTS_N_INSNS (35), /* HI */
1314 COSTS_N_INSNS (51), /* SI */
1315 COSTS_N_INSNS (83), /* DI */
1316 COSTS_N_INSNS (83)}, /* other */
1317 COSTS_N_INSNS (1), /* cost of movsx */
1318 COSTS_N_INSNS (1), /* cost of movzx */
1319 8, /* "large" insn */
1320 9, /* MOVE_RATIO */
1321 4, /* cost for loading QImode using movzbl */
1322 {3, 4, 3}, /* cost of loading integer registers
1323 in QImode, HImode and SImode.
1324 Relative to reg-reg move (2). */
1325 {3, 4, 3}, /* cost of storing integer registers */
1326 4, /* cost of reg,reg fld/fst */
1327 {4, 4, 12}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode */
1329 {6, 6, 8}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode */
1331 2, /* cost of moving MMX register */
1332 {3, 3}, /* cost of loading MMX registers
1333 in SImode and DImode */
1334 {4, 4}, /* cost of storing MMX registers
1335 in SImode and DImode */
1336 2, /* cost of moving SSE register */
1337 {4, 4, 3}, /* cost of loading SSE registers
1338 in SImode, DImode and TImode */
1339 {4, 4, 5}, /* cost of storing SSE registers
1340 in SImode, DImode and TImode */
1341 3, /* MMX or SSE register to integer */
1342 /* On K8:
1343 MOVD reg64, xmmreg Double FSTORE 4
1344 MOVD reg32, xmmreg Double FSTORE 4
1345 On AMDFAM10:
1346 MOVD reg64, xmmreg Double FADD 3
1347 1/1 1/1
1348 MOVD reg32, xmmreg Double FADD 3
1349 1/1 1/1 */
1350 32, /* size of l1 cache. */
1351 512, /* size of l2 cache. */
1352 64, /* size of prefetch block */
1353 100, /* number of parallel prefetches */
1354 2, /* Branch cost */
1355 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1356 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1357 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1358 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1359 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1360 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1362 btver1_memcpy,
1363 btver1_memset,
1364 4, /* scalar_stmt_cost. */
1365 2, /* scalar load_cost. */
1366 2, /* scalar_store_cost. */
1367 6, /* vec_stmt_cost. */
1368 0, /* vec_to_scalar_cost. */
1369 2, /* scalar_to_vec_cost. */
1370 2, /* vec_align_load_cost. */
1371 2, /* vec_unalign_load_cost. */
1372 2, /* vec_store_cost. */
1373 2, /* cond_taken_branch_cost. */
1374 1, /* cond_not_taken_branch_cost. */
1375 };
1377 static stringop_algs btver2_memcpy[2] = {
1378 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1379 {-1, rep_prefix_4_byte, false}}},
1380 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1381 {-1, libcall, false}}}};
1382 static stringop_algs btver2_memset[2] = {
1383 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1384 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1385 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1386 {-1, libcall, false}}}};
1387 const struct processor_costs btver2_cost = {
1388 COSTS_N_INSNS (1), /* cost of an add instruction */
1389 COSTS_N_INSNS (2), /* cost of a lea instruction */
1390 COSTS_N_INSNS (1), /* variable shift costs */
1391 COSTS_N_INSNS (1), /* constant shift costs */
1392 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1393 COSTS_N_INSNS (4), /* HI */
1394 COSTS_N_INSNS (3), /* SI */
1395 COSTS_N_INSNS (4), /* DI */
1396 COSTS_N_INSNS (5)}, /* other */
1397 0, /* cost of multiply per each bit set */
1398 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1399 COSTS_N_INSNS (35), /* HI */
1400 COSTS_N_INSNS (51), /* SI */
1401 COSTS_N_INSNS (83), /* DI */
1402 COSTS_N_INSNS (83)}, /* other */
1403 COSTS_N_INSNS (1), /* cost of movsx */
1404 COSTS_N_INSNS (1), /* cost of movzx */
1405 8, /* "large" insn */
1406 9, /* MOVE_RATIO */
1407 4, /* cost for loading QImode using movzbl */
1408 {3, 4, 3}, /* cost of loading integer registers
1409 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */
1411 {3, 4, 3}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */
1413 {4, 4, 12}, /* cost of loading fp registers
1414 in SFmode, DFmode and XFmode */
1415 {6, 6, 8}, /* cost of storing fp registers
1416 in SFmode, DFmode and XFmode */
1417 2, /* cost of moving MMX register */
1418 {3, 3}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {4, 4}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, /* cost of moving SSE register */
1423 {4, 4, 3}, /* cost of loading SSE registers
1424 in SImode, DImode and TImode */
1425 {4, 4, 5}, /* cost of storing SSE registers
1426 in SImode, DImode and TImode */
1427 3, /* MMX or SSE register to integer */
1428 /* On K8:
1429 MOVD reg64, xmmreg Double FSTORE 4
1430 MOVD reg32, xmmreg Double FSTORE 4
1431 On AMDFAM10:
1432 MOVD reg64, xmmreg Double FADD 3
1433 1/1 1/1
1434 MOVD reg32, xmmreg Double FADD 3
1435 1/1 1/1 */
1436 32, /* size of l1 cache. */
1437 2048, /* size of l2 cache. */
1438 64, /* size of prefetch block */
1439 100, /* number of parallel prefetches */
1440 2, /* Branch cost */
1441 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1442 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1443 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1444 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1445 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1446 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1447 btver2_memcpy,
1448 btver2_memset,
1449 4, /* scalar_stmt_cost. */
1450 2, /* scalar_load_cost. */
1451 2, /* scalar_store_cost. */
1452 6, /* vec_stmt_cost. */
1453 0, /* vec_to_scalar_cost. */
1454 2, /* scalar_to_vec_cost. */
1455 2, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 2, /* vec_store_cost. */
1458 2, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1462 static stringop_algs pentium4_memcpy[2] = {
1463 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1464 DUMMY_STRINGOP_ALGS};
1465 static stringop_algs pentium4_memset[2] = {
1466 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1467 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 DUMMY_STRINGOP_ALGS};
1470 static const
1471 struct processor_costs pentium4_cost = {
1472 COSTS_N_INSNS (1), /* cost of an add instruction */
1473 COSTS_N_INSNS (3), /* cost of a lea instruction */
1474 COSTS_N_INSNS (4), /* variable shift costs */
1475 COSTS_N_INSNS (4), /* constant shift costs */
1476 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1477 COSTS_N_INSNS (15), /* HI */
1478 COSTS_N_INSNS (15), /* SI */
1479 COSTS_N_INSNS (15), /* DI */
1480 COSTS_N_INSNS (15)}, /* other */
1481 0, /* cost of multiply per each bit set */
1482 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1483 COSTS_N_INSNS (56), /* HI */
1484 COSTS_N_INSNS (56), /* SI */
1485 COSTS_N_INSNS (56), /* DI */
1486 COSTS_N_INSNS (56)}, /* other */
1487 COSTS_N_INSNS (1), /* cost of movsx */
1488 COSTS_N_INSNS (1), /* cost of movzx */
1489 16, /* "large" insn */
1490 6, /* MOVE_RATIO */
1491 2, /* cost for loading QImode using movzbl */
1492 {4, 5, 4}, /* cost of loading integer registers
1493 in QImode, HImode and SImode.
1494 Relative to reg-reg move (2). */
1495 {2, 3, 2}, /* cost of storing integer registers */
1496 2, /* cost of reg,reg fld/fst */
1497 {2, 2, 6}, /* cost of loading fp registers
1498 in SFmode, DFmode and XFmode */
1499 {4, 4, 6}, /* cost of storing fp registers
1500 in SFmode, DFmode and XFmode */
1501 2, /* cost of moving MMX register */
1502 {2, 2}, /* cost of loading MMX registers
1503 in SImode and DImode */
1504 {2, 2}, /* cost of storing MMX registers
1505 in SImode and DImode */
1506 12, /* cost of moving SSE register */
1507 {12, 12, 12}, /* cost of loading SSE registers
1508 in SImode, DImode and TImode */
1509 {2, 2, 8}, /* cost of storing SSE registers
1510 in SImode, DImode and TImode */
1511 10, /* MMX or SSE register to integer */
1512 8, /* size of l1 cache. */
1513 256, /* size of l2 cache. */
1514 64, /* size of prefetch block */
1515 6, /* number of parallel prefetches */
1516 2, /* Branch cost */
1517 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1518 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1519 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1520 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1521 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1522 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1523 pentium4_memcpy,
1524 pentium4_memset,
1525 1, /* scalar_stmt_cost. */
1526 1, /* scalar_load_cost. */
1527 1, /* scalar_store_cost. */
1528 1, /* vec_stmt_cost. */
1529 1, /* vec_to_scalar_cost. */
1530 1, /* scalar_to_vec_cost. */
1531 1, /* vec_align_load_cost. */
1532 2, /* vec_unalign_load_cost. */
1533 1, /* vec_store_cost. */
1534 3, /* cond_taken_branch_cost. */
1535 1, /* cond_not_taken_branch_cost. */
1538 static stringop_algs nocona_memcpy[2] = {
1539 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1540 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1541 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1543 static stringop_algs nocona_memset[2] = {
1544 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1545 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1546 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1547 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1549 static const
1550 struct processor_costs nocona_cost = {
1551 COSTS_N_INSNS (1), /* cost of an add instruction */
1552 COSTS_N_INSNS (1), /* cost of a lea instruction */
1553 COSTS_N_INSNS (1), /* variable shift costs */
1554 COSTS_N_INSNS (1), /* constant shift costs */
1555 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1556 COSTS_N_INSNS (10), /* HI */
1557 COSTS_N_INSNS (10), /* SI */
1558 COSTS_N_INSNS (10), /* DI */
1559 COSTS_N_INSNS (10)}, /* other */
1560 0, /* cost of multiply per each bit set */
1561 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1562 COSTS_N_INSNS (66), /* HI */
1563 COSTS_N_INSNS (66), /* SI */
1564 COSTS_N_INSNS (66), /* DI */
1565 COSTS_N_INSNS (66)}, /* other */
1566 COSTS_N_INSNS (1), /* cost of movsx */
1567 COSTS_N_INSNS (1), /* cost of movzx */
1568 16, /* "large" insn */
1569 17, /* MOVE_RATIO */
1570 4, /* cost for loading QImode using movzbl */
1571 {4, 4, 4}, /* cost of loading integer registers
1572 in QImode, HImode and SImode.
1573 Relative to reg-reg move (2). */
1574 {4, 4, 4}, /* cost of storing integer registers */
1575 3, /* cost of reg,reg fld/fst */
1576 {12, 12, 12}, /* cost of loading fp registers
1577 in SFmode, DFmode and XFmode */
1578 {4, 4, 4}, /* cost of storing fp registers
1579 in SFmode, DFmode and XFmode */
1580 6, /* cost of moving MMX register */
1581 {12, 12}, /* cost of loading MMX registers
1582 in SImode and DImode */
1583 {12, 12}, /* cost of storing MMX registers
1584 in SImode and DImode */
1585 6, /* cost of moving SSE register */
1586 {12, 12, 12}, /* cost of loading SSE registers
1587 in SImode, DImode and TImode */
1588 {12, 12, 12}, /* cost of storing SSE registers
1589 in SImode, DImode and TImode */
1590 8, /* MMX or SSE register to integer */
1591 8, /* size of l1 cache. */
1592 1024, /* size of l2 cache. */
1593 64, /* size of prefetch block */
1594 8, /* number of parallel prefetches */
1595 1, /* Branch cost */
1596 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1597 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1598 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1599 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1600 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1601 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1602 nocona_memcpy,
1603 nocona_memset,
1604 1, /* scalar_stmt_cost. */
1605 1, /* scalar_load_cost. */
1606 1, /* scalar_store_cost. */
1607 1, /* vec_stmt_cost. */
1608 1, /* vec_to_scalar_cost. */
1609 1, /* scalar_to_vec_cost. */
1610 1, /* vec_align_load_cost. */
1611 2, /* vec_unalign_load_cost. */
1612 1, /* vec_store_cost. */
1613 3, /* cond_taken_branch_cost. */
1614 1, /* cond_not_taken_branch_cost. */
1617 static stringop_algs atom_memcpy[2] = {
1618 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1619 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1620 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1621 static stringop_algs atom_memset[2] = {
1622 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1623 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1624 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1625 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1626 static const
1627 struct processor_costs atom_cost = {
1628 COSTS_N_INSNS (1), /* cost of an add instruction */
1629 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1630 COSTS_N_INSNS (1), /* variable shift costs */
1631 COSTS_N_INSNS (1), /* constant shift costs */
1632 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1633 COSTS_N_INSNS (4), /* HI */
1634 COSTS_N_INSNS (3), /* SI */
1635 COSTS_N_INSNS (4), /* DI */
1636 COSTS_N_INSNS (2)}, /* other */
1637 0, /* cost of multiply per each bit set */
1638 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1639 COSTS_N_INSNS (26), /* HI */
1640 COSTS_N_INSNS (42), /* SI */
1641 COSTS_N_INSNS (74), /* DI */
1642 COSTS_N_INSNS (74)}, /* other */
1643 COSTS_N_INSNS (1), /* cost of movsx */
1644 COSTS_N_INSNS (1), /* cost of movzx */
1645 8, /* "large" insn */
1646 17, /* MOVE_RATIO */
1647 4, /* cost for loading QImode using movzbl */
1648 {4, 4, 4}, /* cost of loading integer registers
1649 in QImode, HImode and SImode.
1650 Relative to reg-reg move (2). */
1651 {4, 4, 4}, /* cost of storing integer registers */
1652 4, /* cost of reg,reg fld/fst */
1653 {12, 12, 12}, /* cost of loading fp registers
1654 in SFmode, DFmode and XFmode */
1655 {6, 6, 8}, /* cost of storing fp registers
1656 in SFmode, DFmode and XFmode */
1657 2, /* cost of moving MMX register */
1658 {8, 8}, /* cost of loading MMX registers
1659 in SImode and DImode */
1660 {8, 8}, /* cost of storing MMX registers
1661 in SImode and DImode */
1662 2, /* cost of moving SSE register */
1663 {8, 8, 8}, /* cost of loading SSE registers
1664 in SImode, DImode and TImode */
1665 {8, 8, 8}, /* cost of storing SSE registers
1666 in SImode, DImode and TImode */
1667 5, /* MMX or SSE register to integer */
1668 32, /* size of l1 cache. */
1669 256, /* size of l2 cache. */
1670 64, /* size of prefetch block */
1671 6, /* number of parallel prefetches */
1672 3, /* Branch cost */
1673 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1674 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1675 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1676 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1677 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1678 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1679 atom_memcpy,
1680 atom_memset,
1681 1, /* scalar_stmt_cost. */
1682 1, /* scalar_load_cost. */
1683 1, /* scalar_store_cost. */
1684 1, /* vec_stmt_cost. */
1685 1, /* vec_to_scalar_cost. */
1686 1, /* scalar_to_vec_cost. */
1687 1, /* vec_align_load_cost. */
1688 2, /* vec_unalign_load_cost. */
1689 1, /* vec_store_cost. */
1690 3, /* cond_taken_branch_cost. */
1691 1, /* cond_not_taken_branch_cost. */
1694 static stringop_algs slm_memcpy[2] = {
1695 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1696 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1697 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1698 static stringop_algs slm_memset[2] = {
1699 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1700 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1701 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1702 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1703 static const
1704 struct processor_costs slm_cost = {
1705 COSTS_N_INSNS (1), /* cost of an add instruction */
1706 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1707 COSTS_N_INSNS (1), /* variable shift costs */
1708 COSTS_N_INSNS (1), /* constant shift costs */
1709 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1710 COSTS_N_INSNS (3), /* HI */
1711 COSTS_N_INSNS (3), /* SI */
1712 COSTS_N_INSNS (4), /* DI */
1713 COSTS_N_INSNS (2)}, /* other */
1714 0, /* cost of multiply per each bit set */
1715 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1716 COSTS_N_INSNS (26), /* HI */
1717 COSTS_N_INSNS (42), /* SI */
1718 COSTS_N_INSNS (74), /* DI */
1719 COSTS_N_INSNS (74)}, /* other */
1720 COSTS_N_INSNS (1), /* cost of movsx */
1721 COSTS_N_INSNS (1), /* cost of movzx */
1722 8, /* "large" insn */
1723 17, /* MOVE_RATIO */
1724 4, /* cost for loading QImode using movzbl */
1725 {4, 4, 4}, /* cost of loading integer registers
1726 in QImode, HImode and SImode.
1727 Relative to reg-reg move (2). */
1728 {4, 4, 4}, /* cost of storing integer registers */
1729 4, /* cost of reg,reg fld/fst */
1730 {12, 12, 12}, /* cost of loading fp registers
1731 in SFmode, DFmode and XFmode */
1732 {6, 6, 8}, /* cost of storing fp registers
1733 in SFmode, DFmode and XFmode */
1734 2, /* cost of moving MMX register */
1735 {8, 8}, /* cost of loading MMX registers
1736 in SImode and DImode */
1737 {8, 8}, /* cost of storing MMX registers
1738 in SImode and DImode */
1739 2, /* cost of moving SSE register */
1740 {8, 8, 8}, /* cost of loading SSE registers
1741 in SImode, DImode and TImode */
1742 {8, 8, 8}, /* cost of storing SSE registers
1743 in SImode, DImode and TImode */
1744 5, /* MMX or SSE register to integer */
1745 32, /* size of l1 cache. */
1746 256, /* size of l2 cache. */
1747 64, /* size of prefetch block */
1748 6, /* number of parallel prefetches */
1749 3, /* Branch cost */
1750 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1751 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1752 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1753 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1754 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1755 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1756 slm_memcpy,
1757 slm_memset,
1758 1, /* scalar_stmt_cost. */
1759 1, /* scalar_load_cost. */
1760 1, /* scalar_store_cost. */
1761 1, /* vec_stmt_cost. */
1762 4, /* vec_to_scalar_cost. */
1763 1, /* scalar_to_vec_cost. */
1764 1, /* vec_align_load_cost. */
1765 2, /* vec_unalign_load_cost. */
1766 1, /* vec_store_cost. */
1767 3, /* cond_taken_branch_cost. */
1768 1, /* cond_not_taken_branch_cost. */
1771 static stringop_algs intel_memcpy[2] = {
1772 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1773 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1774 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1775 static stringop_algs intel_memset[2] = {
1776 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1777 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1778 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1779 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1780 static const
1781 struct processor_costs intel_cost = {
1782 COSTS_N_INSNS (1), /* cost of an add instruction */
1783 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1784 COSTS_N_INSNS (1), /* variable shift costs */
1785 COSTS_N_INSNS (1), /* constant shift costs */
1786 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1787 COSTS_N_INSNS (3), /* HI */
1788 COSTS_N_INSNS (3), /* SI */
1789 COSTS_N_INSNS (4), /* DI */
1790 COSTS_N_INSNS (2)}, /* other */
1791 0, /* cost of multiply per each bit set */
1792 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1793 COSTS_N_INSNS (26), /* HI */
1794 COSTS_N_INSNS (42), /* SI */
1795 COSTS_N_INSNS (74), /* DI */
1796 COSTS_N_INSNS (74)}, /* other */
1797 COSTS_N_INSNS (1), /* cost of movsx */
1798 COSTS_N_INSNS (1), /* cost of movzx */
1799 8, /* "large" insn */
1800 17, /* MOVE_RATIO */
1801 4, /* cost for loading QImode using movzbl */
1802 {4, 4, 4}, /* cost of loading integer registers
1803 in QImode, HImode and SImode.
1804 Relative to reg-reg move (2). */
1805 {4, 4, 4}, /* cost of storing integer registers */
1806 4, /* cost of reg,reg fld/fst */
1807 {12, 12, 12}, /* cost of loading fp registers
1808 in SFmode, DFmode and XFmode */
1809 {6, 6, 8}, /* cost of storing fp registers
1810 in SFmode, DFmode and XFmode */
1811 2, /* cost of moving MMX register */
1812 {8, 8}, /* cost of loading MMX registers
1813 in SImode and DImode */
1814 {8, 8}, /* cost of storing MMX registers
1815 in SImode and DImode */
1816 2, /* cost of moving SSE register */
1817 {8, 8, 8}, /* cost of loading SSE registers
1818 in SImode, DImode and TImode */
1819 {8, 8, 8}, /* cost of storing SSE registers
1820 in SImode, DImode and TImode */
1821 5, /* MMX or SSE register to integer */
1822 32, /* size of l1 cache. */
1823 256, /* size of l2 cache. */
1824 64, /* size of prefetch block */
1825 6, /* number of parallel prefetches */
1826 3, /* Branch cost */
1827 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1828 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1829 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1830 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1831 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1832 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1833 intel_memcpy,
1834 intel_memset,
1835 1, /* scalar_stmt_cost. */
1836 1, /* scalar_load_cost. */
1837 1, /* scalar_store_cost. */
1838 1, /* vec_stmt_cost. */
1839 4, /* vec_to_scalar_cost. */
1840 1, /* scalar_to_vec_cost. */
1841 1, /* vec_align_load_cost. */
1842 2, /* vec_unalign_load_cost. */
1843 1, /* vec_store_cost. */
1844 3, /* cond_taken_branch_cost. */
1845 1, /* cond_not_taken_branch_cost. */
1848 /* Generic should produce code tuned for Core-i7 (and newer chips)
1849 and btver1 (and newer chips). */
1851 static stringop_algs generic_memcpy[2] = {
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1853 {-1, libcall, false}}},
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1855 {-1, libcall, false}}}};
1856 static stringop_algs generic_memset[2] = {
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1858 {-1, libcall, false}}},
1859 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1860 {-1, libcall, false}}}};
1861 static const
1862 struct processor_costs generic_cost = {
1863 COSTS_N_INSNS (1), /* cost of an add instruction */
1864 /* On all chips taken into consideration, lea is 2 cycles or more. With
1865 this cost, however, our current implementation of synth_mult results in
1866 the use of unnecessary temporary registers, causing regressions on several
1867 SPECfp benchmarks. */
1868 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1869 COSTS_N_INSNS (1), /* variable shift costs */
1870 COSTS_N_INSNS (1), /* constant shift costs */
1871 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1872 COSTS_N_INSNS (4), /* HI */
1873 COSTS_N_INSNS (3), /* SI */
1874 COSTS_N_INSNS (4), /* DI */
1875 COSTS_N_INSNS (2)}, /* other */
1876 0, /* cost of multiply per each bit set */
1877 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1878 COSTS_N_INSNS (26), /* HI */
1879 COSTS_N_INSNS (42), /* SI */
1880 COSTS_N_INSNS (74), /* DI */
1881 COSTS_N_INSNS (74)}, /* other */
1882 COSTS_N_INSNS (1), /* cost of movsx */
1883 COSTS_N_INSNS (1), /* cost of movzx */
1884 8, /* "large" insn */
1885 17, /* MOVE_RATIO */
1886 4, /* cost for loading QImode using movzbl */
1887 {4, 4, 4}, /* cost of loading integer registers
1888 in QImode, HImode and SImode.
1889 Relative to reg-reg move (2). */
1890 {4, 4, 4}, /* cost of storing integer registers */
1891 4, /* cost of reg,reg fld/fst */
1892 {12, 12, 12}, /* cost of loading fp registers
1893 in SFmode, DFmode and XFmode */
1894 {6, 6, 8}, /* cost of storing fp registers
1895 in SFmode, DFmode and XFmode */
1896 2, /* cost of moving MMX register */
1897 {8, 8}, /* cost of loading MMX registers
1898 in SImode and DImode */
1899 {8, 8}, /* cost of storing MMX registers
1900 in SImode and DImode */
1901 2, /* cost of moving SSE register */
1902 {8, 8, 8}, /* cost of loading SSE registers
1903 in SImode, DImode and TImode */
1904 {8, 8, 8}, /* cost of storing SSE registers
1905 in SImode, DImode and TImode */
1906 5, /* MMX or SSE register to integer */
1907 32, /* size of l1 cache. */
1908 512, /* size of l2 cache. */
1909 64, /* size of prefetch block */
1910 6, /* number of parallel prefetches */
1911 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1912 value is increased to the perhaps more appropriate value of 5. */
1913 3, /* Branch cost */
1914 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1915 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1916 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1917 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1918 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1919 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1920 generic_memcpy,
1921 generic_memset,
1922 1, /* scalar_stmt_cost. */
1923 1, /* scalar_load_cost. */
1924 1, /* scalar_store_cost. */
1925 1, /* vec_stmt_cost. */
1926 1, /* vec_to_scalar_cost. */
1927 1, /* scalar_to_vec_cost. */
1928 1, /* vec_align_load_cost. */
1929 2, /* vec_unalign_load_cost. */
1930 1, /* vec_store_cost. */
1931 3, /* cond_taken_branch_cost. */
1932 1, /* cond_not_taken_branch_cost. */
1935 /* core_cost should produce code tuned for the Core family of CPUs. */
1936 static stringop_algs core_memcpy[2] = {
1937 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940 static stringop_algs core_memset[2] = {
1941 {libcall, {{6, loop_1_byte, true},
1942 {24, loop, true},
1943 {8192, rep_prefix_4_byte, true},
1944 {-1, libcall, false}}},
1945 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1946 {-1, libcall, false}}}};
1948 static const
1949 struct processor_costs core_cost = {
1950 COSTS_N_INSNS (1), /* cost of an add instruction */
1951 /* On all chips taken into consideration, lea is 2 cycles or more. With
1952 this cost, however, our current implementation of synth_mult results in
1953 the use of unnecessary temporary registers, causing regressions on several
1954 SPECfp benchmarks. */
1955 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1956 COSTS_N_INSNS (1), /* variable shift costs */
1957 COSTS_N_INSNS (1), /* constant shift costs */
1958 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1959 COSTS_N_INSNS (4), /* HI */
1960 COSTS_N_INSNS (3), /* SI */
1961 COSTS_N_INSNS (4), /* DI */
1962 COSTS_N_INSNS (2)}, /* other */
1963 0, /* cost of multiply per each bit set */
1964 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1965 COSTS_N_INSNS (26), /* HI */
1966 COSTS_N_INSNS (42), /* SI */
1967 COSTS_N_INSNS (74), /* DI */
1968 COSTS_N_INSNS (74)}, /* other */
1969 COSTS_N_INSNS (1), /* cost of movsx */
1970 COSTS_N_INSNS (1), /* cost of movzx */
1971 8, /* "large" insn */
1972 17, /* MOVE_RATIO */
1973 4, /* cost for loading QImode using movzbl */
1974 {4, 4, 4}, /* cost of loading integer registers
1975 in QImode, HImode and SImode.
1976 Relative to reg-reg move (2). */
1977 {4, 4, 4}, /* cost of storing integer registers */
1978 4, /* cost of reg,reg fld/fst */
1979 {12, 12, 12}, /* cost of loading fp registers
1980 in SFmode, DFmode and XFmode */
1981 {6, 6, 8}, /* cost of storing fp registers
1982 in SFmode, DFmode and XFmode */
1983 2, /* cost of moving MMX register */
1984 {8, 8}, /* cost of loading MMX registers
1985 in SImode and DImode */
1986 {8, 8}, /* cost of storing MMX registers
1987 in SImode and DImode */
1988 2, /* cost of moving SSE register */
1989 {8, 8, 8}, /* cost of loading SSE registers
1990 in SImode, DImode and TImode */
1991 {8, 8, 8}, /* cost of storing SSE registers
1992 in SImode, DImode and TImode */
1993 5, /* MMX or SSE register to integer */
1994 64, /* size of l1 cache. */
1995 512, /* size of l2 cache. */
1996 64, /* size of prefetch block */
1997 6, /* number of parallel prefetches */
1998 /* FIXME: perhaps a more appropriate value is 5. */
1999 3, /* Branch cost */
2000 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2001 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2002 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2003 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2004 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2005 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2006 core_memcpy,
2007 core_memset,
2008 1, /* scalar_stmt_cost. */
2009 1, /* scalar_load_cost. */
2010 1, /* scalar_store_cost. */
2011 1, /* vec_stmt_cost. */
2012 1, /* vec_to_scalar_cost. */
2013 1, /* scalar_to_vec_cost. */
2014 1, /* vec_align_load_cost. */
2015 2, /* vec_unalign_load_cost. */
2016 1, /* vec_store_cost. */
2017 3, /* cond_taken_branch_cost. */
2018 1, /* cond_not_taken_branch_cost. */
2022 /* Set by -mtune. */
2023 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2025 /* Set by -mtune or -Os. */
2026 const struct processor_costs *ix86_cost = &pentium_cost;
2028 /* Processor feature/optimization bitmasks. */
2029 #define m_386 (1<<PROCESSOR_I386)
2030 #define m_486 (1<<PROCESSOR_I486)
2031 #define m_PENT (1<<PROCESSOR_PENTIUM)
2032 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2033 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2034 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2035 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2036 #define m_CORE2 (1<<PROCESSOR_CORE2)
2037 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2038 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2039 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2040 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2041 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2042 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2043 #define m_INTEL (1<<PROCESSOR_INTEL)
2045 #define m_GEODE (1<<PROCESSOR_GEODE)
2046 #define m_K6 (1<<PROCESSOR_K6)
2047 #define m_K6_GEODE (m_K6 | m_GEODE)
2048 #define m_K8 (1<<PROCESSOR_K8)
2049 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2050 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2051 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2052 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2053 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2054 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2055 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2056 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2057 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2058 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2059 #define m_BTVER (m_BTVER1 | m_BTVER2)
2060 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2062 #define m_GENERIC (1<<PROCESSOR_GENERIC)
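/* These masks are combined into the per-tuning selectors pulled in below from
   x86-tune.def; a selector such as (m_CORE_ALL | m_GENERIC) would enable a
   feature for all Core chips plus the generic tuning.  That particular
   combination is only an illustration, not an entry from x86-tune.def.  */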
2064 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2065 #undef DEF_TUNE
2066 #define DEF_TUNE(tune, name, selector) name,
2067 #include "x86-tune.def"
2068 #undef DEF_TUNE
2071 /* Feature tests against the various tunings. */
2072 unsigned char ix86_tune_features[X86_TUNE_LAST];
2074 /* Feature tests against the various tunings used to create ix86_tune_features
2075 based on the processor mask. */
2076 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2077 #undef DEF_TUNE
2078 #define DEF_TUNE(tune, name, selector) selector,
2079 #include "x86-tune.def"
2080 #undef DEF_TUNE
2083 /* Feature tests against the various architecture variations. */
2084 unsigned char ix86_arch_features[X86_ARCH_LAST];
2086 /* Feature tests against the various architecture variations, used to create
2087 ix86_arch_features based on the processor mask. */
2088 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2089 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2090 ~(m_386 | m_486 | m_PENT | m_K6),
2092 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2093 ~m_386,
2095 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2096 ~(m_386 | m_486),
2098 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2099 ~m_386,
2101 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2102 ~m_386,
2105 /* In case the average insn count for single function invocation is
2106 lower than this constant, emit fast (but longer) prologue and
2107 epilogue code. */
2108 #define FAST_PROLOGUE_INSN_COUNT 20
2110 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2111 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2112 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2113 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2115 /* Array of the smallest class containing reg number REGNO, indexed by
2116 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2118 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2120 /* ax, dx, cx, bx */
2121 AREG, DREG, CREG, BREG,
2122 /* si, di, bp, sp */
2123 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2124 /* FP registers */
2125 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2126 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2127 /* arg pointer */
2128 NON_Q_REGS,
2129 /* flags, fpsr, fpcr, frame */
2130 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2131 /* SSE registers */
2132 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2133 SSE_REGS, SSE_REGS,
2134 /* MMX registers */
2135 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2136 MMX_REGS, MMX_REGS,
2137 /* REX registers */
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 /* SSE REX registers */
2141 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2142 SSE_REGS, SSE_REGS,
2143 /* AVX-512 SSE registers */
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 /* Mask registers. */
2149 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 /* MPX bound registers */
2152 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2155 /* The "default" register map used in 32bit mode. */
2157 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2159 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2160 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2162 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2163 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2168 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2169 101, 102, 103, 104, /* bound registers */
2172 /* The "default" register map used in 64bit mode. */
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2183 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2184 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2185 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2186 126, 127, 128, 129, /* bound registers */
2189 /* Define the register numbers to be used in Dwarf debugging information.
2190 The SVR4 reference port C compiler uses the following register numbers
2191 in its Dwarf output code:
2192 0 for %eax (gcc regno = 0)
2193 1 for %ecx (gcc regno = 2)
2194 2 for %edx (gcc regno = 1)
2195 3 for %ebx (gcc regno = 3)
2196 4 for %esp (gcc regno = 7)
2197 5 for %ebp (gcc regno = 6)
2198 6 for %esi (gcc regno = 4)
2199 7 for %edi (gcc regno = 5)
2200 The following three DWARF register numbers are never generated by
2201 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2202 believes these numbers have these meanings.
2203 8 for %eip (no gcc equivalent)
2204 9 for %eflags (gcc regno = 17)
2205 10 for %trapno (no gcc equivalent)
2206 It is not at all clear how we should number the FP stack registers
2207 for the x86 architecture. If the version of SDB on x86/svr4 were
2208 a bit less brain dead with respect to floating-point then we would
2209 have a precedent to follow with respect to DWARF register numbers
2210 for x86 FP registers, but the SDB on x86/svr4 is so completely
2211 broken with respect to FP registers that it is hardly worth thinking
2212 of it as something to strive for compatibility with.
2213 The version of x86/svr4 SDB I have at the moment does (partially)
2214 seem to believe that DWARF register number 11 is associated with
2215 the x86 register %st(0), but that's about all. Higher DWARF
2216 register numbers don't seem to be associated with anything in
2217 particular, and even for DWARF regno 11, SDB only seems to under-
2218 stand that it should say that a variable lives in %st(0) (when
2219 asked via an `=' command) if we said it was in DWARF regno 11,
2220 but SDB still prints garbage when asked for the value of the
2221 variable in question (via a `/' command).
2222 (Also note that the labels SDB prints for various FP stack regs
2223 when doing an `x' command are all wrong.)
2224 Note that these problems generally don't affect the native SVR4
2225 C compiler because it doesn't allow the use of -O with -g and
2226 because when it is *not* optimizing, it allocates a memory
2227 location for each floating-point variable, and the memory
2228 location is what gets described in the DWARF AT_location
2229 attribute for the variable in question.
2230 Regardless of the severe mental illness of the x86/svr4 SDB, we
2231 do something sensible here and we use the following DWARF
2232 register numbers. Note that these are all stack-top-relative
2233 numbers.
2234 11 for %st(0) (gcc regno = 8)
2235 12 for %st(1) (gcc regno = 9)
2236 13 for %st(2) (gcc regno = 10)
2237 14 for %st(3) (gcc regno = 11)
2238 15 for %st(4) (gcc regno = 12)
2239 16 for %st(5) (gcc regno = 13)
2240 17 for %st(6) (gcc regno = 14)
2241 18 for %st(7) (gcc regno = 15)
2243 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2245 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2246 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2247 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2248 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2249 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2254 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2255 101, 102, 103, 104, /* bound registers */
2258 /* Define parameter passing and return registers. */
2260 static int const x86_64_int_parameter_registers[6] =
2262 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2265 static int const x86_64_ms_abi_int_parameter_registers[4] =
2267 CX_REG, DX_REG, R8_REG, R9_REG
2270 static int const x86_64_int_return_registers[4] =
2272 AX_REG, DX_REG, DI_REG, SI_REG
2275 /* Additional registers that are clobbered by SYSV calls. */
2277 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2279 SI_REG, DI_REG,
2280 XMM6_REG, XMM7_REG,
2281 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2282 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2285 /* Define the structure for the machine field in struct function. */
2287 struct GTY(()) stack_local_entry {
2288 unsigned short mode;
2289 unsigned short n;
2290 rtx rtl;
2291 struct stack_local_entry *next;
2294 /* Structure describing stack frame layout.
2295 Stack grows downward:
2297 [arguments]
2298 <- ARG_POINTER
2299 saved pc
2301 saved static chain if ix86_static_chain_on_stack
2303 saved frame pointer if frame_pointer_needed
2304 <- HARD_FRAME_POINTER
2305 [saved regs]
2306 <- regs_save_offset
2307 [padding0]
2309 [saved SSE regs]
2310 <- sse_regs_save_offset
2311 [padding1] |
2312 | <- FRAME_POINTER
2313 [va_arg registers] |
2315 [frame] |
2317 [padding2] | = to_allocate
2318 <- STACK_POINTER
2320 struct ix86_frame
2322 int nsseregs;
2323 int nregs;
2324 int va_arg_size;
2325 int red_zone_size;
2326 int outgoing_arguments_size;
2328 /* The offsets relative to ARG_POINTER. */
2329 HOST_WIDE_INT frame_pointer_offset;
2330 HOST_WIDE_INT hard_frame_pointer_offset;
2331 HOST_WIDE_INT stack_pointer_offset;
2332 HOST_WIDE_INT hfp_save_offset;
2333 HOST_WIDE_INT reg_save_offset;
2334 HOST_WIDE_INT sse_reg_save_offset;
2336 /* When save_regs_using_mov is set, emit prologue using
2337 move instead of push instructions. */
2338 bool save_regs_using_mov;
2341 /* Which CPU we are scheduling for. */
2342 enum attr_cpu ix86_schedule;
2344 /* Which CPU we are optimizing for. */
2345 enum processor_type ix86_tune;
2347 /* Which instruction set architecture to use. */
2348 enum processor_type ix86_arch;
2350 /* True if processor has SSE prefetch instruction. */
2351 unsigned char x86_prefetch_sse;
2353 /* -mstackrealign option */
2354 static const char ix86_force_align_arg_pointer_string[]
2355 = "force_align_arg_pointer";
2357 static rtx (*ix86_gen_leave) (void);
2358 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2361 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2362 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2365 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2373 /* Alignment for incoming stack boundary in bits, as specified on the
2374 command line. */
2375 static unsigned int ix86_user_incoming_stack_boundary;
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2388 char internal_label_prefix[16];
2389 int internal_label_prefix_len;
2391 /* Fence to use after loop using movnt. */
2392 tree x86_mfence;
2394 /* Register class used for passing a given 64-bit part of the argument.
2395 These represent classes as documented by the psABI, with the exception
2396 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2397 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2399 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2400 whenever possible (the upper half then contains only padding). */
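/* As a rough illustration only (not the full psABI rules): a scalar double
   classifies as SSEDF, a 32-bit int as INTEGERSI, and an aggregate too large
   to be passed in registers as MEMORY.  */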
2401 enum x86_64_reg_class
2403 X86_64_NO_CLASS,
2404 X86_64_INTEGER_CLASS,
2405 X86_64_INTEGERSI_CLASS,
2406 X86_64_SSE_CLASS,
2407 X86_64_SSESF_CLASS,
2408 X86_64_SSEDF_CLASS,
2409 X86_64_SSEUP_CLASS,
2410 X86_64_X87_CLASS,
2411 X86_64_X87UP_CLASS,
2412 X86_64_COMPLEX_X87_CLASS,
2413 X86_64_MEMORY_CLASS
2416 #define MAX_CLASSES 8
2418 /* Table of constants used by fldpi, fldln2, etc. */
2419 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2420 static bool ext_80387_constants_init = 0;
2423 static struct machine_function * ix86_init_machine_status (void);
2424 static rtx ix86_function_value (const_tree, const_tree, bool);
2425 static bool ix86_function_value_regno_p (const unsigned int);
2426 static unsigned int ix86_function_arg_boundary (machine_mode,
2427 const_tree);
2428 static rtx ix86_static_chain (const_tree, bool);
2429 static int ix86_function_regparm (const_tree, const_tree);
2430 static void ix86_compute_frame_layout (struct ix86_frame *);
2431 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2432 rtx, rtx, int);
2433 static void ix86_add_new_builtins (HOST_WIDE_INT);
2434 static tree ix86_canonical_va_list_type (tree);
2435 static void predict_jump (int);
2436 static unsigned int split_stack_prologue_scratch_regno (void);
2437 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2439 enum ix86_function_specific_strings
2441 IX86_FUNCTION_SPECIFIC_ARCH,
2442 IX86_FUNCTION_SPECIFIC_TUNE,
2443 IX86_FUNCTION_SPECIFIC_MAX
2446 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2447 const char *, enum fpmath_unit, bool);
2448 static void ix86_function_specific_save (struct cl_target_option *,
2449 struct gcc_options *opts);
2450 static void ix86_function_specific_restore (struct gcc_options *opts,
2451 struct cl_target_option *);
2452 static void ix86_function_specific_print (FILE *, int,
2453 struct cl_target_option *);
2454 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2455 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2456 struct gcc_options *,
2457 struct gcc_options *,
2458 struct gcc_options *);
2459 static bool ix86_can_inline_p (tree, tree);
2460 static void ix86_set_current_function (tree);
2461 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2463 static enum calling_abi ix86_function_abi (const_tree);
2466 #ifndef SUBTARGET32_DEFAULT_CPU
2467 #define SUBTARGET32_DEFAULT_CPU "i386"
2468 #endif
2470 /* Whether -mtune= or -march= were specified */
2471 static int ix86_tune_defaulted;
2472 static int ix86_arch_specified;
2474 /* Vectorization library interface and handlers. */
2475 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2477 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2480 /* Processor target table, indexed by processor number */
2481 struct ptt
2483 const char *const name; /* processor name */
2484 const struct processor_costs *cost; /* Processor costs */
2485 const int align_loop; /* Default alignments. */
2486 const int align_loop_max_skip;
2487 const int align_jump;
2488 const int align_jump_max_skip;
2489 const int align_func;
2492 /* This table must be in sync with enum processor_type in i386.h. */
2493 static const struct ptt processor_target_table[PROCESSOR_max] =
2495 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2496 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2497 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2498 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2499 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2500 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2501 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2502 {"core2", &core_cost, 16, 10, 16, 10, 16},
2503 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2504 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2505 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2506 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2507 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2508 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2509 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2510 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2511 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2512 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2513 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2514 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2515 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2516 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2517 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2518 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2519 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2522 static unsigned int
2523 rest_of_handle_insert_vzeroupper (void)
2525 int i;
2527 /* vzeroupper instructions are inserted immediately after reload to
2528 account for possible spills from 256bit registers. The pass
2529 reuses the mode switching infrastructure by re-running the mode
2530 insertion pass, so disable entities that have already been processed.
2531 for (i = 0; i < MAX_386_ENTITIES; i++)
2532 ix86_optimize_mode_switching[i] = 0;
2534 ix86_optimize_mode_switching[AVX_U128] = 1;
2536 /* Call optimize_mode_switching. */
2537 g->get_passes ()->execute_pass_mode_switching ();
2538 return 0;
2541 namespace {
2543 const pass_data pass_data_insert_vzeroupper =
2545 RTL_PASS, /* type */
2546 "vzeroupper", /* name */
2547 OPTGROUP_NONE, /* optinfo_flags */
2548 TV_NONE, /* tv_id */
2549 0, /* properties_required */
2550 0, /* properties_provided */
2551 0, /* properties_destroyed */
2552 0, /* todo_flags_start */
2553 TODO_df_finish, /* todo_flags_finish */
2556 class pass_insert_vzeroupper : public rtl_opt_pass
2558 public:
2559 pass_insert_vzeroupper(gcc::context *ctxt)
2560 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2563 /* opt_pass methods: */
2564 virtual bool gate (function *)
2566 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2569 virtual unsigned int execute (function *)
2571 return rest_of_handle_insert_vzeroupper ();
2574 }; // class pass_insert_vzeroupper
2576 } // anon namespace
2578 rtl_opt_pass *
2579 make_pass_insert_vzeroupper (gcc::context *ctxt)
2581 return new pass_insert_vzeroupper (ctxt);
2584 /* Return true if a red-zone is in use. */
2586 static inline bool
2587 ix86_using_red_zone (void)
2589 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2592 /* Return a string that documents the current -m options. The caller is
2593 responsible for freeing the string. */
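/* As an illustration only: for a 64-bit SSE2 compilation with -mtune=generic
   the returned string could resemble
   "-mtune=generic -m64 -msse2 -msse -mmmx -mfpmath=sse"; the exact contents
   depend on the ISA and flag masks passed in.  */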
2595 static char *
2596 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2597 const char *tune, enum fpmath_unit fpmath,
2598 bool add_nl_p)
2600 struct ix86_target_opts
2602 const char *option; /* option string */
2603 HOST_WIDE_INT mask; /* isa mask options */
2606 /* This table is ordered so that options like -msse4.2 that imply
2607 preceding options are matched first. */
2608 static struct ix86_target_opts isa_opts[] =
2610 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2611 { "-mfma", OPTION_MASK_ISA_FMA },
2612 { "-mxop", OPTION_MASK_ISA_XOP },
2613 { "-mlwp", OPTION_MASK_ISA_LWP },
2614 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2615 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2616 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2617 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2618 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2619 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2620 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2621 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2622 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2623 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2624 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2625 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2626 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2627 { "-msse3", OPTION_MASK_ISA_SSE3 },
2628 { "-msse2", OPTION_MASK_ISA_SSE2 },
2629 { "-msse", OPTION_MASK_ISA_SSE },
2630 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2631 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2632 { "-mmmx", OPTION_MASK_ISA_MMX },
2633 { "-mabm", OPTION_MASK_ISA_ABM },
2634 { "-mbmi", OPTION_MASK_ISA_BMI },
2635 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2636 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2637 { "-mhle", OPTION_MASK_ISA_HLE },
2638 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2639 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2640 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2641 { "-madx", OPTION_MASK_ISA_ADX },
2642 { "-mtbm", OPTION_MASK_ISA_TBM },
2643 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2644 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2645 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2646 { "-maes", OPTION_MASK_ISA_AES },
2647 { "-msha", OPTION_MASK_ISA_SHA },
2648 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2649 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2650 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2651 { "-mf16c", OPTION_MASK_ISA_F16C },
2652 { "-mrtm", OPTION_MASK_ISA_RTM },
2653 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2654 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2655 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2656 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2657 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2658 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2659 { "-mmpx", OPTION_MASK_ISA_MPX },
2660 { "-mclwb", OPTION_MASK_ISA_CLWB },
2661 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2664 /* Flag options. */
2665 static struct ix86_target_opts flag_opts[] =
2667 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2668 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2669 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2670 { "-m80387", MASK_80387 },
2671 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2672 { "-malign-double", MASK_ALIGN_DOUBLE },
2673 { "-mcld", MASK_CLD },
2674 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2675 { "-mieee-fp", MASK_IEEE_FP },
2676 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2677 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2678 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2679 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2680 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2681 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2682 { "-mno-red-zone", MASK_NO_RED_ZONE },
2683 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2684 { "-mrecip", MASK_RECIP },
2685 { "-mrtd", MASK_RTD },
2686 { "-msseregparm", MASK_SSEREGPARM },
2687 { "-mstack-arg-probe", MASK_STACK_PROBE },
2688 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2689 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2690 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2691 { "-mvzeroupper", MASK_VZEROUPPER },
2692 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2693 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2694 { "-mprefer-avx128", MASK_PREFER_AVX128},
2697 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2699 char isa_other[40];
2700 char target_other[40];
2701 unsigned num = 0;
2702 unsigned i, j;
2703 char *ret;
2704 char *ptr;
2705 size_t len;
2706 size_t line_len;
2707 size_t sep_len;
2708 const char *abi;
2710 memset (opts, '\0', sizeof (opts));
2712 /* Add -march= option. */
2713 if (arch)
2715 opts[num][0] = "-march=";
2716 opts[num++][1] = arch;
2719 /* Add -mtune= option. */
2720 if (tune)
2722 opts[num][0] = "-mtune=";
2723 opts[num++][1] = tune;
2726 /* Add -m32/-m64/-mx32. */
2727 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2729 if ((isa & OPTION_MASK_ABI_64) != 0)
2730 abi = "-m64";
2731 else
2732 abi = "-mx32";
2733 isa &= ~ (OPTION_MASK_ISA_64BIT
2734 | OPTION_MASK_ABI_64
2735 | OPTION_MASK_ABI_X32);
2737 else
2738 abi = "-m32";
2739 opts[num++][0] = abi;
2741 /* Pick out the enabled ISA options. */
2742 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2744 if ((isa & isa_opts[i].mask) != 0)
2746 opts[num++][0] = isa_opts[i].option;
2747 isa &= ~ isa_opts[i].mask;
2751 if (isa && add_nl_p)
2753 opts[num++][0] = isa_other;
2754 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2755 isa);
2758 /* Add flag options. */
2759 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2761 if ((flags & flag_opts[i].mask) != 0)
2763 opts[num++][0] = flag_opts[i].option;
2764 flags &= ~ flag_opts[i].mask;
2768 if (flags && add_nl_p)
2770 opts[num++][0] = target_other;
2771 sprintf (target_other, "(other flags: %#x)", flags);
2774 /* Add -fpmath= option. */
2775 if (fpmath)
2777 opts[num][0] = "-mfpmath=";
2778 switch ((int) fpmath)
2780 case FPMATH_387:
2781 opts[num++][1] = "387";
2782 break;
2784 case FPMATH_SSE:
2785 opts[num++][1] = "sse";
2786 break;
2788 case FPMATH_387 | FPMATH_SSE:
2789 opts[num++][1] = "sse+387";
2790 break;
2792 default:
2793 gcc_unreachable ();
2797 /* Any options? */
2798 if (num == 0)
2799 return NULL;
2801 gcc_assert (num < ARRAY_SIZE (opts));
2803 /* Size the string. */
2804 len = 0;
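/* When line wrapping is requested the separator between options can be up to
   three characters (space, backslash, newline); otherwise it is a single
   space.  */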
2805 sep_len = (add_nl_p) ? 3 : 1;
2806 for (i = 0; i < num; i++)
2808 len += sep_len;
2809 for (j = 0; j < 2; j++)
2810 if (opts[i][j])
2811 len += strlen (opts[i][j]);
2814 /* Build the string. */
2815 ret = ptr = (char *) xmalloc (len);
2816 line_len = 0;
2818 for (i = 0; i < num; i++)
2820 size_t len2[2];
2822 for (j = 0; j < 2; j++)
2823 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2825 if (i != 0)
2827 *ptr++ = ' ';
2828 line_len++;
2830 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2832 *ptr++ = '\\';
2833 *ptr++ = '\n';
2834 line_len = 0;
2838 for (j = 0; j < 2; j++)
2839 if (opts[i][j])
2841 memcpy (ptr, opts[i][j], len2[j]);
2842 ptr += len2[j];
2843 line_len += len2[j];
2847 *ptr = '\0';
2848 gcc_assert (ret + len >= ptr);
2850 return ret;
2853 /* Return true if profiling code should be emitted before the
2854 prologue, and false otherwise.
2855 Note: for x86 this is the case when "hotfix"-style (-mfentry) profiling is used. */
2856 static bool
2857 ix86_profile_before_prologue (void)
2859 return flag_fentry != 0;
2862 /* Function that is callable from the debugger to print the current
2863 options. */
2864 void ATTRIBUTE_UNUSED
2865 ix86_debug_options (void)
2867 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2868 ix86_arch_string, ix86_tune_string,
2869 ix86_fpmath, true);
2871 if (opts)
2873 fprintf (stderr, "%s\n\n", opts);
2874 free (opts);
2876 else
2877 fputs ("<no options>\n\n", stderr);
2879 return;
2882 static const char *stringop_alg_names[] = {
2883 #define DEF_ENUM
2884 #define DEF_ALG(alg, name) #name,
2885 #include "stringop.def"
2886 #undef DEF_ENUM
2887 #undef DEF_ALG
2890 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2891 The string has the following form (or a comma-separated list of such entries):
2893 strategy_alg:max_size:[align|noalign]
2895 where the full size range for the strategy is either [0, max_size] or
2896 [min_size, max_size], in which min_size is the max_size + 1 of the
2897 preceding range. The last size range must have max_size == -1.
2899 Examples:
2902 -mmemcpy-strategy=libcall:-1:noalign
2904 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2908 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2910 This is to tell the compiler to use the following strategy for memset
2911 1) when the expected size is between [1, 16], use rep_8byte strategy;
2912 2) when the size is between [17, 2048], use vector_loop;
2913 3) when the size is > 2048, use libcall. */
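/* For instance, -mmemcpy-strategy=loop:64:align,libcall:-1:noalign asks for
   an aligned loop for copies of at most 64 bytes and a library call for
   anything larger (an illustrative setting, not a recommendation).  */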
2915 struct stringop_size_range
2917 int max;
2918 stringop_alg alg;
2919 bool noalign;
2922 static void
2923 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2925 const struct stringop_algs *default_algs;
2926 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2927 char *curr_range_str, *next_range_str;
2928 int i = 0, n = 0;
2930 if (is_memset)
2931 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2932 else
2933 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2935 curr_range_str = strategy_str;
2939 int maxs;
2940 char alg_name[128];
2941 char align[16];
2942 next_range_str = strchr (curr_range_str, ',');
2943 if (next_range_str)
2944 *next_range_str++ = '\0';
2946 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2947 alg_name, &maxs, align))
2949 error ("wrong arg %s to option %s", curr_range_str,
2950 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2951 return;
2954 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2956 error ("size ranges of option %s should be increasing",
2957 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2958 return;
2961 for (i = 0; i < last_alg; i++)
2962 if (!strcmp (alg_name, stringop_alg_names[i]))
2963 break;
2965 if (i == last_alg)
2967 error ("wrong stringop strategy name %s specified for option %s",
2968 alg_name,
2969 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2970 return;
2973 input_ranges[n].max = maxs;
2974 input_ranges[n].alg = (stringop_alg) i;
2975 if (!strcmp (align, "align"))
2976 input_ranges[n].noalign = false;
2977 else if (!strcmp (align, "noalign"))
2978 input_ranges[n].noalign = true;
2979 else
2981 error ("unknown alignment %s specified for option %s",
2982 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2983 return;
2985 n++;
2986 curr_range_str = next_range_str;
2988 while (curr_range_str);
2990 if (input_ranges[n - 1].max != -1)
2992 error ("the max value for the last size range should be -1"
2993 " for option %s",
2994 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2995 return;
2998 if (n > MAX_STRINGOP_ALGS)
3000 error ("too many size ranges specified in option %s",
3001 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3002 return;
3005 /* Now override the default algs array. */
3006 for (i = 0; i < n; i++)
3008 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3009 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3010 = input_ranges[i].alg;
3011 *const_cast<int *>(&default_algs->size[i].noalign)
3012 = input_ranges[i].noalign;
3017 /* Parse the -mtune-ctrl= option. When DUMP is true,
3018 print the features that are explicitly set. */
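/* For example, -mtune-ctrl=^use_leave,avx128_optimal would clear the
   use_leave feature and set avx128_optimal; a leading '^' negates a feature,
   and the valid names are those listed in x86-tune.def (the names above are
   only illustrative).  */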
3020 static void
3021 parse_mtune_ctrl_str (bool dump)
3023 if (!ix86_tune_ctrl_string)
3024 return;
3026 char *next_feature_string = NULL;
3027 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3028 char *orig = curr_feature_string;
3029 int i;
3032 bool clear = false;
3034 next_feature_string = strchr (curr_feature_string, ',');
3035 if (next_feature_string)
3036 *next_feature_string++ = '\0';
3037 if (*curr_feature_string == '^')
3039 curr_feature_string++;
3040 clear = true;
3042 for (i = 0; i < X86_TUNE_LAST; i++)
3044 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3046 ix86_tune_features[i] = !clear;
3047 if (dump)
3048 fprintf (stderr, "Explicitly %s feature %s\n",
3049 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3050 break;
3053 if (i == X86_TUNE_LAST)
3054 error ("Unknown parameter to option -mtune-ctrl: %s",
3055 clear ? curr_feature_string - 1 : curr_feature_string);
3056 curr_feature_string = next_feature_string;
3058 while (curr_feature_string);
3059 free (orig);
3062 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3063 processor type. */
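/* Each feature defaults to its bit for IX86_TUNE in
   initial_ix86_tune_features[] (or to 0 when ix86_tune_no_default is set)
   and may then be overridden via parse_mtune_ctrl_str. */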
3065 static void
3066 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3068 unsigned int ix86_tune_mask = 1u << ix86_tune;
3069 int i;
3071 for (i = 0; i < X86_TUNE_LAST; ++i)
3073 if (ix86_tune_no_default)
3074 ix86_tune_features[i] = 0;
3075 else
3076 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3079 if (dump)
3081 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3082 for (i = 0; i < X86_TUNE_LAST; i++)
3083 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3084 ix86_tune_features[i] ? "on" : "off");
3087 parse_mtune_ctrl_str (dump);
3091 /* Override various settings based on options. If MAIN_ARGS_P, the
3092 options are from the command line, otherwise they are from
3093 attributes. */
3095 static void
3096 ix86_option_override_internal (bool main_args_p,
3097 struct gcc_options *opts,
3098 struct gcc_options *opts_set)
3100 int i;
3101 unsigned int ix86_arch_mask;
3102 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3103 const char *prefix;
3104 const char *suffix;
3105 const char *sw;
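/* PTA_* bits describe the ISA extensions implied by each entry of
   processor_alias_table below. */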
3107 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3108 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3109 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3110 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3111 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3112 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3113 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3114 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3115 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3116 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3117 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3118 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3119 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3120 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3121 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3122 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3123 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3124 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3125 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3126 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3127 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3128 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3129 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3130 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3131 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3132 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3133 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3134 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3135 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3136 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3137 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3138 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3139 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3140 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3141 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3142 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3143 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3144 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3145 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3146 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3147 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3148 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3149 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3150 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3151 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3152 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3153 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3154 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3155 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3156 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3157 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3158 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3159 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3160 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3161 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3162 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3163 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
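/* Composite masks for processor aliases: each newer core below is defined
   as its predecessor's mask plus the ISA bits it adds. */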
3165 #define PTA_CORE2 \
3166 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3167 | PTA_CX16 | PTA_FXSR)
3168 #define PTA_NEHALEM \
3169 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3170 #define PTA_WESTMERE \
3171 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3172 #define PTA_SANDYBRIDGE \
3173 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3174 #define PTA_IVYBRIDGE \
3175 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3176 #define PTA_HASWELL \
3177 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3178 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3179 #define PTA_BROADWELL \
3180 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3181 #define PTA_BONNELL \
3182 (PTA_CORE2 | PTA_MOVBE)
3183 #define PTA_SILVERMONT \
3184 (PTA_WESTMERE | PTA_MOVBE)
3186 /* If this reaches 64, the flags field of struct pta below needs to be widened. */
3188 static struct pta
3190 const char *const name; /* processor name or nickname. */
3191 const enum processor_type processor;
3192 const enum attr_cpu schedule;
3193 const unsigned HOST_WIDE_INT flags;
3195 const processor_alias_table[] =
3197 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3198 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3199 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3200 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3201 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3202 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3203 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3204 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3205 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3206 PTA_MMX | PTA_SSE | PTA_FXSR},
3207 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3208 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3209 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3210 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3211 PTA_MMX | PTA_SSE | PTA_FXSR},
3212 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3213 PTA_MMX | PTA_SSE | PTA_FXSR},
3214 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3215 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3216 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3217 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3218 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3219 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3220 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3221 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3222 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3223 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3224 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3225 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3226 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3227 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3228 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3229 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3230 PTA_SANDYBRIDGE},
3231 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3232 PTA_SANDYBRIDGE},
3233 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3234 PTA_IVYBRIDGE},
3235 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3236 PTA_IVYBRIDGE},
3237 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3238 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3239 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3240 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3241 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3242 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3243 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3244 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3245 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3246 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3247 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3248 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3249 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3250 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3252 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3253 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3254 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3255 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3256 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3257 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3258 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3259 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3260 {"x86-64", PROCESSOR_K8, CPU_K8,
3261 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3262 {"k8", PROCESSOR_K8, CPU_K8,
3263 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3264 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3265 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3266 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3267 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3268 {"opteron", PROCESSOR_K8, CPU_K8,
3269 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3270 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3271 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3272 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3273 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3274 {"athlon64", PROCESSOR_K8, CPU_K8,
3275 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3276 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3278 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3279 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3280 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3281 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3282 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3283 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3285 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3286 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3288 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3289 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3290 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3291 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3292 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3293 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3294 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3295 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3296 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3297 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3298 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3299 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3300 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3301 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3302 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3303 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3304 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3305 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3306 | PTA_XSAVEOPT | PTA_FSGSBASE},
3307 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3308 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3309 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3310 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3311 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3312 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3313 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3314 | PTA_MOVBE},
3315 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3318 | PTA_FXSR | PTA_XSAVE},
3319 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3320 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3321 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3322 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3323 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3324 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3326 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3327 PTA_64BIT
3328 | PTA_HLE /* flags are only used for -march switch. */ },
3331 /* -mrecip options. */
3332 static struct
3334 const char *string; /* option name */
3335 unsigned int mask; /* mask bits to set */
3337 const recip_options[] =
3339 { "all", RECIP_MASK_ALL },
3340 { "none", RECIP_MASK_NONE },
3341 { "div", RECIP_MASK_DIV },
3342 { "sqrt", RECIP_MASK_SQRT },
3343 { "vec-div", RECIP_MASK_VEC_DIV },
3344 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3347 int const pta_size = ARRAY_SIZE (processor_alias_table);
3349 /* Set up prefix/suffix so the error messages refer to either the command
3350 line argument, or the attribute(target). */
3351 if (main_args_p)
3353 prefix = "-m";
3354 suffix = "";
3355 sw = "switch";
3357 else
3359 prefix = "option(\"";
3360 suffix = "\")";
3361 sw = "attribute";
3364 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3365 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3366 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3367 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3368 #ifdef TARGET_BI_ARCH
3369 else
3371 #if TARGET_BI_ARCH == 1
3372 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3373 is on and OPTION_MASK_ABI_X32 is off. We turn off
3374 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3375 -mx32. */
3376 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3377 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3378 #else
3379 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3380 on and OPTION_MASK_ABI_64 is off. We turn off
3381 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3382 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3383 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3384 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3386 #endif
3388 #endif
3390 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3392 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3393 OPTION_MASK_ABI_64 for TARGET_X32. */
3394 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3395 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3397 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3399 | OPTION_MASK_ABI_X32
3400 | OPTION_MASK_ABI_64);
3401 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3403 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3404 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3405 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3409 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3410 SUBTARGET_OVERRIDE_OPTIONS;
3411 #endif
3413 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3414 SUBSUBTARGET_OVERRIDE_OPTIONS;
3415 #endif
3417 /* -fPIC is the default for x86_64. */
3418 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_flag_pic = 2;
3421 /* Need to check -mtune=generic first. */
3422 if (opts->x_ix86_tune_string)
3424 /* As special support for cross compilers we read -mtune=native
3425 as -mtune=generic. With native compilers we won't see the
3426 -mtune=native, as it was changed by the driver. */
3427 if (!strcmp (opts->x_ix86_tune_string, "native"))
3429 opts->x_ix86_tune_string = "generic";
3431 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3432 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3433 "%stune=k8%s or %stune=generic%s instead as appropriate",
3434 prefix, suffix, prefix, suffix, prefix, suffix);
3436 else
3438 if (opts->x_ix86_arch_string)
3439 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3440 if (!opts->x_ix86_tune_string)
3442 opts->x_ix86_tune_string
3443 = processor_target_table[TARGET_CPU_DEFAULT].name;
3444 ix86_tune_defaulted = 1;
3447 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3448 or defaulted. We need to use a sensible tune option. */
3449 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3451 opts->x_ix86_tune_string = "generic";
3455 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3456 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3458 /* rep; movq isn't available in 32-bit code. */
3459 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3460 opts->x_ix86_stringop_alg = no_stringop;
3463 if (!opts->x_ix86_arch_string)
3464 opts->x_ix86_arch_string
3465 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3466 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3467 else
3468 ix86_arch_specified = 1;
3470 if (opts_set->x_ix86_pmode)
3472 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3473 && opts->x_ix86_pmode == PMODE_SI)
3474 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3475 && opts->x_ix86_pmode == PMODE_DI))
3476 error ("address mode %qs not supported in the %s bit mode",
3477 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3478 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3480 else
3481 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3482 ? PMODE_DI : PMODE_SI;
3484 if (!opts_set->x_ix86_abi)
3485 opts->x_ix86_abi = DEFAULT_ABI;
3487 /* For targets using the MS ABI, enable ms-extensions if not
3488 explicitly turned off. For other ABIs we turn this
3489 option off. */
3490 if (!opts_set->x_flag_ms_extensions)
3491 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3493 if (opts_set->x_ix86_cmodel)
3495 switch (opts->x_ix86_cmodel)
3497 case CM_SMALL:
3498 case CM_SMALL_PIC:
3499 if (opts->x_flag_pic)
3500 opts->x_ix86_cmodel = CM_SMALL_PIC;
3501 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3502 error ("code model %qs not supported in the %s bit mode",
3503 "small", "32");
3504 break;
3506 case CM_MEDIUM:
3507 case CM_MEDIUM_PIC:
3508 if (opts->x_flag_pic)
3509 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "medium", "32");
3513 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3514 error ("code model %qs not supported in x32 mode",
3515 "medium");
3516 break;
3518 case CM_LARGE:
3519 case CM_LARGE_PIC:
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_LARGE_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3524 "large", "32");
3525 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3526 error ("code model %qs not supported in x32 mode",
3527 "large");
3528 break;
3530 case CM_32:
3531 if (opts->x_flag_pic)
3532 error ("code model %s does not support PIC mode", "32");
3533 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3534 error ("code model %qs not supported in the %s bit mode",
3535 "32", "64");
3536 break;
3538 case CM_KERNEL:
3539 if (opts->x_flag_pic)
3541 error ("code model %s does not support PIC mode", "kernel");
3542 opts->x_ix86_cmodel = CM_32;
3544 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3545 error ("code model %qs not supported in the %s bit mode",
3546 "kernel", "32");
3547 break;
3549 default:
3550 gcc_unreachable ();
3553 else
3555 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3556 use of rip-relative addressing. This eliminates fixups that
3557 would otherwise be needed if this object is to be placed in a
3558 DLL, and is essentially just as efficient as direct addressing. */
3559 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3560 && (TARGET_RDOS || TARGET_PECOFF))
3561 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3562 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3563 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3564 else
3565 opts->x_ix86_cmodel = CM_32;
3567 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3569 error ("-masm=intel not supported in this configuration");
3570 opts->x_ix86_asm_dialect = ASM_ATT;
3572 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3573 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3574 sorry ("%i-bit mode not compiled in",
3575 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3577 for (i = 0; i < pta_size; i++)
3578 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3580 ix86_schedule = processor_alias_table[i].schedule;
3581 ix86_arch = processor_alias_table[i].processor;
3582 /* Default cpu tuning to the architecture. */
3583 ix86_tune = ix86_arch;
3585 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3586 && !(processor_alias_table[i].flags & PTA_64BIT))
3587 error ("CPU you selected does not support x86-64 "
3588 "instruction set");
3590 if (processor_alias_table[i].flags & PTA_MMX
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3593 if (processor_alias_table[i].flags & PTA_3DNOW
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3596 if (processor_alias_table[i].flags & PTA_3DNOW_A
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3599 if (processor_alias_table[i].flags & PTA_SSE
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3602 if (processor_alias_table[i].flags & PTA_SSE2
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3605 if (processor_alias_table[i].flags & PTA_SSE3
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3608 if (processor_alias_table[i].flags & PTA_SSSE3
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3611 if (processor_alias_table[i].flags & PTA_SSE4_1
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3614 if (processor_alias_table[i].flags & PTA_SSE4_2
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3617 if (processor_alias_table[i].flags & PTA_AVX
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3620 if (processor_alias_table[i].flags & PTA_AVX2
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3623 if (processor_alias_table[i].flags & PTA_FMA
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3626 if (processor_alias_table[i].flags & PTA_SSE4A
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3629 if (processor_alias_table[i].flags & PTA_FMA4
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3632 if (processor_alias_table[i].flags & PTA_XOP
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3635 if (processor_alias_table[i].flags & PTA_LWP
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3638 if (processor_alias_table[i].flags & PTA_ABM
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3641 if (processor_alias_table[i].flags & PTA_BMI
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3644 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3647 if (processor_alias_table[i].flags & PTA_TBM
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3650 if (processor_alias_table[i].flags & PTA_BMI2
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3653 if (processor_alias_table[i].flags & PTA_CX16
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3656 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3659 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3660 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3663 if (processor_alias_table[i].flags & PTA_MOVBE
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3666 if (processor_alias_table[i].flags & PTA_AES
3667 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3668 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3669 if (processor_alias_table[i].flags & PTA_SHA
3670 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3671 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3672 if (processor_alias_table[i].flags & PTA_PCLMUL
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3675 if (processor_alias_table[i].flags & PTA_FSGSBASE
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3678 if (processor_alias_table[i].flags & PTA_RDRND
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3681 if (processor_alias_table[i].flags & PTA_F16C
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3684 if (processor_alias_table[i].flags & PTA_RTM
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3687 if (processor_alias_table[i].flags & PTA_HLE
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3690 if (processor_alias_table[i].flags & PTA_PRFCHW
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3693 if (processor_alias_table[i].flags & PTA_RDSEED
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3696 if (processor_alias_table[i].flags & PTA_ADX
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3699 if (processor_alias_table[i].flags & PTA_FXSR
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3702 if (processor_alias_table[i].flags & PTA_XSAVE
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3705 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3708 if (processor_alias_table[i].flags & PTA_AVX512F
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3711 if (processor_alias_table[i].flags & PTA_AVX512ER
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3714 if (processor_alias_table[i].flags & PTA_AVX512PF
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3717 if (processor_alias_table[i].flags & PTA_AVX512CD
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3720 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3723 if (processor_alias_table[i].flags & PTA_PCOMMIT
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3726 if (processor_alias_table[i].flags & PTA_CLWB
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3729 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3732 if (processor_alias_table[i].flags & PTA_XSAVEC
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3735 if (processor_alias_table[i].flags & PTA_XSAVES
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3738 if (processor_alias_table[i].flags & PTA_AVX512DQ
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3741 if (processor_alias_table[i].flags & PTA_AVX512BW
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3744 if (processor_alias_table[i].flags & PTA_AVX512VL
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3747 if (processor_alias_table[i].flags & PTA_MPX
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3750 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3753 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3756 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3757 x86_prefetch_sse = true;
3759 break;
3762 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3763 error ("Intel MPX does not support x32");
3765 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3766 error ("Intel MPX does not support x32");
3768 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3769 error ("generic CPU can be used only for %stune=%s %s",
3770 prefix, suffix, sw);
3771 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3772 error ("intel CPU can be used only for %stune=%s %s",
3773 prefix, suffix, sw);
3774 else if (i == pta_size)
3775 error ("bad value (%s) for %sarch=%s %s",
3776 opts->x_ix86_arch_string, prefix, suffix, sw);
3778 ix86_arch_mask = 1u << ix86_arch;
3779 for (i = 0; i < X86_ARCH_LAST; ++i)
3780 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3782 for (i = 0; i < pta_size; i++)
3783 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3785 ix86_schedule = processor_alias_table[i].schedule;
3786 ix86_tune = processor_alias_table[i].processor;
3787 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3789 if (!(processor_alias_table[i].flags & PTA_64BIT))
3791 if (ix86_tune_defaulted)
3793 opts->x_ix86_tune_string = "x86-64";
3794 for (i = 0; i < pta_size; i++)
3795 if (! strcmp (opts->x_ix86_tune_string,
3796 processor_alias_table[i].name))
3797 break;
3798 ix86_schedule = processor_alias_table[i].schedule;
3799 ix86_tune = processor_alias_table[i].processor;
3801 else
3802 error ("CPU you selected does not support x86-64 "
3803 "instruction set");
3806 /* Intel CPUs have always interpreted SSE prefetch instructions as
3807 NOPs; so, we can enable SSE prefetch instructions even when
3808 -mtune (rather than -march) points us to a processor that has them.
3809 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3810 higher processors. */
3811 if (TARGET_CMOV
3812 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3813 x86_prefetch_sse = true;
3814 break;
3817 if (ix86_tune_specified && i == pta_size)
3818 error ("bad value (%s) for %stune=%s %s",
3819 opts->x_ix86_tune_string, prefix, suffix, sw);
3821 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3823 #ifndef USE_IX86_FRAME_POINTER
3824 #define USE_IX86_FRAME_POINTER 0
3825 #endif
3827 #ifndef USE_X86_64_FRAME_POINTER
3828 #define USE_X86_64_FRAME_POINTER 0
3829 #endif
3831 /* Set the default values for switches whose default depends on TARGET_64BIT
3832 in case they weren't overwritten by command line options. */
3833 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3835 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3836 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3837 if (opts->x_flag_asynchronous_unwind_tables
3838 && !opts_set->x_flag_unwind_tables
3839 && TARGET_64BIT_MS_ABI)
3840 opts->x_flag_unwind_tables = 1;
3841 if (opts->x_flag_asynchronous_unwind_tables == 2)
3842 opts->x_flag_unwind_tables
3843 = opts->x_flag_asynchronous_unwind_tables = 1;
3844 if (opts->x_flag_pcc_struct_return == 2)
3845 opts->x_flag_pcc_struct_return = 0;
3847 else
3849 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3850 opts->x_flag_omit_frame_pointer
3851 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3852 if (opts->x_flag_asynchronous_unwind_tables == 2)
3853 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3854 if (opts->x_flag_pcc_struct_return == 2)
3855 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3858 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3859 if (opts->x_optimize_size)
3860 ix86_cost = &ix86_size_cost;
3861 else
3862 ix86_cost = ix86_tune_cost;
3864 /* Arrange to set up i386_stack_locals for all functions. */
3865 init_machine_status = ix86_init_machine_status;
3867 /* Validate -mregparm= value. */
3868 if (opts_set->x_ix86_regparm)
3870 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3871 warning (0, "-mregparm is ignored in 64-bit mode");
3872 if (opts->x_ix86_regparm > REGPARM_MAX)
3874 error ("-mregparm=%d is not between 0 and %d",
3875 opts->x_ix86_regparm, REGPARM_MAX);
3876 opts->x_ix86_regparm = 0;
3879 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_regparm = REGPARM_MAX;
3882 /* Default align_* from the processor table. */
3883 if (opts->x_align_loops == 0)
3885 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3886 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3888 if (opts->x_align_jumps == 0)
3890 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3891 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3893 if (opts->x_align_functions == 0)
3895 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3898 /* Provide default for -mbranch-cost= value. */
3899 if (!opts_set->x_ix86_branch_cost)
3900 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3904 opts->x_target_flags
3905 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3907 /* Enable by default the SSE and MMX builtins. Do allow the user to
3908 explicitly disable any of these. In particular, disabling SSE and
3909 MMX for kernel code is extremely useful. */
3910 if (!ix86_arch_specified)
3911 opts->x_ix86_isa_flags
3912 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3913 | TARGET_SUBTARGET64_ISA_DEFAULT)
3914 & ~opts->x_ix86_isa_flags_explicit);
3916 if (TARGET_RTD_P (opts->x_target_flags))
3917 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3919 else
3921 opts->x_target_flags
3922 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3924 if (!ix86_arch_specified)
3925 opts->x_ix86_isa_flags
3926 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3928 /* The i386 ABI does not specify a red zone. It still makes sense to use
3929 one when the programmer takes care to keep the stack from being destroyed. */
3930 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3931 opts->x_target_flags |= MASK_NO_RED_ZONE;
3934 /* Keep nonleaf frame pointers. */
3935 if (opts->x_flag_omit_frame_pointer)
3936 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3937 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3938 opts->x_flag_omit_frame_pointer = 1;
3940 /* If we're doing fast math, we don't care about comparison order
3941 wrt NaNs. This lets us use a shorter comparison sequence. */
3942 if (opts->x_flag_finite_math_only)
3943 opts->x_target_flags &= ~MASK_IEEE_FP;
3945 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3946 since the insns won't need emulation. */
3947 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3948 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3950 /* Likewise, if the target doesn't have a 387, or we've specified
3951 software floating point, don't use 387 inline intrinsics. */
3952 if (!TARGET_80387_P (opts->x_target_flags))
3953 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3955 /* Turn on MMX builtins for -msse. */
3956 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3957 opts->x_ix86_isa_flags
3958 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3960 /* Enable SSE prefetch. */
3961 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3962 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3963 x86_prefetch_sse = true;
3965 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3966 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3967 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3968 opts->x_ix86_isa_flags
3969 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3971 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3972 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3973 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3974 opts->x_ix86_isa_flags
3975 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3977 /* Enable lzcnt instruction for -mabm. */
3978 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3982 /* Validate -mpreferred-stack-boundary= value or default it to
3983 PREFERRED_STACK_BOUNDARY_DEFAULT. */
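/* The option argument is log2 of the boundary in bytes, so e.g.
   -mpreferred-stack-boundary=4 requests a 16-byte (128-bit) boundary,
   as computed by (1 << arg) * BITS_PER_UNIT below. */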
3984 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3985 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3987 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3988 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3989 int max = (TARGET_SEH ? 4 : 12);
3991 if (opts->x_ix86_preferred_stack_boundary_arg < min
3992 || opts->x_ix86_preferred_stack_boundary_arg > max)
3994 if (min == max)
3995 error ("-mpreferred-stack-boundary is not supported "
3996 "for this target");
3997 else
3998 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3999 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4001 else
4002 ix86_preferred_stack_boundary
4003 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4006 /* Set the default value for -mstackrealign. */
4007 if (opts->x_ix86_force_align_arg_pointer == -1)
4008 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4010 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4012 /* Validate -mincoming-stack-boundary= value or default it to
4013 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4014 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4015 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4017 if (opts->x_ix86_incoming_stack_boundary_arg
4018 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4019 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4020 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4021 opts->x_ix86_incoming_stack_boundary_arg,
4022 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4023 else
4025 ix86_user_incoming_stack_boundary
4026 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4027 ix86_incoming_stack_boundary
4028 = ix86_user_incoming_stack_boundary;
4032 #ifndef NO_PROFILE_COUNTERS
4033 if (flag_nop_mcount)
4034 error ("-mnop-mcount is not compatible with this target");
4035 #endif
4036 if (flag_nop_mcount && flag_pic)
4037 error ("-mnop-mcount is not implemented for -fPIC");
4039 /* Accept -msseregparm only if at least SSE support is enabled. */
4040 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4041 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4042 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4044 if (opts_set->x_ix86_fpmath)
4046 if (opts->x_ix86_fpmath & FPMATH_SSE)
4048 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4050 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4051 opts->x_ix86_fpmath = FPMATH_387;
4053 else if ((opts->x_ix86_fpmath & FPMATH_387)
4054 && !TARGET_80387_P (opts->x_target_flags))
4056 warning (0, "387 instruction set disabled, using SSE arithmetics");
4057 opts->x_ix86_fpmath = FPMATH_SSE;
4061 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4062 -mfpmath=387. The latter is nevertheless the default on many targets,
4063 since the extra 80-bit precision of temporaries is considered part of
4064 the ABI. Override the default at least for -ffast-math.
4065 TODO: -mfpmath=both seems to produce equally performing code with
4066 slightly smaller binaries. It is, however, not clear whether register
4067 allocation is ready for this setting.
4068 Also, -mfpmath=387 codegen is overall noticeably more compact (about
4069 4-5%) than SSE codegen. We may switch to 387 with -ffast-math for
4070 size-optimized functions. */
4071 else if (fast_math_flags_set_p (&global_options)
4072 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4073 opts->x_ix86_fpmath = FPMATH_SSE;
4074 else
4075 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4077 /* If the i387 is disabled, then do not return values in it. */
4078 if (!TARGET_80387_P (opts->x_target_flags))
4079 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4081 /* Use external vectorized library in vectorizing intrinsics. */
4082 if (opts_set->x_ix86_veclibabi_type)
4083 switch (opts->x_ix86_veclibabi_type)
4085 case ix86_veclibabi_type_svml:
4086 ix86_veclib_handler = ix86_veclibabi_svml;
4087 break;
4089 case ix86_veclibabi_type_acml:
4090 ix86_veclib_handler = ix86_veclibabi_acml;
4091 break;
4093 default:
4094 gcc_unreachable ();
4097 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4098 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4099 && !opts->x_optimize_size)
4100 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4102 /* If stack probes are required, the space used for large function
4103 arguments on the stack must also be probed, so enable
4104 -maccumulate-outgoing-args so this happens in the prologue. */
4105 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4106 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4108 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4109 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4110 "for correctness", prefix, suffix);
4111 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4114 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
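/* Emit a sample internal label for "LX" and locate the 'X' in it to learn
   the target's local-label prefix and its length. */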
4116 char *p;
4117 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4118 p = strchr (internal_label_prefix, 'X');
4119 internal_label_prefix_len = p - internal_label_prefix;
4120 *p = '\0';
4123 /* When no scheduling description is available, disable the scheduler pass
4124 so it won't slow down compilation and make x87 code slower. */
4125 if (!TARGET_SCHEDULE)
4126 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4128 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4129 ix86_tune_cost->simultaneous_prefetches,
4130 opts->x_param_values,
4131 opts_set->x_param_values);
4132 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4133 ix86_tune_cost->prefetch_block,
4134 opts->x_param_values,
4135 opts_set->x_param_values);
4136 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4137 ix86_tune_cost->l1_cache_size,
4138 opts->x_param_values,
4139 opts_set->x_param_values);
4140 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4141 ix86_tune_cost->l2_cache_size,
4142 opts->x_param_values,
4143 opts_set->x_param_values);
4145 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4146 if (opts->x_flag_prefetch_loop_arrays < 0
4147 && HAVE_prefetch
4148 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4149 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4150 opts->x_flag_prefetch_loop_arrays = 1;
4152 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4153 can be optimized to ap = __builtin_next_arg (0). */
4154 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4155 targetm.expand_builtin_va_start = NULL;
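/* Select the DImode or SImode insn generators once up front so that later
   code can go through the ix86_gen_* hooks without re-checking the word
   size each time. */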
4157 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4159 ix86_gen_leave = gen_leave_rex64;
4160 if (Pmode == DImode)
4162 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4163 ix86_gen_tls_local_dynamic_base_64
4164 = gen_tls_local_dynamic_base_64_di;
4166 else
4168 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4169 ix86_gen_tls_local_dynamic_base_64
4170 = gen_tls_local_dynamic_base_64_si;
4173 else
4174 ix86_gen_leave = gen_leave;
4176 if (Pmode == DImode)
4178 ix86_gen_add3 = gen_adddi3;
4179 ix86_gen_sub3 = gen_subdi3;
4180 ix86_gen_sub3_carry = gen_subdi3_carry;
4181 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4182 ix86_gen_andsp = gen_anddi3;
4183 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4184 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4185 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4186 ix86_gen_monitor = gen_sse3_monitor_di;
4188 else
4190 ix86_gen_add3 = gen_addsi3;
4191 ix86_gen_sub3 = gen_subsi3;
4192 ix86_gen_sub3_carry = gen_subsi3_carry;
4193 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4194 ix86_gen_andsp = gen_andsi3;
4195 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4196 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4197 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4198 ix86_gen_monitor = gen_sse3_monitor_si;
4201 #ifdef USE_IX86_CLD
4202 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4203 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4204 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4205 #endif
4207 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4209 if (opts->x_flag_fentry > 0)
4210 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4211 "with -fpic");
4212 opts->x_flag_fentry = 0;
4214 else if (TARGET_SEH)
4216 if (opts->x_flag_fentry == 0)
4217 sorry ("-mno-fentry isn%'t compatible with SEH");
4218 opts->x_flag_fentry = 1;
4220 else if (opts->x_flag_fentry < 0)
4222 #if defined(PROFILE_BEFORE_PROLOGUE)
4223 opts->x_flag_fentry = 1;
4224 #else
4225 opts->x_flag_fentry = 0;
4226 #endif
4229 /* When not optimizing for size, enable the vzeroupper optimization for
4230 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
4231 AVX unaligned loads/stores. */
4232 if (!opts->x_optimize_size)
4234 if (flag_expensive_optimizations
4235 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4236 opts->x_target_flags |= MASK_VZEROUPPER;
4237 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4238 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4239 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4240 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4241 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4242 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4243 /* Enable 128-bit AVX instruction generation
4244 for the auto-vectorizer. */
4245 if (TARGET_AVX128_OPTIMAL
4246 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4247 opts->x_target_flags |= MASK_PREFER_AVX128;
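/* Parse -mrecip=opt1,!opt2,...: each token names an entry of
   recip_options[] above ("default" selects all); a leading '!' removes the
   corresponding mask bits instead of adding them. */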
4250 if (opts->x_ix86_recip_name)
4252 char *p = ASTRDUP (opts->x_ix86_recip_name);
4253 char *q;
4254 unsigned int mask, i;
4255 bool invert;
4257 while ((q = strtok (p, ",")) != NULL)
4259 p = NULL;
4260 if (*q == '!')
4262 invert = true;
4263 q++;
4265 else
4266 invert = false;
4268 if (!strcmp (q, "default"))
4269 mask = RECIP_MASK_ALL;
4270 else
4272 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4273 if (!strcmp (q, recip_options[i].string))
4275 mask = recip_options[i].mask;
4276 break;
4279 if (i == ARRAY_SIZE (recip_options))
4281 error ("unknown option for -mrecip=%s", q);
4282 invert = false;
4283 mask = RECIP_MASK_NONE;
4287 opts->x_recip_mask_explicit |= mask;
4288 if (invert)
4289 opts->x_recip_mask &= ~mask;
4290 else
4291 opts->x_recip_mask |= mask;
4295 if (TARGET_RECIP_P (opts->x_target_flags))
4296 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4297 else if (opts_set->x_target_flags & MASK_RECIP)
4298 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4300 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4301 for 64-bit Bionic. */
4302 if (TARGET_HAS_BIONIC
4303 && !(opts_set->x_target_flags
4304 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4305 opts->x_target_flags |= (TARGET_64BIT
4306 ? MASK_LONG_DOUBLE_128
4307 : MASK_LONG_DOUBLE_64);
4309 /* Only one of them can be active. */
4310 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4311 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4313 /* Save the initial options in case the user does function specific
4314 options. */
4315 if (main_args_p)
4316 target_option_default_node = target_option_current_node
4317 = build_target_option_node (opts);
4319 /* Handle stack protector */
4320 if (!opts_set->x_ix86_stack_protector_guard)
4321 opts->x_ix86_stack_protector_guard
4322 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4324 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4325 if (opts->x_ix86_tune_memcpy_strategy)
4327 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4328 ix86_parse_stringop_strategy_string (str, false);
4329 free (str);
4332 if (opts->x_ix86_tune_memset_strategy)
4334 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4335 ix86_parse_stringop_strategy_string (str, true);
4336 free (str);
4340 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4342 static void
4343 ix86_option_override (void)
4345 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4346 struct register_pass_info insert_vzeroupper_info
4347 = { pass_insert_vzeroupper, "reload",
4348 1, PASS_POS_INSERT_AFTER
4351 ix86_option_override_internal (true, &global_options, &global_options_set);
4354 /* This needs to be done at startup. It's convenient to do it here. */
4355 register_pass (&insert_vzeroupper_info);
4358 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4359 static char *
4360 ix86_offload_options (void)
4362 if (TARGET_LP64)
4363 return xstrdup ("-foffload-abi=lp64");
4364 return xstrdup ("-foffload-abi=ilp32");
4367 /* Update register usage after having seen the compiler flags. */
4369 static void
4370 ix86_conditional_register_usage (void)
4372 int i, c_mask;
4373 unsigned int j;
4375 /* The PIC register, if it exists, is fixed. */
4376 j = PIC_OFFSET_TABLE_REGNUM;
4377 if (j != INVALID_REGNUM)
4378 fixed_regs[j] = call_used_regs[j] = 1;
4380 /* For 32-bit targets, squash the REX registers. */
4381 if (! TARGET_64BIT)
4383 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4384 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4385 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4386 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4387 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4388 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4391 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4392 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4393 : TARGET_64BIT ? (1 << 2)
4394 : (1 << 1));
4396 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4398 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4400 /* Set/reset conditionally defined registers from
4401 CALL_USED_REGISTERS initializer. */
4402 if (call_used_regs[i] > 1)
4403 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4405 /* Calculate registers of CLOBBERED_REGS register set
4406 as call used registers from GENERAL_REGS register set. */
4407 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4408 && call_used_regs[i])
4409 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4412 /* If MMX is disabled, squash the registers. */
4413 if (! TARGET_MMX)
4414 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4415 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4416 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4418 /* If SSE is disabled, squash the registers. */
4419 if (! TARGET_SSE)
4420 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4421 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4422 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4424 /* If the FPU is disabled, squash the registers. */
4425 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4426 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4427 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4428 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4430 /* If AVX512F is disabled, squash the registers. */
4431 if (! TARGET_AVX512F)
4433 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4434 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4436 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4437 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4440 /* If MPX is disabled, squash the registers. */
4441 if (! TARGET_MPX)
4442 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4443 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4447 /* Save the current options */
4449 static void
4450 ix86_function_specific_save (struct cl_target_option *ptr,
4451 struct gcc_options *opts)
4453 ptr->arch = ix86_arch;
4454 ptr->schedule = ix86_schedule;
4455 ptr->tune = ix86_tune;
4456 ptr->branch_cost = ix86_branch_cost;
4457 ptr->tune_defaulted = ix86_tune_defaulted;
4458 ptr->arch_specified = ix86_arch_specified;
4459 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4460 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4461 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4462 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4463 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4464 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4465 ptr->x_ix86_abi = opts->x_ix86_abi;
4466 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4467 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4468 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4469 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4470 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4471 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4472 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4473 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4474 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4475 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4476 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4477 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4478 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4479 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4480 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4481 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4482 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4483 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4484 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4485 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4487 /* The fields are char but the variables are not; make sure the
4488 values fit in the fields. */
4489 gcc_assert (ptr->arch == ix86_arch);
4490 gcc_assert (ptr->schedule == ix86_schedule);
4491 gcc_assert (ptr->tune == ix86_tune);
4492 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4495 /* Restore the current options */
4497 static void
4498 ix86_function_specific_restore (struct gcc_options *opts,
4499 struct cl_target_option *ptr)
4501 enum processor_type old_tune = ix86_tune;
4502 enum processor_type old_arch = ix86_arch;
4503 unsigned int ix86_arch_mask;
4504 int i;
4506 /* We don't change -fPIC. */
4507 opts->x_flag_pic = flag_pic;
4509 ix86_arch = (enum processor_type) ptr->arch;
4510 ix86_schedule = (enum attr_cpu) ptr->schedule;
4511 ix86_tune = (enum processor_type) ptr->tune;
4512 opts->x_ix86_branch_cost = ptr->branch_cost;
4513 ix86_tune_defaulted = ptr->tune_defaulted;
4514 ix86_arch_specified = ptr->arch_specified;
4515 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4516 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4517 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4518 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4519 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4520 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4521 opts->x_ix86_abi = ptr->x_ix86_abi;
4522 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4523 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4524 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4525 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4526 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4527 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4528 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4529 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4530 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4531 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4532 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4533 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4534 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4535 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4536 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4537 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4538 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4539 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4540 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4541 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4543 /* Recreate the arch feature tests if the arch changed */
4544 if (old_arch != ix86_arch)
4546 ix86_arch_mask = 1u << ix86_arch;
4547 for (i = 0; i < X86_ARCH_LAST; ++i)
4548 ix86_arch_features[i]
4549 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4552 /* Recreate the tune optimization tests */
4553 if (old_tune != ix86_tune)
4554 set_ix86_tune_features (ix86_tune, false);
4557 /* Print the current options */
4559 static void
4560 ix86_function_specific_print (FILE *file, int indent,
4561 struct cl_target_option *ptr)
4563 char *target_string
4564 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4565 NULL, NULL, ptr->x_ix86_fpmath, false);
4567 gcc_assert (ptr->arch < PROCESSOR_max);
4568 fprintf (file, "%*sarch = %d (%s)\n",
4569 indent, "",
4570 ptr->arch, processor_target_table[ptr->arch].name);
4572 gcc_assert (ptr->tune < PROCESSOR_max);
4573 fprintf (file, "%*stune = %d (%s)\n",
4574 indent, "",
4575 ptr->tune, processor_target_table[ptr->tune].name);
4577 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4579 if (target_string)
4581 fprintf (file, "%*s%s\n", indent, "", target_string);
4582 free (target_string);
4587 /* Inner function to process the attribute((target(...))), take an argument and
4588 set the current options from the argument. If we have a list, recursively go
4589 over the list. */
4591 static bool
4592 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4593 struct gcc_options *opts,
4594 struct gcc_options *opts_set,
4595 struct gcc_options *enum_opts_set)
4597 char *next_optstr;
4598 bool ret = true;
4600 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4601 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4602 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4603 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4604 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4606 enum ix86_opt_type
4608 ix86_opt_unknown,
4609 ix86_opt_yes,
4610 ix86_opt_no,
4611 ix86_opt_str,
4612 ix86_opt_enum,
4613 ix86_opt_isa
4616 static const struct
4618 const char *string;
4619 size_t len;
4620 enum ix86_opt_type type;
4621 int opt;
4622 int mask;
4623 } attrs[] = {
4624 /* isa options */
4625 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4626 IX86_ATTR_ISA ("abm", OPT_mabm),
4627 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4628 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4629 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4630 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4631 IX86_ATTR_ISA ("aes", OPT_maes),
4632 IX86_ATTR_ISA ("sha", OPT_msha),
4633 IX86_ATTR_ISA ("avx", OPT_mavx),
4634 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4635 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4636 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4637 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4638 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4639 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4640 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4641 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4642 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4643 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4644 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4645 IX86_ATTR_ISA ("sse", OPT_msse),
4646 IX86_ATTR_ISA ("sse2", OPT_msse2),
4647 IX86_ATTR_ISA ("sse3", OPT_msse3),
4648 IX86_ATTR_ISA ("sse4", OPT_msse4),
4649 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4650 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4651 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4652 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4653 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4654 IX86_ATTR_ISA ("fma", OPT_mfma),
4655 IX86_ATTR_ISA ("xop", OPT_mxop),
4656 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4657 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4658 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4659 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4660 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4661 IX86_ATTR_ISA ("hle", OPT_mhle),
4662 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4663 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4664 IX86_ATTR_ISA ("adx", OPT_madx),
4665 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4666 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4667 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4668 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4669 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4670 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4671 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4672 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4673 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4674 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4675 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4677 /* enum options */
4678 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4680 /* string options */
4681 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4682 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4684 /* flag options */
4685 IX86_ATTR_YES ("cld",
4686 OPT_mcld,
4687 MASK_CLD),
4689 IX86_ATTR_NO ("fancy-math-387",
4690 OPT_mfancy_math_387,
4691 MASK_NO_FANCY_MATH_387),
4693 IX86_ATTR_YES ("ieee-fp",
4694 OPT_mieee_fp,
4695 MASK_IEEE_FP),
4697 IX86_ATTR_YES ("inline-all-stringops",
4698 OPT_minline_all_stringops,
4699 MASK_INLINE_ALL_STRINGOPS),
4701 IX86_ATTR_YES ("inline-stringops-dynamically",
4702 OPT_minline_stringops_dynamically,
4703 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4705 IX86_ATTR_NO ("align-stringops",
4706 OPT_mno_align_stringops,
4707 MASK_NO_ALIGN_STRINGOPS),
4709 IX86_ATTR_YES ("recip",
4710 OPT_mrecip,
4711 MASK_RECIP),
4715 /* If this is a list, recurse to get the options. */
4716 if (TREE_CODE (args) == TREE_LIST)
4718 bool ret = true;
4720 for (; args; args = TREE_CHAIN (args))
4721 if (TREE_VALUE (args)
4722 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4723 p_strings, opts, opts_set,
4724 enum_opts_set))
4725 ret = false;
4727 return ret;
4730 else if (TREE_CODE (args) != STRING_CST)
4732 error ("attribute %<target%> argument not a string");
4733 return false;
4736 /* Handle multiple arguments separated by commas. */
4737 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4739 while (next_optstr && *next_optstr != '\0')
4741 char *p = next_optstr;
4742 char *orig_p = p;
4743 char *comma = strchr (next_optstr, ',');
4744 const char *opt_string;
4745 size_t len, opt_len;
4746 int opt;
4747 bool opt_set_p;
4748 char ch;
4749 unsigned i;
4750 enum ix86_opt_type type = ix86_opt_unknown;
4751 int mask = 0;
4753 if (comma)
4755 *comma = '\0';
4756 len = comma - next_optstr;
4757 next_optstr = comma + 1;
4759 else
4761 len = strlen (p);
4762 next_optstr = NULL;
4765 /* Recognize no-xxx. */
4766 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4768 opt_set_p = false;
4769 p += 3;
4770 len -= 3;
4772 else
4773 opt_set_p = true;
4775 /* Find the option. */
4776 ch = *p;
4777 opt = N_OPTS;
4778 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4780 type = attrs[i].type;
4781 opt_len = attrs[i].len;
4782 if (ch == attrs[i].string[0]
4783 && ((type != ix86_opt_str && type != ix86_opt_enum)
4784 ? len == opt_len
4785 : len > opt_len)
4786 && memcmp (p, attrs[i].string, opt_len) == 0)
4788 opt = attrs[i].opt;
4789 mask = attrs[i].mask;
4790 opt_string = attrs[i].string;
4791 break;
4795 /* Process the option. */
4796 if (opt == N_OPTS)
4798 error ("attribute(target(\"%s\")) is unknown", orig_p);
4799 ret = false;
4802 else if (type == ix86_opt_isa)
4804 struct cl_decoded_option decoded;
4806 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4807 ix86_handle_option (opts, opts_set,
4808 &decoded, input_location);
4811 else if (type == ix86_opt_yes || type == ix86_opt_no)
4813 if (type == ix86_opt_no)
4814 opt_set_p = !opt_set_p;
4816 if (opt_set_p)
4817 opts->x_target_flags |= mask;
4818 else
4819 opts->x_target_flags &= ~mask;
4822 else if (type == ix86_opt_str)
4824 if (p_strings[opt])
4826 error ("option(\"%s\") was already specified", opt_string);
4827 ret = false;
4829 else
4830 p_strings[opt] = xstrdup (p + opt_len);
4833 else if (type == ix86_opt_enum)
4835 bool arg_ok;
4836 int value;
4838 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4839 if (arg_ok)
4840 set_option (opts, enum_opts_set, opt, value,
4841 p + opt_len, DK_UNSPECIFIED, input_location,
4842 global_dc);
4843 else
4845 error ("attribute(target(\"%s\")) is unknown", orig_p);
4846 ret = false;
4850 else
4851 gcc_unreachable ();
4854 return ret;
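/* Illustrative sketch, not part of i386.c: a hypothetical declaration such as

     void fast_path (void) __attribute__ ((target ("sse4.2,no-3dnow,fpmath=sse")));

   reaches ix86_valid_target_attribute_inner_p with ARGS being the STRING_CST
   "sse4.2,no-3dnow,fpmath=sse".  The loop above splits it at the commas,
   strips a leading "no-", and then dispatches per entry: "sse4.2" and "3dnow"
   are ix86_opt_isa entries handled through generate_option/ix86_handle_option,
   while "fpmath=" is an ix86_opt_enum entry resolved with
   opt_enum_arg_to_value.  */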
4857 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4859 tree
4860 ix86_valid_target_attribute_tree (tree args,
4861 struct gcc_options *opts,
4862 struct gcc_options *opts_set)
4864 const char *orig_arch_string = opts->x_ix86_arch_string;
4865 const char *orig_tune_string = opts->x_ix86_tune_string;
4866 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4867 int orig_tune_defaulted = ix86_tune_defaulted;
4868 int orig_arch_specified = ix86_arch_specified;
4869 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4870 tree t = NULL_TREE;
4871 int i;
4872 struct cl_target_option *def
4873 = TREE_TARGET_OPTION (target_option_default_node);
4874 struct gcc_options enum_opts_set;
4876 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4878 /* Process each of the options on the chain. */
4879 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4880 opts_set, &enum_opts_set))
4881 return error_mark_node;
4883 /* If the changed options are different from the default, rerun
4884 ix86_option_override_internal, and then save the options away.
4885 The string options are attribute options, and will be undone
4886 when we copy the save structure. */
4887 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4888 || opts->x_target_flags != def->x_target_flags
4889 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4890 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4891 || enum_opts_set.x_ix86_fpmath)
4893 /* If we are using the default tune= or arch=, undo the string assigned,
4894 and use the default. */
4895 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4896 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4897 else if (!orig_arch_specified)
4898 opts->x_ix86_arch_string = NULL;
4900 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4901 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4902 else if (orig_tune_defaulted)
4903 opts->x_ix86_tune_string = NULL;
4905 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4906 if (enum_opts_set.x_ix86_fpmath)
4907 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4908 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4909 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4911 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4912 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4915 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4916 ix86_option_override_internal (false, opts, opts_set);
4918 /* Add any builtin functions with the new isa if any. */
4919 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4921 /* Save the current options unless we are validating options for
4922 #pragma. */
4923 t = build_target_option_node (opts);
4925 opts->x_ix86_arch_string = orig_arch_string;
4926 opts->x_ix86_tune_string = orig_tune_string;
4927 opts_set->x_ix86_fpmath = orig_fpmath_set;
4929 /* Free up memory allocated to hold the strings */
4930 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4931 free (option_strings[i]);
4934 return t;
4937 /* Hook to validate attribute((target("string"))). */
4939 static bool
4940 ix86_valid_target_attribute_p (tree fndecl,
4941 tree ARG_UNUSED (name),
4942 tree args,
4943 int ARG_UNUSED (flags))
4945 struct gcc_options func_options;
4946 tree new_target, new_optimize;
4947 bool ret = true;
4949 /* attribute((target("default"))) does nothing, beyond
4950 affecting multi-versioning. */
4951 if (TREE_VALUE (args)
4952 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4953 && TREE_CHAIN (args) == NULL_TREE
4954 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4955 return true;
4957 tree old_optimize = build_optimization_node (&global_options);
4959 /* Get the optimization options of the current function. */
4960 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4962 if (!func_optimize)
4963 func_optimize = old_optimize;
4965 /* Init func_options. */
4966 memset (&func_options, 0, sizeof (func_options));
4967 init_options_struct (&func_options, NULL);
4968 lang_hooks.init_options_struct (&func_options);
4970 cl_optimization_restore (&func_options,
4971 TREE_OPTIMIZATION (func_optimize));
4973 /* Initialize func_options to the default before its target options can
4974 be set. */
4975 cl_target_option_restore (&func_options,
4976 TREE_TARGET_OPTION (target_option_default_node));
4978 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4979 &global_options_set);
4981 new_optimize = build_optimization_node (&func_options);
4983 if (new_target == error_mark_node)
4984 ret = false;
4986 else if (fndecl && new_target)
4988 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4990 if (old_optimize != new_optimize)
4991 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4994 return ret;
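/* Illustrative sketch, not part of i386.c: with function multi-versioning,
   hypothetical C++ user code like

     __attribute__ ((target ("default"))) int dispatch (void) { return 0; }
     __attribute__ ((target ("avx2")))    int dispatch (void) { return 1; }

   hits the early return above for the "default" version (it only affects
   multi-versioning), while the "avx2" version goes through
   ix86_valid_target_attribute_tree and gets its own
   DECL_FUNCTION_SPECIFIC_TARGET.  */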
4998 /* Hook to determine if one function can safely inline another. */
5000 static bool
5001 ix86_can_inline_p (tree caller, tree callee)
5003 bool ret = false;
5004 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5005 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5007 /* If callee has no option attributes, then it is ok to inline. */
5008 if (!callee_tree)
5009 ret = true;
5011 /* If caller has no option attributes, but callee does then it is not ok to
5012 inline. */
5013 else if (!caller_tree)
5014 ret = false;
5016 else
5018 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5019 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5021 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5022 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5023 function. */
5024 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5025 != callee_opts->x_ix86_isa_flags)
5026 ret = false;
5028 /* See if we have the same non-isa options. */
5029 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5030 ret = false;
5032 /* See if arch, tune, etc. are the same. */
5033 else if (caller_opts->arch != callee_opts->arch)
5034 ret = false;
5036 else if (caller_opts->tune != callee_opts->tune)
5037 ret = false;
5039 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5040 ret = false;
5042 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5043 ret = false;
5045 else
5046 ret = true;
5049 return ret;
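/* Illustrative sketch, not part of i386.c: the subset test above means that,
   in hypothetical user code,

     static inline __attribute__ ((target ("sse2"))) int lo (void) { return 2; }
     __attribute__ ((target ("sse4.2"))) int hi (void) { return lo (); }

   lo may be inlined into hi (enabling SSE4.2 also enables SSE2, so lo's isa
   flags are a subset of hi's), whereas inlining an SSE4.2 callee into a plain
   SSE2 caller is rejected because the caller lacks some of the callee's isa
   flags.  */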
5053 /* Remember the last target of ix86_set_current_function. */
5054 static GTY(()) tree ix86_previous_fndecl;
5056 /* Set target globals to default. */
5058 static void
5059 ix86_reset_to_default_globals (void)
5061 tree old_tree = (ix86_previous_fndecl
5062 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5063 : NULL_TREE);
5065 if (old_tree)
5067 tree new_tree = target_option_current_node;
5068 cl_target_option_restore (&global_options,
5069 TREE_TARGET_OPTION (new_tree));
5070 if (TREE_TARGET_GLOBALS (new_tree))
5071 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5072 else if (new_tree == target_option_default_node)
5073 restore_target_globals (&default_target_globals);
5074 else
5075 TREE_TARGET_GLOBALS (new_tree)
5076 = save_target_globals_default_opts ();
5080 /* Invalidate ix86_previous_fndecl cache. */
5081 void
5082 ix86_reset_previous_fndecl (void)
5084 ix86_reset_to_default_globals ();
5085 ix86_previous_fndecl = NULL_TREE;
5088 /* Establish appropriate back-end context for processing the function
5089 FNDECL. The argument might be NULL to indicate processing at top
5090 level, outside of any function scope. */
5091 static void
5092 ix86_set_current_function (tree fndecl)
5094 /* Only change the context if the function changes. This hook is called
5095 several times in the course of compiling a function, and we don't want to
5096 slow things down too much or call target_reinit when it isn't safe. */
5097 if (fndecl && fndecl != ix86_previous_fndecl)
5099 tree old_tree = (ix86_previous_fndecl
5100 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5101 : NULL_TREE);
5103 tree new_tree = (fndecl
5104 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5105 : NULL_TREE);
5107 if (old_tree == new_tree)
5110 else if (new_tree && new_tree != target_option_default_node)
5112 cl_target_option_restore (&global_options,
5113 TREE_TARGET_OPTION (new_tree));
5114 if (TREE_TARGET_GLOBALS (new_tree))
5115 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5116 else
5117 TREE_TARGET_GLOBALS (new_tree)
5118 = save_target_globals_default_opts ();
5121 else if (old_tree && old_tree != target_option_default_node)
5122 ix86_reset_to_default_globals ();
5123 ix86_previous_fndecl = fndecl;
5128 /* Return true if this goes in large data/bss. */
5130 static bool
5131 ix86_in_large_data_p (tree exp)
5133 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5134 return false;
5136 /* Functions are never large data. */
5137 if (TREE_CODE (exp) == FUNCTION_DECL)
5138 return false;
5140 /* Automatic variables are never large data. */
5141 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5142 return false;
5144 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5146 const char *section = DECL_SECTION_NAME (exp);
5147 if (strcmp (section, ".ldata") == 0
5148 || strcmp (section, ".lbss") == 0)
5149 return true;
5150 return false;
5152 else
5154 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5156 /* If this is an incomplete type with size 0, then we can't put it
5157 in data because it might be too big when completed. Also,
5158 int_size_in_bytes returns -1 if size can vary or is larger than
5159 an integer, in which case it is also safer to assume that it goes in
5160 large data. */
5161 if (size <= 0 || size > ix86_section_threshold)
5162 return true;
5165 return false;
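/* Illustrative sketch, not part of i386.c: when compiling with
   -mcmodel=medium, a hypothetical global like

     static char big_table[1 << 20];

   is 1 MiB, which exceeds ix86_section_threshold (64 KiB by default), so it
   is treated as large data and placed in the .lbss/.ldata sections handled
   below, while a small scalar global stays in the ordinary sections.  */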
5168 /* Switch to the appropriate section for output of DECL.
5169 DECL is either a `VAR_DECL' node or a constant of some sort.
5170 RELOC indicates whether forming the initial value of DECL requires
5171 link-time relocations. */
5173 ATTRIBUTE_UNUSED static section *
5174 x86_64_elf_select_section (tree decl, int reloc,
5175 unsigned HOST_WIDE_INT align)
5177 if (ix86_in_large_data_p (decl))
5179 const char *sname = NULL;
5180 unsigned int flags = SECTION_WRITE;
5181 switch (categorize_decl_for_section (decl, reloc))
5183 case SECCAT_DATA:
5184 sname = ".ldata";
5185 break;
5186 case SECCAT_DATA_REL:
5187 sname = ".ldata.rel";
5188 break;
5189 case SECCAT_DATA_REL_LOCAL:
5190 sname = ".ldata.rel.local";
5191 break;
5192 case SECCAT_DATA_REL_RO:
5193 sname = ".ldata.rel.ro";
5194 break;
5195 case SECCAT_DATA_REL_RO_LOCAL:
5196 sname = ".ldata.rel.ro.local";
5197 break;
5198 case SECCAT_BSS:
5199 sname = ".lbss";
5200 flags |= SECTION_BSS;
5201 break;
5202 case SECCAT_RODATA:
5203 case SECCAT_RODATA_MERGE_STR:
5204 case SECCAT_RODATA_MERGE_STR_INIT:
5205 case SECCAT_RODATA_MERGE_CONST:
5206 sname = ".lrodata";
5207 flags = 0;
5208 break;
5209 case SECCAT_SRODATA:
5210 case SECCAT_SDATA:
5211 case SECCAT_SBSS:
5212 gcc_unreachable ();
5213 case SECCAT_TEXT:
5214 case SECCAT_TDATA:
5215 case SECCAT_TBSS:
5216 /* We don't split these for the medium model. Place them into
5217 default sections and hope for the best. */
5218 break;
5220 if (sname)
5222 /* We might get called with string constants, but get_named_section
5223 doesn't like them as they are not DECLs. Also, we need to set
5224 flags in that case. */
5225 if (!DECL_P (decl))
5226 return get_section (sname, flags, NULL);
5227 return get_named_section (decl, sname, reloc);
5230 return default_elf_select_section (decl, reloc, align);
5233 /* Select a set of attributes for section NAME based on the properties
5234 of DECL and whether or not RELOC indicates that DECL's initializer
5235 might contain runtime relocations. */
5237 static unsigned int ATTRIBUTE_UNUSED
5238 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5240 unsigned int flags = default_section_type_flags (decl, name, reloc);
5242 if (decl == NULL_TREE
5243 && (strcmp (name, ".ldata.rel.ro") == 0
5244 || strcmp (name, ".ldata.rel.ro.local") == 0))
5245 flags |= SECTION_RELRO;
5247 if (strcmp (name, ".lbss") == 0
5248 || strncmp (name, ".lbss.", 5) == 0
5249 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5250 flags |= SECTION_BSS;
5252 return flags;
5255 /* Build up a unique section name, expressed as a
5256 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5257 RELOC indicates whether the initial value of EXP requires
5258 link-time relocations. */
5260 static void ATTRIBUTE_UNUSED
5261 x86_64_elf_unique_section (tree decl, int reloc)
5263 if (ix86_in_large_data_p (decl))
5265 const char *prefix = NULL;
5266 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5267 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5269 switch (categorize_decl_for_section (decl, reloc))
5271 case SECCAT_DATA:
5272 case SECCAT_DATA_REL:
5273 case SECCAT_DATA_REL_LOCAL:
5274 case SECCAT_DATA_REL_RO:
5275 case SECCAT_DATA_REL_RO_LOCAL:
5276 prefix = one_only ? ".ld" : ".ldata";
5277 break;
5278 case SECCAT_BSS:
5279 prefix = one_only ? ".lb" : ".lbss";
5280 break;
5281 case SECCAT_RODATA:
5282 case SECCAT_RODATA_MERGE_STR:
5283 case SECCAT_RODATA_MERGE_STR_INIT:
5284 case SECCAT_RODATA_MERGE_CONST:
5285 prefix = one_only ? ".lr" : ".lrodata";
5286 break;
5287 case SECCAT_SRODATA:
5288 case SECCAT_SDATA:
5289 case SECCAT_SBSS:
5290 gcc_unreachable ();
5291 case SECCAT_TEXT:
5292 case SECCAT_TDATA:
5293 case SECCAT_TBSS:
5294 /* We don't split these for the medium model. Place them into
5295 default sections and hope for the best. */
5296 break;
5298 if (prefix)
5300 const char *name, *linkonce;
5301 char *string;
5303 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5304 name = targetm.strip_name_encoding (name);
5306 /* If we're using one_only, then there needs to be a .gnu.linkonce
5307 prefix to the section name. */
5308 linkonce = one_only ? ".gnu.linkonce" : "";
5310 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5312 set_decl_section_name (decl, string);
5313 return;
5316 default_unique_section (decl, reloc);
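/* Illustrative sketch, not part of i386.c: for a hypothetical medium-model
   variable "big_table" categorized as SECCAT_BSS, the code above builds a
   section named ".lbss.big_table", or ".gnu.linkonce.lb.big_table" when the
   decl is one_only and COMDAT groups are unavailable, matching the prefixes
   that x86_64_elf_section_type_flags recognizes for SECTION_BSS.  */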
5319 #ifdef COMMON_ASM_OP
5320 /* This says how to output assembler code to declare an
5321 uninitialized external linkage data object.
5323 For medium model x86-64 we need to use .largecomm opcode for
5324 large objects. */
5325 void
5326 x86_elf_aligned_common (FILE *file,
5327 const char *name, unsigned HOST_WIDE_INT size,
5328 int align)
5330 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5331 && size > (unsigned int)ix86_section_threshold)
5332 fputs ("\t.largecomm\t", file);
5333 else
5334 fputs (COMMON_ASM_OP, file);
5335 assemble_name (file, name);
5336 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5337 size, align / BITS_PER_UNIT);
5339 #endif
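/* Illustrative sketch, not part of i386.c: under -mcmodel=medium, a common
   symbol larger than the section threshold is emitted with the .largecomm
   pseudo-op instead of .comm, so the output looks roughly like

     .largecomm	big_table,1048576,32

   using the name, size in bytes, and alignment in bytes printed above.  */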
5341 /* Utility function for targets to use in implementing
5342 ASM_OUTPUT_ALIGNED_BSS. */
5344 void
5345 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5346 unsigned HOST_WIDE_INT size, int align)
5348 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5349 && size > (unsigned int)ix86_section_threshold)
5350 switch_to_section (get_named_section (decl, ".lbss", 0));
5351 else
5352 switch_to_section (bss_section);
5353 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5354 #ifdef ASM_DECLARE_OBJECT_NAME
5355 last_assemble_variable_decl = decl;
5356 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5357 #else
5358 /* Standard thing is just output label for the object. */
5359 ASM_OUTPUT_LABEL (file, name);
5360 #endif /* ASM_DECLARE_OBJECT_NAME */
5361 ASM_OUTPUT_SKIP (file, size ? size : 1);
5364 /* Decide whether we must probe the stack before any space allocation
5365 on this target. It's essentially TARGET_STACK_PROBE except when
5366 -fstack-check causes the stack to be already probed differently. */
5368 bool
5369 ix86_target_stack_probe (void)
5371 /* Do not probe the stack twice if static stack checking is enabled. */
5372 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5373 return false;
5375 return TARGET_STACK_PROBE;
5378 /* Decide whether we can make a sibling call to a function. DECL is the
5379 declaration of the function being targeted by the call and EXP is the
5380 CALL_EXPR representing the call. */
5382 static bool
5383 ix86_function_ok_for_sibcall (tree decl, tree exp)
5385 tree type, decl_or_type;
5386 rtx a, b;
5388 /* If we are generating position-independent code, we cannot sibcall
5389 optimize any indirect call, or a direct call to a global function,
5390 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5391 if (!TARGET_MACHO
5392 && !TARGET_64BIT
5393 && flag_pic
5394 && (!decl || !targetm.binds_local_p (decl)))
5395 return false;
5397 /* If we need to align the outgoing stack, then sibcalling would
5398 unalign the stack, which may break the called function. */
5399 if (ix86_minimum_incoming_stack_boundary (true)
5400 < PREFERRED_STACK_BOUNDARY)
5401 return false;
5403 if (decl)
5405 decl_or_type = decl;
5406 type = TREE_TYPE (decl);
5408 else
5410 /* We're looking at the CALL_EXPR, we need the type of the function. */
5411 type = CALL_EXPR_FN (exp); /* pointer expression */
5412 type = TREE_TYPE (type); /* pointer type */
5413 type = TREE_TYPE (type); /* function type */
5414 decl_or_type = type;
5417 /* Check that the return value locations are the same. Like
5418 if we are returning floats on the 80387 register stack, we cannot
5419 make a sibcall from a function that doesn't return a float to a
5420 function that does or, conversely, from a function that does return
5421 a float to a function that doesn't; the necessary stack adjustment
5422 would not be executed. This is also the place we notice
5423 differences in the return value ABI. Note that it is ok for one
5424 of the functions to have void return type as long as the return
5425 value of the other is passed in a register. */
5426 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5427 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5428 cfun->decl, false);
5429 if (STACK_REG_P (a) || STACK_REG_P (b))
5431 if (!rtx_equal_p (a, b))
5432 return false;
5434 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5436 else if (!rtx_equal_p (a, b))
5437 return false;
5439 if (TARGET_64BIT)
5441 /* The SYSV ABI has more call-clobbered registers;
5442 disallow sibcalls from MS to SYSV. */
5443 if (cfun->machine->call_abi == MS_ABI
5444 && ix86_function_type_abi (type) == SYSV_ABI)
5445 return false;
5447 else
5449 /* If this call is indirect, we'll need to be able to use a
5450 call-clobbered register for the address of the target function.
5451 Make sure that all such registers are not used for passing
5452 parameters. Note that DLLIMPORT functions are indirect. */
5453 if (!decl
5454 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5456 if (ix86_function_regparm (type, NULL) >= 3)
5458 /* ??? Need to count the actual number of registers to be used,
5459 not the possible number of registers. Fix later. */
5460 return false;
5465 /* Otherwise okay. That also includes certain types of indirect calls. */
5466 return true;
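/* Illustrative sketch, not part of i386.c: in 32-bit PIC code, a hypothetical
   tail call like

     extern double g (double);
     double f (double x) { return g (x); }

   is refused as a sibcall when g does not bind locally, because reaching the
   PLT requires %ebx to stay live; the later checks additionally compare the
   return-value locations (e.g. the 80387 register stack) and, for indirect
   32-bit calls, the number of regparm registers.  */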
5469 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5470 and "sseregparm" calling convention attributes;
5471 arguments as in struct attribute_spec.handler. */
5473 static tree
5474 ix86_handle_cconv_attribute (tree *node, tree name,
5475 tree args,
5476 int,
5477 bool *no_add_attrs)
5479 if (TREE_CODE (*node) != FUNCTION_TYPE
5480 && TREE_CODE (*node) != METHOD_TYPE
5481 && TREE_CODE (*node) != FIELD_DECL
5482 && TREE_CODE (*node) != TYPE_DECL)
5484 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5485 name);
5486 *no_add_attrs = true;
5487 return NULL_TREE;
5490 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5491 if (is_attribute_p ("regparm", name))
5493 tree cst;
5495 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5497 error ("fastcall and regparm attributes are not compatible");
5500 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5502 error ("regparam and thiscall attributes are not compatible");
5505 cst = TREE_VALUE (args);
5506 if (TREE_CODE (cst) != INTEGER_CST)
5508 warning (OPT_Wattributes,
5509 "%qE attribute requires an integer constant argument",
5510 name);
5511 *no_add_attrs = true;
5513 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5515 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5516 name, REGPARM_MAX);
5517 *no_add_attrs = true;
5520 return NULL_TREE;
5523 if (TARGET_64BIT)
5525 /* Do not warn when emulating the MS ABI. */
5526 if ((TREE_CODE (*node) != FUNCTION_TYPE
5527 && TREE_CODE (*node) != METHOD_TYPE)
5528 || ix86_function_type_abi (*node) != MS_ABI)
5529 warning (OPT_Wattributes, "%qE attribute ignored",
5530 name);
5531 *no_add_attrs = true;
5532 return NULL_TREE;
5535 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5536 if (is_attribute_p ("fastcall", name))
5538 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5540 error ("fastcall and cdecl attributes are not compatible");
5542 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5544 error ("fastcall and stdcall attributes are not compatible");
5546 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5548 error ("fastcall and regparm attributes are not compatible");
5550 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5552 error ("fastcall and thiscall attributes are not compatible");
5556 /* Can combine stdcall with fastcall (redundant), regparm and
5557 sseregparm. */
5558 else if (is_attribute_p ("stdcall", name))
5560 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5562 error ("stdcall and cdecl attributes are not compatible");
5564 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5566 error ("stdcall and fastcall attributes are not compatible");
5568 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5570 error ("stdcall and thiscall attributes are not compatible");
5574 /* Can combine cdecl with regparm and sseregparm. */
5575 else if (is_attribute_p ("cdecl", name))
5577 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5579 error ("stdcall and cdecl attributes are not compatible");
5581 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5583 error ("fastcall and cdecl attributes are not compatible");
5585 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5587 error ("cdecl and thiscall attributes are not compatible");
5590 else if (is_attribute_p ("thiscall", name))
5592 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5593 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5594 name);
5595 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5597 error ("stdcall and thiscall attributes are not compatible");
5599 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5601 error ("fastcall and thiscall attributes are not compatible");
5603 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5605 error ("cdecl and thiscall attributes are not compatible");
5609 /* Can combine sseregparm with all attributes. */
5611 return NULL_TREE;
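/* Illustrative sketch, not part of i386.c: hypothetical declarations such as

     void bad (int) __attribute__ ((fastcall, regparm (3)));
     void ok  (int) __attribute__ ((stdcall,  regparm (2)));

   exercise the checks above: the first is rejected with "fastcall and regparm
   attributes are not compatible", while stdcall may be freely combined with
   regparm and sseregparm.  */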
5614 /* The transactional memory builtins are implicitly regparm or fastcall
5615 depending on the ABI. Override the generic do-nothing attribute that
5616 these builtins were declared with, and replace it with one of the two
5617 attributes that we expect elsewhere. */
5619 static tree
5620 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5621 int flags, bool *no_add_attrs)
5623 tree alt;
5625 /* In no case do we want to add the placeholder attribute. */
5626 *no_add_attrs = true;
5628 /* The 64-bit ABI is unchanged for transactional memory. */
5629 if (TARGET_64BIT)
5630 return NULL_TREE;
5632 /* ??? Is there a better way to validate 32-bit windows? We have
5633 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5634 if (CHECK_STACK_LIMIT > 0)
5635 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5636 else
5638 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5639 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5641 decl_attributes (node, alt, flags);
5643 return NULL_TREE;
5646 /* This function determines from TYPE the calling-convention. */
5648 unsigned int
5649 ix86_get_callcvt (const_tree type)
5651 unsigned int ret = 0;
5652 bool is_stdarg;
5653 tree attrs;
5655 if (TARGET_64BIT)
5656 return IX86_CALLCVT_CDECL;
5658 attrs = TYPE_ATTRIBUTES (type);
5659 if (attrs != NULL_TREE)
5661 if (lookup_attribute ("cdecl", attrs))
5662 ret |= IX86_CALLCVT_CDECL;
5663 else if (lookup_attribute ("stdcall", attrs))
5664 ret |= IX86_CALLCVT_STDCALL;
5665 else if (lookup_attribute ("fastcall", attrs))
5666 ret |= IX86_CALLCVT_FASTCALL;
5667 else if (lookup_attribute ("thiscall", attrs))
5668 ret |= IX86_CALLCVT_THISCALL;
5670 /* Regparm isn't allowed for thiscall and fastcall. */
5671 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5673 if (lookup_attribute ("regparm", attrs))
5674 ret |= IX86_CALLCVT_REGPARM;
5675 if (lookup_attribute ("sseregparm", attrs))
5676 ret |= IX86_CALLCVT_SSEREGPARM;
5679 if (IX86_BASE_CALLCVT(ret) != 0)
5680 return ret;
5683 is_stdarg = stdarg_p (type);
5684 if (TARGET_RTD && !is_stdarg)
5685 return IX86_CALLCVT_STDCALL | ret;
5687 if (ret != 0
5688 || is_stdarg
5689 || TREE_CODE (type) != METHOD_TYPE
5690 || ix86_function_type_abi (type) != MS_ABI)
5691 return IX86_CALLCVT_CDECL | ret;
5693 return IX86_CALLCVT_THISCALL;
5696 /* Return 0 if the attributes for two types are incompatible, 1 if they
5697 are compatible, and 2 if they are nearly compatible (which causes a
5698 warning to be generated). */
5700 static int
5701 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5703 unsigned int ccvt1, ccvt2;
5705 if (TREE_CODE (type1) != FUNCTION_TYPE
5706 && TREE_CODE (type1) != METHOD_TYPE)
5707 return 1;
5709 ccvt1 = ix86_get_callcvt (type1);
5710 ccvt2 = ix86_get_callcvt (type2);
5711 if (ccvt1 != ccvt2)
5712 return 0;
5713 if (ix86_function_regparm (type1, NULL)
5714 != ix86_function_regparm (type2, NULL))
5715 return 0;
5717 return 1;
5720 /* Return the regparm value for a function with the indicated TYPE and DECL.
5721 DECL may be NULL when calling function indirectly
5722 or considering a libcall. */
5724 static int
5725 ix86_function_regparm (const_tree type, const_tree decl)
5727 tree attr;
5728 int regparm;
5729 unsigned int ccvt;
5731 if (TARGET_64BIT)
5732 return (ix86_function_type_abi (type) == SYSV_ABI
5733 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5734 ccvt = ix86_get_callcvt (type);
5735 regparm = ix86_regparm;
5737 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5739 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5740 if (attr)
5742 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5743 return regparm;
5746 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5747 return 2;
5748 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5749 return 1;
5751 /* Use register calling convention for local functions when possible. */
5752 if (decl
5753 && TREE_CODE (decl) == FUNCTION_DECL
5754 /* Caller and callee must agree on the calling convention, so
5755 checking just the current optimize setting here would mean that with
5756 __attribute__((optimize (...))) the caller could use the regparm convention
5757 and the callee not, or vice versa. Instead look at whether the callee
5758 itself is optimized. */
5759 && opt_for_fn (decl, optimize)
5760 && !(profile_flag && !flag_fentry))
5762 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5763 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5764 if (i && i->local && i->can_change_signature)
5766 int local_regparm, globals = 0, regno;
5768 /* Make sure no regparm register is taken by a
5769 fixed register variable. */
5770 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5771 if (fixed_regs[local_regparm])
5772 break;
5774 /* We don't want to use regparm(3) for nested functions as
5775 these use a static chain pointer in the third argument. */
5776 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5777 local_regparm = 2;
5779 /* In 32-bit mode save a register for the split stack. */
5780 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5781 local_regparm = 2;
5783 /* Each fixed register usage increases register pressure,
5784 so fewer registers should be used for argument passing.
5785 This functionality can be overridden by an explicit
5786 regparm value. */
5787 for (regno = AX_REG; regno <= DI_REG; regno++)
5788 if (fixed_regs[regno])
5789 globals++;
5791 local_regparm
5792 = globals < local_regparm ? local_regparm - globals : 0;
5794 if (local_regparm > regparm)
5795 regparm = local_regparm;
5799 return regparm;
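/* Illustrative sketch, not part of i386.c: for a hypothetical file-local
   function compiled with optimization on 32-bit,

     static int helper (int a, int b, int c) { return a + b + c; }

   the local-function path above may raise regparm to 3 so that a, b and c
   arrive in %eax, %edx and %ecx, unless a regparm register is fixed, the
   function needs a static chain, or -fsplit-stack reserves a register.  */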
5802 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5803 DFmode (2) arguments in SSE registers for a function with the
5804 indicated TYPE and DECL. DECL may be NULL when calling function
5805 indirectly or considering a libcall. Otherwise return 0. */
5807 static int
5808 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5810 gcc_assert (!TARGET_64BIT);
5812 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5813 by the sseregparm attribute. */
5814 if (TARGET_SSEREGPARM
5815 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5817 if (!TARGET_SSE)
5819 if (warn)
5821 if (decl)
5822 error ("calling %qD with attribute sseregparm without "
5823 "SSE/SSE2 enabled", decl);
5824 else
5825 error ("calling %qT with attribute sseregparm without "
5826 "SSE/SSE2 enabled", type);
5828 return 0;
5831 return 2;
5834 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5835 (and DFmode for SSE2) arguments in SSE registers. */
5836 if (decl && TARGET_SSE_MATH && optimize
5837 && !(profile_flag && !flag_fentry))
5839 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5840 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5841 if (i && i->local && i->can_change_signature)
5842 return TARGET_SSE2 ? 2 : 1;
5845 return 0;
5848 /* Return true if EAX is live at the start of the function. Used by
5849 ix86_expand_prologue to determine if we need special help before
5850 calling allocate_stack_worker. */
5852 static bool
5853 ix86_eax_live_at_start_p (void)
5855 /* Cheat. Don't bother working forward from ix86_function_regparm
5856 to the function type to whether an actual argument is located in
5857 eax. Instead just look at cfg info, which is still close enough
5858 to correct at this point. This gives false positives for broken
5859 functions that might use uninitialized data that happens to be
5860 allocated in eax, but who cares? */
5861 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5864 static bool
5865 ix86_keep_aggregate_return_pointer (tree fntype)
5867 tree attr;
5869 if (!TARGET_64BIT)
5871 attr = lookup_attribute ("callee_pop_aggregate_return",
5872 TYPE_ATTRIBUTES (fntype));
5873 if (attr)
5874 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5876 /* For 32-bit MS-ABI the default is to keep aggregate
5877 return pointer. */
5878 if (ix86_function_type_abi (fntype) == MS_ABI)
5879 return true;
5881 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5884 /* Value is the number of bytes of arguments automatically
5885 popped when returning from a subroutine call.
5886 FUNDECL is the declaration node of the function (as a tree),
5887 FUNTYPE is the data type of the function (as a tree),
5888 or for a library call it is an identifier node for the subroutine name.
5889 SIZE is the number of bytes of arguments passed on the stack.
5891 On the 80386, the RTD insn may be used to pop them if the number
5892 of args is fixed, but if the number is variable then the caller
5893 must pop them all. RTD can't be used for library calls now
5894 because the library is compiled with the Unix compiler.
5895 Use of RTD is a selectable option, since it is incompatible with
5896 standard Unix calling sequences. If the option is not selected,
5897 the caller must always pop the args.
5899 The attribute stdcall is equivalent to RTD on a per module basis. */
5901 static int
5902 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5904 unsigned int ccvt;
5906 /* None of the 64-bit ABIs pop arguments. */
5907 if (TARGET_64BIT)
5908 return 0;
5910 ccvt = ix86_get_callcvt (funtype);
5912 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5913 | IX86_CALLCVT_THISCALL)) != 0
5914 && ! stdarg_p (funtype))
5915 return size;
5917 /* Lose any fake structure return argument if it is passed on the stack. */
5918 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5919 && !ix86_keep_aggregate_return_pointer (funtype))
5921 int nregs = ix86_function_regparm (funtype, fundecl);
5922 if (nregs == 0)
5923 return GET_MODE_SIZE (Pmode);
5926 return 0;
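/* Illustrative sketch, not part of i386.c: on 32-bit, hypothetical
   declarations like

     int __attribute__ ((stdcall)) f (int a, int b);
     int g (int a, ...);

   follow the logic above: f, having a fixed argument list and the stdcall
   convention, pops its 8 bytes of arguments itself (ret $8), while the
   varargs g returns 0 here and leaves popping to the caller.  */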
5929 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5931 static bool
5932 ix86_legitimate_combined_insn (rtx_insn *insn)
5934 /* Check operand constraints in case hard registers were propagated
5935 into insn pattern. This check prevents combine pass from
5936 generating insn patterns with invalid hard register operands.
5937 These invalid insns can eventually confuse reload to error out
5938 with a spill failure. See also PRs 46829 and 46843. */
5939 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5941 int i;
5943 extract_insn (insn);
5944 preprocess_constraints (insn);
5946 int n_operands = recog_data.n_operands;
5947 int n_alternatives = recog_data.n_alternatives;
5948 for (i = 0; i < n_operands; i++)
5950 rtx op = recog_data.operand[i];
5951 machine_mode mode = GET_MODE (op);
5952 const operand_alternative *op_alt;
5953 int offset = 0;
5954 bool win;
5955 int j;
5957 /* For pre-AVX disallow unaligned loads/stores where the
5958 instructions don't support it. */
5959 if (!TARGET_AVX
5960 && VECTOR_MODE_P (GET_MODE (op))
5961 && misaligned_operand (op, GET_MODE (op)))
5963 int min_align = get_attr_ssememalign (insn);
5964 if (min_align == 0)
5965 return false;
5968 /* A unary operator may be accepted by the predicate, but it
5969 is irrelevant for matching constraints. */
5970 if (UNARY_P (op))
5971 op = XEXP (op, 0);
5973 if (GET_CODE (op) == SUBREG)
5975 if (REG_P (SUBREG_REG (op))
5976 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5977 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5978 GET_MODE (SUBREG_REG (op)),
5979 SUBREG_BYTE (op),
5980 GET_MODE (op));
5981 op = SUBREG_REG (op);
5984 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5985 continue;
5987 op_alt = recog_op_alt;
5989 /* Operand has no constraints, anything is OK. */
5990 win = !n_alternatives;
5992 alternative_mask preferred = get_preferred_alternatives (insn);
5993 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5995 if (!TEST_BIT (preferred, j))
5996 continue;
5997 if (op_alt[i].anything_ok
5998 || (op_alt[i].matches != -1
5999 && operands_match_p
6000 (recog_data.operand[i],
6001 recog_data.operand[op_alt[i].matches]))
6002 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6004 win = true;
6005 break;
6009 if (!win)
6010 return false;
6014 return true;
6017 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6019 static unsigned HOST_WIDE_INT
6020 ix86_asan_shadow_offset (void)
6022 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6023 : HOST_WIDE_INT_C (0x7fff8000))
6024 : (HOST_WIDE_INT_1 << 29);
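/* Illustrative sketch, not part of i386.c: with the LP64 Linux value above,
   AddressSanitizer maps an application address roughly as
   shadow = (addr >> 3) + 0x7fff8000 (ASan's 8-to-1 shadow scale), while
   32-bit targets use the 1 << 29 offset instead.  */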
6027 /* Argument support functions. */
6029 /* Return true when register may be used to pass function parameters. */
6030 bool
6031 ix86_function_arg_regno_p (int regno)
6033 int i;
6034 const int *parm_regs;
6036 if (!TARGET_64BIT)
6038 if (TARGET_MACHO)
6039 return (regno < REGPARM_MAX
6040 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6041 else
6042 return (regno < REGPARM_MAX
6043 || (TARGET_MMX && MMX_REGNO_P (regno)
6044 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6045 || (TARGET_SSE && SSE_REGNO_P (regno)
6046 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6049 if (TARGET_SSE && SSE_REGNO_P (regno)
6050 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6051 return true;
6053 /* TODO: The function should depend on current function ABI but
6054 builtins.c would need updating then. Therefore we use the
6055 default ABI. */
6057 /* RAX is used as hidden argument to va_arg functions. */
6058 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6059 return true;
6061 if (ix86_abi == MS_ABI)
6062 parm_regs = x86_64_ms_abi_int_parameter_registers;
6063 else
6064 parm_regs = x86_64_int_parameter_registers;
6065 for (i = 0; i < (ix86_abi == MS_ABI
6066 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6067 if (regno == parm_regs[i])
6068 return true;
6069 return false;
6072 /* Return if we do not know how to pass TYPE solely in registers. */
6074 static bool
6075 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6077 if (must_pass_in_stack_var_size_or_pad (mode, type))
6078 return true;
6080 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6081 The layout_type routine is crafty and tries to trick us into passing
6082 currently unsupported vector types on the stack by using TImode. */
6083 return (!TARGET_64BIT && mode == TImode
6084 && type && TREE_CODE (type) != VECTOR_TYPE);
6087 /* Return the size, in bytes, of the area reserved for arguments passed
6088 in registers for the function represented by FNDECL, depending on the
6089 ABI format used. */
6091 ix86_reg_parm_stack_space (const_tree fndecl)
6093 enum calling_abi call_abi = SYSV_ABI;
6094 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6095 call_abi = ix86_function_abi (fndecl);
6096 else
6097 call_abi = ix86_function_type_abi (fndecl);
6098 if (TARGET_64BIT && call_abi == MS_ABI)
6099 return 32;
6100 return 0;
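/* Illustrative sketch, not part of i386.c: the 32 bytes returned above for
   the 64-bit MS ABI correspond to the four 8-byte "home" slots a caller must
   reserve on the stack for the register arguments passed in RCX, RDX, R8 and
   R9; the SysV ABIs reserve no such area.  */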
6103 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6104 call abi used. */
6105 enum calling_abi
6106 ix86_function_type_abi (const_tree fntype)
6108 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6110 enum calling_abi abi = ix86_abi;
6111 if (abi == SYSV_ABI)
6113 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6114 abi = MS_ABI;
6116 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6117 abi = SYSV_ABI;
6118 return abi;
6120 return ix86_abi;
6123 /* We add this as a workaround in order to use libc_has_function
6124 hook in i386.md. */
6125 bool
6126 ix86_libc_has_function (enum function_class fn_class)
6128 return targetm.libc_has_function (fn_class);
6131 static bool
6132 ix86_function_ms_hook_prologue (const_tree fn)
6134 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6136 if (decl_function_context (fn) != NULL_TREE)
6137 error_at (DECL_SOURCE_LOCATION (fn),
6138 "ms_hook_prologue is not compatible with nested function");
6139 else
6140 return true;
6142 return false;
6145 static enum calling_abi
6146 ix86_function_abi (const_tree fndecl)
6148 if (! fndecl)
6149 return ix86_abi;
6150 return ix86_function_type_abi (TREE_TYPE (fndecl));
6153 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6154 call abi used. */
6155 enum calling_abi
6156 ix86_cfun_abi (void)
6158 if (! cfun)
6159 return ix86_abi;
6160 return cfun->machine->call_abi;
6163 /* Write the extra assembler code needed to declare a function properly. */
6165 void
6166 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6167 tree decl)
6169 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6171 if (is_ms_hook)
6173 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6174 unsigned int filler_cc = 0xcccccccc;
6176 for (i = 0; i < filler_count; i += 4)
6177 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6180 #ifdef SUBTARGET_ASM_UNWIND_INIT
6181 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6182 #endif
6184 ASM_OUTPUT_LABEL (asm_out_file, fname);
6186 /* Output magic byte marker, if hot-patch attribute is set. */
6187 if (is_ms_hook)
6189 if (TARGET_64BIT)
6191 /* leaq [%rsp + 0], %rsp */
6192 asm_fprintf (asm_out_file, ASM_BYTE
6193 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6195 else
6197 /* movl.s %edi, %edi
6198 push %ebp
6199 movl.s %esp, %ebp */
6200 asm_fprintf (asm_out_file, ASM_BYTE
6201 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6206 /* regclass.c */
6207 extern void init_regs (void);
6209 /* Implementation of call abi switching target hook. Specific to FNDECL
6210 the specific call register sets are set. See also
6211 ix86_conditional_register_usage for more details. */
6212 void
6213 ix86_call_abi_override (const_tree fndecl)
6215 if (fndecl == NULL_TREE)
6216 cfun->machine->call_abi = ix86_abi;
6217 else
6218 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6221 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6222 expensive re-initialization of init_regs each time we switch function context
6223 since this is needed only during RTL expansion. */
6224 static void
6225 ix86_maybe_switch_abi (void)
6227 if (TARGET_64BIT &&
6228 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6229 reinit_regs ();
6232 /* Return 1 if pseudo register should be created and used to hold
6233 GOT address for PIC code. */
6234 static bool
6235 ix86_use_pseudo_pic_reg (void)
6237 if ((TARGET_64BIT
6238 && (ix86_cmodel == CM_SMALL_PIC
6239 || TARGET_PECOFF))
6240 || !flag_pic)
6241 return false;
6242 return true;
6245 /* Initialize large model PIC register. */
6247 static void
6248 ix86_init_large_pic_reg (unsigned int tmp_regno)
6250 rtx_code_label *label;
6251 rtx tmp_reg;
6253 gcc_assert (Pmode == DImode);
6254 label = gen_label_rtx ();
6255 emit_label (label);
6256 LABEL_PRESERVE_P (label) = 1;
6257 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6258 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6259 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6260 label));
6261 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6262 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6263 pic_offset_table_rtx, tmp_reg));
6266 /* Create and initialize PIC register if required. */
6267 static void
6268 ix86_init_pic_reg (void)
6270 edge entry_edge;
6271 rtx_insn *seq;
6273 if (!ix86_use_pseudo_pic_reg ())
6274 return;
6276 start_sequence ();
6278 if (TARGET_64BIT)
6280 if (ix86_cmodel == CM_LARGE_PIC)
6281 ix86_init_large_pic_reg (R11_REG);
6282 else
6283 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6285 else
6287 /* If there is a future mcount call in the function, it is more profitable
6288 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6289 rtx reg = crtl->profile
6290 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6291 : pic_offset_table_rtx;
6292 rtx insn = emit_insn (gen_set_got (reg));
6293 RTX_FRAME_RELATED_P (insn) = 1;
6294 if (crtl->profile)
6295 emit_move_insn (pic_offset_table_rtx, reg);
6296 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6299 seq = get_insns ();
6300 end_sequence ();
6302 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6303 insert_insn_on_edge (seq, entry_edge);
6304 commit_one_edge_insertion (entry_edge);
6307 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6308 for a call to a function whose data type is FNTYPE.
6309 For a library call, FNTYPE is 0. */
6311 void
6312 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6313 tree fntype, /* tree ptr for function decl */
6314 rtx libname, /* SYMBOL_REF of library name or 0 */
6315 tree fndecl,
6316 int caller)
6318 struct cgraph_local_info *i;
6320 memset (cum, 0, sizeof (*cum));
6322 if (fndecl)
6324 i = cgraph_node::local_info (fndecl);
6325 cum->call_abi = ix86_function_abi (fndecl);
6327 else
6329 i = NULL;
6330 cum->call_abi = ix86_function_type_abi (fntype);
6333 cum->caller = caller;
6335 /* Set up the number of registers to use for passing arguments. */
6336 cum->nregs = ix86_regparm;
6337 if (TARGET_64BIT)
6339 cum->nregs = (cum->call_abi == SYSV_ABI
6340 ? X86_64_REGPARM_MAX
6341 : X86_64_MS_REGPARM_MAX);
6343 if (TARGET_SSE)
6345 cum->sse_nregs = SSE_REGPARM_MAX;
6346 if (TARGET_64BIT)
6348 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6349 ? X86_64_SSE_REGPARM_MAX
6350 : X86_64_MS_SSE_REGPARM_MAX);
6353 if (TARGET_MMX)
6354 cum->mmx_nregs = MMX_REGPARM_MAX;
6355 cum->warn_avx512f = true;
6356 cum->warn_avx = true;
6357 cum->warn_sse = true;
6358 cum->warn_mmx = true;
6360 /* Because the type might mismatch between caller and callee, we need to
6361 use the actual type of the function for local calls.
6362 FIXME: cgraph_analyze can be told to actually record whether a function uses
6363 va_start, so for local functions maybe_vaarg can be made more aggressive,
6364 helping K&R code.
6365 FIXME: once the type system is fixed, we won't need this code anymore. */
6366 if (i && i->local && i->can_change_signature)
6367 fntype = TREE_TYPE (fndecl);
6368 cum->stdarg = stdarg_p (fntype);
6369 cum->maybe_vaarg = (fntype
6370 ? (!prototype_p (fntype) || stdarg_p (fntype))
6371 : !libname);
6373 cum->bnd_regno = FIRST_BND_REG;
6374 cum->bnds_in_bt = 0;
6375 cum->force_bnd_pass = 0;
6377 if (!TARGET_64BIT)
6379 /* If there are variable arguments, then we won't pass anything
6380 in registers in 32-bit mode. */
6381 if (stdarg_p (fntype))
6383 cum->nregs = 0;
6384 cum->sse_nregs = 0;
6385 cum->mmx_nregs = 0;
6386 cum->warn_avx512f = false;
6387 cum->warn_avx = false;
6388 cum->warn_sse = false;
6389 cum->warn_mmx = false;
6390 return;
6393 /* Use ecx and edx registers if function has fastcall attribute,
6394 else look for regparm information. */
6395 if (fntype)
6397 unsigned int ccvt = ix86_get_callcvt (fntype);
6398 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6400 cum->nregs = 1;
6401 cum->fastcall = 1; /* Same first register as in fastcall. */
6403 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6405 cum->nregs = 2;
6406 cum->fastcall = 1;
6408 else
6409 cum->nregs = ix86_function_regparm (fntype, fndecl);
6412 /* Set up the number of SSE registers used for passing SFmode
6413 and DFmode arguments. Warn for mismatching ABI. */
6414 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
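/* As a rough illustration of the 32-bit conventions set up above (the
   declarations below are hypothetical examples, assuming the usual IA-32
   attribute semantics):

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   yields cum->nregs == 2 and cum->fastcall == 1, so A goes in ECX, B in
   EDX and C on the stack, while

     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   yields cum->nregs == 3 and the arguments go in EAX, EDX and ECX.  */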
6418 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6419 But in the case of vector types, it is some vector mode.
6421 When we have only some of our vector isa extensions enabled, then there
6422 are some modes for which vector_mode_supported_p is false. For these
6423 modes, the generic vector support in gcc will choose some non-vector mode
6424 in order to implement the type. By computing the natural mode, we'll
6425 select the proper ABI location for the operand and not depend on whatever
6426 the middle-end decides to do with these vector types.
6428 The middle-end can't deal with vector types larger than 16 bytes. In this
6429 case, we return the original mode and warn about the ABI change if CUM
6430 isn't NULL.
6432 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6433 available for the function return value. */
6435 static machine_mode
6436 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6437 bool in_return)
6439 machine_mode mode = TYPE_MODE (type);
6441 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6443 HOST_WIDE_INT size = int_size_in_bytes (type);
6444 if ((size == 8 || size == 16 || size == 32 || size == 64)
6445 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6446 && TYPE_VECTOR_SUBPARTS (type) > 1)
6448 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6450 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6451 mode = MIN_MODE_VECTOR_FLOAT;
6452 else
6453 mode = MIN_MODE_VECTOR_INT;
6455 /* Get the mode which has this inner mode and number of units. */
6456 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6457 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6458 && GET_MODE_INNER (mode) == innermode)
6460 if (size == 64 && !TARGET_AVX512F)
6462 static bool warnedavx512f;
6463 static bool warnedavx512f_ret;
6465 if (cum && cum->warn_avx512f && !warnedavx512f)
6467 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6468 "without AVX512F enabled changes the ABI"))
6469 warnedavx512f = true;
6471 else if (in_return && !warnedavx512f_ret)
6473 if (warning (OPT_Wpsabi, "AVX512F vector return "
6474 "without AVX512F enabled changes the ABI"))
6475 warnedavx512f_ret = true;
6478 return TYPE_MODE (type);
6480 else if (size == 32 && !TARGET_AVX)
6482 static bool warnedavx;
6483 static bool warnedavx_ret;
6485 if (cum && cum->warn_avx && !warnedavx)
6487 if (warning (OPT_Wpsabi, "AVX vector argument "
6488 "without AVX enabled changes the ABI"))
6489 warnedavx = true;
6491 else if (in_return && !warnedavx_ret)
6493 if (warning (OPT_Wpsabi, "AVX vector return "
6494 "without AVX enabled changes the ABI"))
6495 warnedavx_ret = true;
6498 return TYPE_MODE (type);
6500 else if (((size == 8 && TARGET_64BIT) || size == 16)
6501 && !TARGET_SSE)
6503 static bool warnedsse;
6504 static bool warnedsse_ret;
6506 if (cum && cum->warn_sse && !warnedsse)
6508 if (warning (OPT_Wpsabi, "SSE vector argument "
6509 "without SSE enabled changes the ABI"))
6510 warnedsse = true;
6512 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6514 if (warning (OPT_Wpsabi, "SSE vector return "
6515 "without SSE enabled changes the ABI"))
6516 warnedsse_ret = true;
6519 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6521 static bool warnedmmx;
6522 static bool warnedmmx_ret;
6524 if (cum && cum->warn_mmx && !warnedmmx)
6526 if (warning (OPT_Wpsabi, "MMX vector argument "
6527 "without MMX enabled changes the ABI"))
6528 warnedmmx = true;
6530 else if (in_return && !warnedmmx_ret)
6532 if (warning (OPT_Wpsabi, "MMX vector return "
6533 "without MMX enabled changes the ABI"))
6534 warnedmmx_ret = true;
6537 return mode;
6540 gcc_unreachable ();
6544 return mode;
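/* A rough example of what type_natural_mode computes: for a hypothetical

     typedef int v8si __attribute__ ((vector_size (32)));

   TYPE_MODE is V8SImode when AVX is enabled.  Without AVX the middle-end
   falls back to BLKmode for the type, so the mode loop above recovers
   V8SImode from the element type and subpart count; but because AVX is
   disabled the function issues the -Wpsabi warning and returns the
   original TYPE_MODE instead.  */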
6547 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6548 this may not agree with the mode that the type system has chosen for the
6549 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6550 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6552 static rtx
6553 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6554 unsigned int regno)
6556 rtx tmp;
6558 if (orig_mode != BLKmode)
6559 tmp = gen_rtx_REG (orig_mode, regno);
6560 else
6562 tmp = gen_rtx_REG (mode, regno);
6563 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6564 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6567 return tmp;
6570 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6571 of this code is to classify each 8bytes of incoming argument by the register
6572 class and assign registers accordingly. */
6574 /* Return the union class of CLASS1 and CLASS2.
6575 See the x86-64 PS ABI for details. */
6577 static enum x86_64_reg_class
6578 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6580 /* Rule #1: If both classes are equal, this is the resulting class. */
6581 if (class1 == class2)
6582 return class1;
6584 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6585 the other class. */
6586 if (class1 == X86_64_NO_CLASS)
6587 return class2;
6588 if (class2 == X86_64_NO_CLASS)
6589 return class1;
6591 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6592 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6593 return X86_64_MEMORY_CLASS;
6595 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6596 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6597 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6598 return X86_64_INTEGERSI_CLASS;
6599 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6600 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6601 return X86_64_INTEGER_CLASS;
6603 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6604 MEMORY is used. */
6605 if (class1 == X86_64_X87_CLASS
6606 || class1 == X86_64_X87UP_CLASS
6607 || class1 == X86_64_COMPLEX_X87_CLASS
6608 || class2 == X86_64_X87_CLASS
6609 || class2 == X86_64_X87UP_CLASS
6610 || class2 == X86_64_COMPLEX_X87_CLASS)
6611 return X86_64_MEMORY_CLASS;
6613 /* Rule #6: Otherwise class SSE is used. */
6614 return X86_64_SSE_CLASS;
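/* For example, merging the two halves of an eightbyte that contains an
   int followed by a float, as in the hypothetical

     struct s { int i; float f; };

   the int half classifies as an integer class and the float half as an
   SSE class; rule #4 above merges them to an integer class, so the whole
   struct is passed in a single general-purpose register on x86-64.  */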
6617 /* Classify the argument of type TYPE and mode MODE.
6618 CLASSES will be filled by the register class used to pass each word
6619 of the operand. The number of words is returned. In case the parameter
6620 should be passed in memory, 0 is returned. As a special case for zero
6621 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6623 BIT_OFFSET is used internally for handling records and specifies the
6624 offset of the record in bits, modulo 512, to avoid overflow cases.
6626 See the x86-64 PS ABI for details.
6629 static int
6630 classify_argument (machine_mode mode, const_tree type,
6631 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6633 HOST_WIDE_INT bytes =
6634 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6635 int words
6636 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6638 /* Variable sized entities are always passed/returned in memory. */
6639 if (bytes < 0)
6640 return 0;
6642 if (mode != VOIDmode
6643 && targetm.calls.must_pass_in_stack (mode, type))
6644 return 0;
6646 if (type && AGGREGATE_TYPE_P (type))
6648 int i;
6649 tree field;
6650 enum x86_64_reg_class subclasses[MAX_CLASSES];
6652 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6653 if (bytes > 64)
6654 return 0;
6656 for (i = 0; i < words; i++)
6657 classes[i] = X86_64_NO_CLASS;
6659 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6660 signal the memory class, so handle this as a special case. */
6661 if (!words)
6663 classes[0] = X86_64_NO_CLASS;
6664 return 1;
6667 /* Classify each field of record and merge classes. */
6668 switch (TREE_CODE (type))
6670 case RECORD_TYPE:
6671 /* And now merge the fields of structure. */
6672 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6674 if (TREE_CODE (field) == FIELD_DECL)
6676 int num;
6678 if (TREE_TYPE (field) == error_mark_node)
6679 continue;
6681 /* Bitfields are always classified as integer. Handle them
6682 early, since later code would consider them to be
6683 misaligned integers. */
6684 if (DECL_BIT_FIELD (field))
6686 for (i = (int_bit_position (field)
6687 + (bit_offset % 64)) / 8 / 8;
6688 i < ((int_bit_position (field) + (bit_offset % 64))
6689 + tree_to_shwi (DECL_SIZE (field))
6690 + 63) / 8 / 8; i++)
6691 classes[i] =
6692 merge_classes (X86_64_INTEGER_CLASS,
6693 classes[i]);
6695 else
6697 int pos;
6699 type = TREE_TYPE (field);
6701 /* Flexible array member is ignored. */
6702 if (TYPE_MODE (type) == BLKmode
6703 && TREE_CODE (type) == ARRAY_TYPE
6704 && TYPE_SIZE (type) == NULL_TREE
6705 && TYPE_DOMAIN (type) != NULL_TREE
6706 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6707 == NULL_TREE))
6709 static bool warned;
6711 if (!warned && warn_psabi)
6713 warned = true;
6714 inform (input_location,
6715 "the ABI of passing struct with"
6716 " a flexible array member has"
6717 " changed in GCC 4.4");
6719 continue;
6721 num = classify_argument (TYPE_MODE (type), type,
6722 subclasses,
6723 (int_bit_position (field)
6724 + bit_offset) % 512);
6725 if (!num)
6726 return 0;
6727 pos = (int_bit_position (field)
6728 + (bit_offset % 64)) / 8 / 8;
6729 for (i = 0; i < num && (i + pos) < words; i++)
6730 classes[i + pos] =
6731 merge_classes (subclasses[i], classes[i + pos]);
6735 break;
6737 case ARRAY_TYPE:
6738 /* Arrays are handled as small records. */
6740 int num;
6741 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6742 TREE_TYPE (type), subclasses, bit_offset);
6743 if (!num)
6744 return 0;
6746 /* The partial classes are now full classes. */
6747 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6748 subclasses[0] = X86_64_SSE_CLASS;
6749 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6750 && !((bit_offset % 64) == 0 && bytes == 4))
6751 subclasses[0] = X86_64_INTEGER_CLASS;
6753 for (i = 0; i < words; i++)
6754 classes[i] = subclasses[i % num];
6756 break;
6758 case UNION_TYPE:
6759 case QUAL_UNION_TYPE:
6760 /* Unions are similar to RECORD_TYPE but offset is always 0.  */
6762 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6764 if (TREE_CODE (field) == FIELD_DECL)
6766 int num;
6768 if (TREE_TYPE (field) == error_mark_node)
6769 continue;
6771 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6772 TREE_TYPE (field), subclasses,
6773 bit_offset);
6774 if (!num)
6775 return 0;
6776 for (i = 0; i < num && i < words; i++)
6777 classes[i] = merge_classes (subclasses[i], classes[i]);
6780 break;
6782 default:
6783 gcc_unreachable ();
6786 if (words > 2)
6788 /* When the size is greater than 16 bytes, if the first eightbyte isn't
6789 X86_64_SSE_CLASS or any of the others isn't
6790 X86_64_SSEUP_CLASS, everything should be passed in
6791 memory. */
6792 if (classes[0] != X86_64_SSE_CLASS)
6793 return 0;
6795 for (i = 1; i < words; i++)
6796 if (classes[i] != X86_64_SSEUP_CLASS)
6797 return 0;
6800 /* Final merger cleanup. */
6801 for (i = 0; i < words; i++)
6803 /* If one class is MEMORY, everything should be passed in
6804 memory. */
6805 if (classes[i] == X86_64_MEMORY_CLASS)
6806 return 0;
6808 /* X86_64_SSEUP_CLASS should always be preceded by
6809 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6810 if (classes[i] == X86_64_SSEUP_CLASS
6811 && classes[i - 1] != X86_64_SSE_CLASS
6812 && classes[i - 1] != X86_64_SSEUP_CLASS)
6814 /* The first one should never be X86_64_SSEUP_CLASS. */
6815 gcc_assert (i != 0);
6816 classes[i] = X86_64_SSE_CLASS;
6819 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6820 everything should be passed in memory. */
6821 if (classes[i] == X86_64_X87UP_CLASS
6822 && (classes[i - 1] != X86_64_X87_CLASS))
6824 static bool warned;
6826 /* The first one should never be X86_64_X87UP_CLASS. */
6827 gcc_assert (i != 0);
6828 if (!warned && warn_psabi)
6830 warned = true;
6831 inform (input_location,
6832 "the ABI of passing union with long double"
6833 " has changed in GCC 4.4");
6835 return 0;
6838 return words;
6841 /* Compute the alignment needed. We align all types to their natural
6842 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6843 if (mode != VOIDmode && mode != BLKmode)
6845 int mode_alignment = GET_MODE_BITSIZE (mode);
6847 if (mode == XFmode)
6848 mode_alignment = 128;
6849 else if (mode == XCmode)
6850 mode_alignment = 256;
6851 if (COMPLEX_MODE_P (mode))
6852 mode_alignment /= 2;
6853 /* Misaligned fields are always returned in memory. */
6854 if (bit_offset % mode_alignment)
6855 return 0;
6858 /* for V1xx modes, just use the base mode */
6859 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6860 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6861 mode = GET_MODE_INNER (mode);
6863 /* Classification of atomic types. */
6864 switch (mode)
6866 case SDmode:
6867 case DDmode:
6868 classes[0] = X86_64_SSE_CLASS;
6869 return 1;
6870 case TDmode:
6871 classes[0] = X86_64_SSE_CLASS;
6872 classes[1] = X86_64_SSEUP_CLASS;
6873 return 2;
6874 case DImode:
6875 case SImode:
6876 case HImode:
6877 case QImode:
6878 case CSImode:
6879 case CHImode:
6880 case CQImode:
6882 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6884 /* Analyze last 128 bits only. */
6885 size = (size - 1) & 0x7f;
6887 if (size < 32)
6889 classes[0] = X86_64_INTEGERSI_CLASS;
6890 return 1;
6892 else if (size < 64)
6894 classes[0] = X86_64_INTEGER_CLASS;
6895 return 1;
6897 else if (size < 64+32)
6899 classes[0] = X86_64_INTEGER_CLASS;
6900 classes[1] = X86_64_INTEGERSI_CLASS;
6901 return 2;
6903 else if (size < 64+64)
6905 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6906 return 2;
6908 else
6909 gcc_unreachable ();
6911 case CDImode:
6912 case TImode:
6913 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6914 return 2;
6915 case COImode:
6916 case OImode:
6917 /* OImode shouldn't be used directly. */
6918 gcc_unreachable ();
6919 case CTImode:
6920 return 0;
6921 case SFmode:
6922 if (!(bit_offset % 64))
6923 classes[0] = X86_64_SSESF_CLASS;
6924 else
6925 classes[0] = X86_64_SSE_CLASS;
6926 return 1;
6927 case DFmode:
6928 classes[0] = X86_64_SSEDF_CLASS;
6929 return 1;
6930 case XFmode:
6931 classes[0] = X86_64_X87_CLASS;
6932 classes[1] = X86_64_X87UP_CLASS;
6933 return 2;
6934 case TFmode:
6935 classes[0] = X86_64_SSE_CLASS;
6936 classes[1] = X86_64_SSEUP_CLASS;
6937 return 2;
6938 case SCmode:
6939 classes[0] = X86_64_SSE_CLASS;
6940 if (!(bit_offset % 64))
6941 return 1;
6942 else
6944 static bool warned;
6946 if (!warned && warn_psabi)
6948 warned = true;
6949 inform (input_location,
6950 "the ABI of passing structure with complex float"
6951 " member has changed in GCC 4.4");
6953 classes[1] = X86_64_SSESF_CLASS;
6954 return 2;
6956 case DCmode:
6957 classes[0] = X86_64_SSEDF_CLASS;
6958 classes[1] = X86_64_SSEDF_CLASS;
6959 return 2;
6960 case XCmode:
6961 classes[0] = X86_64_COMPLEX_X87_CLASS;
6962 return 1;
6963 case TCmode:
6964 /* This mode is larger than 16 bytes. */
6965 return 0;
6966 case V8SFmode:
6967 case V8SImode:
6968 case V32QImode:
6969 case V16HImode:
6970 case V4DFmode:
6971 case V4DImode:
6972 classes[0] = X86_64_SSE_CLASS;
6973 classes[1] = X86_64_SSEUP_CLASS;
6974 classes[2] = X86_64_SSEUP_CLASS;
6975 classes[3] = X86_64_SSEUP_CLASS;
6976 return 4;
6977 case V8DFmode:
6978 case V16SFmode:
6979 case V8DImode:
6980 case V16SImode:
6981 case V32HImode:
6982 case V64QImode:
6983 classes[0] = X86_64_SSE_CLASS;
6984 classes[1] = X86_64_SSEUP_CLASS;
6985 classes[2] = X86_64_SSEUP_CLASS;
6986 classes[3] = X86_64_SSEUP_CLASS;
6987 classes[4] = X86_64_SSEUP_CLASS;
6988 classes[5] = X86_64_SSEUP_CLASS;
6989 classes[6] = X86_64_SSEUP_CLASS;
6990 classes[7] = X86_64_SSEUP_CLASS;
6991 return 8;
6992 case V4SFmode:
6993 case V4SImode:
6994 case V16QImode:
6995 case V8HImode:
6996 case V2DFmode:
6997 case V2DImode:
6998 classes[0] = X86_64_SSE_CLASS;
6999 classes[1] = X86_64_SSEUP_CLASS;
7000 return 2;
7001 case V1TImode:
7002 case V1DImode:
7003 case V2SFmode:
7004 case V2SImode:
7005 case V4HImode:
7006 case V8QImode:
7007 classes[0] = X86_64_SSE_CLASS;
7008 return 1;
7009 case BLKmode:
7010 case VOIDmode:
7011 return 0;
7012 default:
7013 gcc_assert (VECTOR_MODE_P (mode));
7015 if (bytes > 16)
7016 return 0;
7018 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7020 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7021 classes[0] = X86_64_INTEGERSI_CLASS;
7022 else
7023 classes[0] = X86_64_INTEGER_CLASS;
7024 classes[1] = X86_64_INTEGER_CLASS;
7025 return 1 + (bytes > 8);
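/* A worked example of the classification above (x86-64 SysV psABI), for
   the hypothetical

     struct s { double d; int i; };

   the struct occupies two eightbytes.  The first holds only the double
   and classifies as an SSE class; the second holds the int and
   classifies as an integer class.  classify_argument therefore returns
   2, and the struct travels in one SSE register plus one
   general-purpose register.  */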
7029 /* Examine the argument and set the number of registers required in each
7030 class. Return true iff the parameter should be passed in memory. */
7032 static bool
7033 examine_argument (machine_mode mode, const_tree type, int in_return,
7034 int *int_nregs, int *sse_nregs)
7036 enum x86_64_reg_class regclass[MAX_CLASSES];
7037 int n = classify_argument (mode, type, regclass, 0);
7039 *int_nregs = 0;
7040 *sse_nregs = 0;
7042 if (!n)
7043 return true;
7044 for (n--; n >= 0; n--)
7045 switch (regclass[n])
7047 case X86_64_INTEGER_CLASS:
7048 case X86_64_INTEGERSI_CLASS:
7049 (*int_nregs)++;
7050 break;
7051 case X86_64_SSE_CLASS:
7052 case X86_64_SSESF_CLASS:
7053 case X86_64_SSEDF_CLASS:
7054 (*sse_nregs)++;
7055 break;
7056 case X86_64_NO_CLASS:
7057 case X86_64_SSEUP_CLASS:
7058 break;
7059 case X86_64_X87_CLASS:
7060 case X86_64_X87UP_CLASS:
7061 case X86_64_COMPLEX_X87_CLASS:
7062 if (!in_return)
7063 return true;
7064 break;
7065 case X86_64_MEMORY_CLASS:
7066 gcc_unreachable ();
7069 return false;
7072 /* Construct container for the argument used by GCC interface. See
7073 FUNCTION_ARG for the detailed description. */
7075 static rtx
7076 construct_container (machine_mode mode, machine_mode orig_mode,
7077 const_tree type, int in_return, int nintregs, int nsseregs,
7078 const int *intreg, int sse_regno)
7080 /* The following variables hold the static issued_error state. */
7081 static bool issued_sse_arg_error;
7082 static bool issued_sse_ret_error;
7083 static bool issued_x87_ret_error;
7085 machine_mode tmpmode;
7086 int bytes =
7087 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7088 enum x86_64_reg_class regclass[MAX_CLASSES];
7089 int n;
7090 int i;
7091 int nexps = 0;
7092 int needed_sseregs, needed_intregs;
7093 rtx exp[MAX_CLASSES];
7094 rtx ret;
7096 n = classify_argument (mode, type, regclass, 0);
7097 if (!n)
7098 return NULL;
7099 if (examine_argument (mode, type, in_return, &needed_intregs,
7100 &needed_sseregs))
7101 return NULL;
7102 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7103 return NULL;
7105 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7106 some less clueful developer tries to use floating-point anyway. */
7107 if (needed_sseregs && !TARGET_SSE)
7109 if (in_return)
7111 if (!issued_sse_ret_error)
7113 error ("SSE register return with SSE disabled");
7114 issued_sse_ret_error = true;
7117 else if (!issued_sse_arg_error)
7119 error ("SSE register argument with SSE disabled");
7120 issued_sse_arg_error = true;
7122 return NULL;
7125 /* Likewise, error if the ABI requires us to return values in the
7126 x87 registers and the user specified -mno-80387. */
7127 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7128 for (i = 0; i < n; i++)
7129 if (regclass[i] == X86_64_X87_CLASS
7130 || regclass[i] == X86_64_X87UP_CLASS
7131 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7133 if (!issued_x87_ret_error)
7135 error ("x87 register return with x87 disabled");
7136 issued_x87_ret_error = true;
7138 return NULL;
7141 /* First construct simple cases. Avoid SCmode, since we want to use
7142 single register to pass this type. */
7143 if (n == 1 && mode != SCmode)
7144 switch (regclass[0])
7146 case X86_64_INTEGER_CLASS:
7147 case X86_64_INTEGERSI_CLASS:
7148 return gen_rtx_REG (mode, intreg[0]);
7149 case X86_64_SSE_CLASS:
7150 case X86_64_SSESF_CLASS:
7151 case X86_64_SSEDF_CLASS:
7152 if (mode != BLKmode)
7153 return gen_reg_or_parallel (mode, orig_mode,
7154 SSE_REGNO (sse_regno));
7155 break;
7156 case X86_64_X87_CLASS:
7157 case X86_64_COMPLEX_X87_CLASS:
7158 return gen_rtx_REG (mode, FIRST_STACK_REG);
7159 case X86_64_NO_CLASS:
7160 /* Zero sized array, struct or class. */
7161 return NULL;
7162 default:
7163 gcc_unreachable ();
7165 if (n == 2
7166 && regclass[0] == X86_64_SSE_CLASS
7167 && regclass[1] == X86_64_SSEUP_CLASS
7168 && mode != BLKmode)
7169 return gen_reg_or_parallel (mode, orig_mode,
7170 SSE_REGNO (sse_regno));
7171 if (n == 4
7172 && regclass[0] == X86_64_SSE_CLASS
7173 && regclass[1] == X86_64_SSEUP_CLASS
7174 && regclass[2] == X86_64_SSEUP_CLASS
7175 && regclass[3] == X86_64_SSEUP_CLASS
7176 && mode != BLKmode)
7177 return gen_reg_or_parallel (mode, orig_mode,
7178 SSE_REGNO (sse_regno));
7179 if (n == 8
7180 && regclass[0] == X86_64_SSE_CLASS
7181 && regclass[1] == X86_64_SSEUP_CLASS
7182 && regclass[2] == X86_64_SSEUP_CLASS
7183 && regclass[3] == X86_64_SSEUP_CLASS
7184 && regclass[4] == X86_64_SSEUP_CLASS
7185 && regclass[5] == X86_64_SSEUP_CLASS
7186 && regclass[6] == X86_64_SSEUP_CLASS
7187 && regclass[7] == X86_64_SSEUP_CLASS
7188 && mode != BLKmode)
7189 return gen_reg_or_parallel (mode, orig_mode,
7190 SSE_REGNO (sse_regno));
7191 if (n == 2
7192 && regclass[0] == X86_64_X87_CLASS
7193 && regclass[1] == X86_64_X87UP_CLASS)
7194 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7196 if (n == 2
7197 && regclass[0] == X86_64_INTEGER_CLASS
7198 && regclass[1] == X86_64_INTEGER_CLASS
7199 && (mode == CDImode || mode == TImode)
7200 && intreg[0] + 1 == intreg[1])
7201 return gen_rtx_REG (mode, intreg[0]);
7203 /* Otherwise figure out the entries of the PARALLEL. */
7204 for (i = 0; i < n; i++)
7206 int pos;
7208 switch (regclass[i])
7210 case X86_64_NO_CLASS:
7211 break;
7212 case X86_64_INTEGER_CLASS:
7213 case X86_64_INTEGERSI_CLASS:
7214 /* Merge TImodes on aligned occasions here too. */
7215 if (i * 8 + 8 > bytes)
7216 tmpmode
7217 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7218 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7219 tmpmode = SImode;
7220 else
7221 tmpmode = DImode;
7222 /* We've requested 24 bytes we
7223 don't have mode for. Use DImode. */
7224 if (tmpmode == BLKmode)
7225 tmpmode = DImode;
7226 exp [nexps++]
7227 = gen_rtx_EXPR_LIST (VOIDmode,
7228 gen_rtx_REG (tmpmode, *intreg),
7229 GEN_INT (i*8));
7230 intreg++;
7231 break;
7232 case X86_64_SSESF_CLASS:
7233 exp [nexps++]
7234 = gen_rtx_EXPR_LIST (VOIDmode,
7235 gen_rtx_REG (SFmode,
7236 SSE_REGNO (sse_regno)),
7237 GEN_INT (i*8));
7238 sse_regno++;
7239 break;
7240 case X86_64_SSEDF_CLASS:
7241 exp [nexps++]
7242 = gen_rtx_EXPR_LIST (VOIDmode,
7243 gen_rtx_REG (DFmode,
7244 SSE_REGNO (sse_regno)),
7245 GEN_INT (i*8));
7246 sse_regno++;
7247 break;
7248 case X86_64_SSE_CLASS:
7249 pos = i;
7250 switch (n)
7252 case 1:
7253 tmpmode = DImode;
7254 break;
7255 case 2:
7256 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7258 tmpmode = TImode;
7259 i++;
7261 else
7262 tmpmode = DImode;
7263 break;
7264 case 4:
7265 gcc_assert (i == 0
7266 && regclass[1] == X86_64_SSEUP_CLASS
7267 && regclass[2] == X86_64_SSEUP_CLASS
7268 && regclass[3] == X86_64_SSEUP_CLASS);
7269 tmpmode = OImode;
7270 i += 3;
7271 break;
7272 case 8:
7273 gcc_assert (i == 0
7274 && regclass[1] == X86_64_SSEUP_CLASS
7275 && regclass[2] == X86_64_SSEUP_CLASS
7276 && regclass[3] == X86_64_SSEUP_CLASS
7277 && regclass[4] == X86_64_SSEUP_CLASS
7278 && regclass[5] == X86_64_SSEUP_CLASS
7279 && regclass[6] == X86_64_SSEUP_CLASS
7280 && regclass[7] == X86_64_SSEUP_CLASS);
7281 tmpmode = XImode;
7282 i += 7;
7283 break;
7284 default:
7285 gcc_unreachable ();
7287 exp [nexps++]
7288 = gen_rtx_EXPR_LIST (VOIDmode,
7289 gen_rtx_REG (tmpmode,
7290 SSE_REGNO (sse_regno)),
7291 GEN_INT (pos*8));
7292 sse_regno++;
7293 break;
7294 default:
7295 gcc_unreachable ();
7299 /* Empty aligned struct, union or class. */
7300 if (nexps == 0)
7301 return NULL;
7303 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7304 for (i = 0; i < nexps; i++)
7305 XVECEXP (ret, 0, i) = exp [i];
7306 return ret;
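/* Rough sketch of the PARALLEL built here: for the two-eightbyte struct
   { double d; int i; } used as the first named argument of a SysV
   function (a hypothetical example), the classes are SSE followed by
   INTEGER, so the result is approximately

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                    (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double half lands in %xmm0 and the int half in %rdi.  */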
7309 /* Update the data in CUM to advance over an argument of mode MODE
7310 and data type TYPE. (TYPE is null for libcalls where that information
7311 may not be available.)
7313 Return the number of integer registers advanced over. */
7315 static int
7316 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7317 const_tree type, HOST_WIDE_INT bytes,
7318 HOST_WIDE_INT words)
7320 int res = 0;
7322 switch (mode)
7324 default:
7325 break;
7327 case BLKmode:
7328 if (bytes < 0)
7329 break;
7330 /* FALLTHRU */
7332 case DImode:
7333 case SImode:
7334 case HImode:
7335 case QImode:
7336 cum->words += words;
7337 cum->nregs -= words;
7338 cum->regno += words;
7339 if (cum->nregs >= 0)
7340 res = words;
7341 if (cum->nregs <= 0)
7343 cum->nregs = 0;
7344 cum->regno = 0;
7346 break;
7348 case OImode:
7349 /* OImode shouldn't be used directly. */
7350 gcc_unreachable ();
7352 case DFmode:
7353 if (cum->float_in_sse < 2)
7354 break;
7355 case SFmode:
7356 if (cum->float_in_sse < 1)
7357 break;
7358 /* FALLTHRU */
7360 case V8SFmode:
7361 case V8SImode:
7362 case V64QImode:
7363 case V32HImode:
7364 case V16SImode:
7365 case V8DImode:
7366 case V16SFmode:
7367 case V8DFmode:
7368 case V32QImode:
7369 case V16HImode:
7370 case V4DFmode:
7371 case V4DImode:
7372 case TImode:
7373 case V16QImode:
7374 case V8HImode:
7375 case V4SImode:
7376 case V2DImode:
7377 case V4SFmode:
7378 case V2DFmode:
7379 if (!type || !AGGREGATE_TYPE_P (type))
7381 cum->sse_words += words;
7382 cum->sse_nregs -= 1;
7383 cum->sse_regno += 1;
7384 if (cum->sse_nregs <= 0)
7386 cum->sse_nregs = 0;
7387 cum->sse_regno = 0;
7390 break;
7392 case V8QImode:
7393 case V4HImode:
7394 case V2SImode:
7395 case V2SFmode:
7396 case V1TImode:
7397 case V1DImode:
7398 if (!type || !AGGREGATE_TYPE_P (type))
7400 cum->mmx_words += words;
7401 cum->mmx_nregs -= 1;
7402 cum->mmx_regno += 1;
7403 if (cum->mmx_nregs <= 0)
7405 cum->mmx_nregs = 0;
7406 cum->mmx_regno = 0;
7409 break;
7412 return res;
7415 static int
7416 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7417 const_tree type, HOST_WIDE_INT words, bool named)
7419 int int_nregs, sse_nregs;
7421 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7422 if (!named && (VALID_AVX512F_REG_MODE (mode)
7423 || VALID_AVX256_REG_MODE (mode)))
7424 return 0;
7426 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7427 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7429 cum->nregs -= int_nregs;
7430 cum->sse_nregs -= sse_nregs;
7431 cum->regno += int_nregs;
7432 cum->sse_regno += sse_nregs;
7433 return int_nregs;
7435 else
7437 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7438 cum->words = (cum->words + align - 1) & ~(align - 1);
7439 cum->words += words;
7440 return 0;
7444 static int
7445 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7446 HOST_WIDE_INT words)
7448 /* Otherwise, this should be passed indirect. */
7449 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7451 cum->words += words;
7452 if (cum->nregs > 0)
7454 cum->nregs -= 1;
7455 cum->regno += 1;
7456 return 1;
7458 return 0;
7461 /* Update the data in CUM to advance over an argument of mode MODE and
7462 data type TYPE. (TYPE is null for libcalls where that information
7463 may not be available.) */
7465 static void
7466 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7467 const_tree type, bool named)
7469 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7470 HOST_WIDE_INT bytes, words;
7471 int nregs;
7473 if (mode == BLKmode)
7474 bytes = int_size_in_bytes (type);
7475 else
7476 bytes = GET_MODE_SIZE (mode);
7477 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7479 if (type)
7480 mode = type_natural_mode (type, NULL, false);
7482 if ((type && POINTER_BOUNDS_TYPE_P (type))
7483 || POINTER_BOUNDS_MODE_P (mode))
7485 /* If we pass bounds in BT then just update the remaining bounds count. */
7486 if (cum->bnds_in_bt)
7488 cum->bnds_in_bt--;
7489 return;
7492 /* Update the remaining number of bounds to force. */
7493 if (cum->force_bnd_pass)
7494 cum->force_bnd_pass--;
7496 cum->bnd_regno++;
7498 return;
7501 /* The first arg not going to Bounds Tables resets this counter. */
7502 cum->bnds_in_bt = 0;
7503 /* For unnamed args we always pass bounds to avoid a bounds mess when
7504 the passed and received types do not match. If bounds do not follow an
7505 unnamed arg, still pretend the required number of bounds were passed. */
7506 if (cum->force_bnd_pass)
7508 cum->bnd_regno += cum->force_bnd_pass;
7509 cum->force_bnd_pass = 0;
7512 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7513 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7514 else if (TARGET_64BIT)
7515 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7516 else
7517 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7519 /* For stdarg we expect bounds to be passed for each value passed
7520 in register. */
7521 if (cum->stdarg)
7522 cum->force_bnd_pass = nregs;
7523 /* For pointers passed in memory we expect bounds passed in Bounds
7524 Table. */
7525 if (!nregs)
7526 cum->bnds_in_bt = chkp_type_bounds_count (type);
7529 /* Define where to put the arguments to a function.
7530 Value is zero to push the argument on the stack,
7531 or a hard register in which to store the argument.
7533 MODE is the argument's machine mode.
7534 TYPE is the data type of the argument (as a tree).
7535 This is null for libcalls where that information may
7536 not be available.
7537 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7538 the preceding args and about the function being called.
7539 NAMED is nonzero if this argument is a named parameter
7540 (otherwise it is an extra parameter matching an ellipsis). */
7542 static rtx
7543 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7544 machine_mode orig_mode, const_tree type,
7545 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7547 /* Avoid the AL settings for the Unix64 ABI. */
7548 if (mode == VOIDmode)
7549 return constm1_rtx;
7551 switch (mode)
7553 default:
7554 break;
7556 case BLKmode:
7557 if (bytes < 0)
7558 break;
7559 /* FALLTHRU */
7560 case DImode:
7561 case SImode:
7562 case HImode:
7563 case QImode:
7564 if (words <= cum->nregs)
7566 int regno = cum->regno;
7568 /* Fastcall allocates the first two DWORD (SImode) or
7569 smaller arguments to ECX and EDX if it isn't an
7570 aggregate type. */
7571 if (cum->fastcall)
7573 if (mode == BLKmode
7574 || mode == DImode
7575 || (type && AGGREGATE_TYPE_P (type)))
7576 break;
7578 /* ECX, not EAX, is the first allocated register. */
7579 if (regno == AX_REG)
7580 regno = CX_REG;
7582 return gen_rtx_REG (mode, regno);
7584 break;
7586 case DFmode:
7587 if (cum->float_in_sse < 2)
7588 break;
7589 case SFmode:
7590 if (cum->float_in_sse < 1)
7591 break;
7592 /* FALLTHRU */
7593 case TImode:
7594 /* In 32bit, we pass TImode in xmm registers. */
7595 case V16QImode:
7596 case V8HImode:
7597 case V4SImode:
7598 case V2DImode:
7599 case V4SFmode:
7600 case V2DFmode:
7601 if (!type || !AGGREGATE_TYPE_P (type))
7603 if (cum->sse_nregs)
7604 return gen_reg_or_parallel (mode, orig_mode,
7605 cum->sse_regno + FIRST_SSE_REG);
7607 break;
7609 case OImode:
7610 case XImode:
7611 /* OImode and XImode shouldn't be used directly. */
7612 gcc_unreachable ();
7614 case V64QImode:
7615 case V32HImode:
7616 case V16SImode:
7617 case V8DImode:
7618 case V16SFmode:
7619 case V8DFmode:
7620 case V8SFmode:
7621 case V8SImode:
7622 case V32QImode:
7623 case V16HImode:
7624 case V4DFmode:
7625 case V4DImode:
7626 if (!type || !AGGREGATE_TYPE_P (type))
7628 if (cum->sse_nregs)
7629 return gen_reg_or_parallel (mode, orig_mode,
7630 cum->sse_regno + FIRST_SSE_REG);
7632 break;
7634 case V8QImode:
7635 case V4HImode:
7636 case V2SImode:
7637 case V2SFmode:
7638 case V1TImode:
7639 case V1DImode:
7640 if (!type || !AGGREGATE_TYPE_P (type))
7642 if (cum->mmx_nregs)
7643 return gen_reg_or_parallel (mode, orig_mode,
7644 cum->mmx_regno + FIRST_MMX_REG);
7646 break;
7649 return NULL_RTX;
7652 static rtx
7653 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7654 machine_mode orig_mode, const_tree type, bool named)
7656 /* Handle a hidden AL argument containing number of registers
7657 for varargs x86-64 functions. */
7658 if (mode == VOIDmode)
7659 return GEN_INT (cum->maybe_vaarg
7660 ? (cum->sse_nregs < 0
7661 ? X86_64_SSE_REGPARM_MAX
7662 : cum->sse_regno)
7663 : -1);
7665 switch (mode)
7667 default:
7668 break;
7670 case V8SFmode:
7671 case V8SImode:
7672 case V32QImode:
7673 case V16HImode:
7674 case V4DFmode:
7675 case V4DImode:
7676 case V16SFmode:
7677 case V16SImode:
7678 case V64QImode:
7679 case V32HImode:
7680 case V8DFmode:
7681 case V8DImode:
7682 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7683 if (!named)
7684 return NULL;
7685 break;
7688 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7689 cum->sse_nregs,
7690 &x86_64_int_parameter_registers [cum->regno],
7691 cum->sse_regno);
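/* The VOIDmode query above implements the SysV hidden %al argument: for
   a variadic call such as

     printf ("%f", 1.0);

   the double goes in %xmm0 and the query yields 1, so the caller sets
   %al to 1 before the call; the callee's prologue (see
   setup_incoming_varargs_64 below) uses that count to decide whether it
   must spill the SSE argument registers.  */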
7694 static rtx
7695 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7696 machine_mode orig_mode, bool named,
7697 HOST_WIDE_INT bytes)
7699 unsigned int regno;
7701 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7702 We use the value -2 to specify that the current function call is MS ABI. */
7703 if (mode == VOIDmode)
7704 return GEN_INT (-2);
7706 /* If we've run out of registers, it goes on the stack. */
7707 if (cum->nregs == 0)
7708 return NULL_RTX;
7710 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7712 /* Only floating point modes are passed in anything but integer regs. */
7713 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7715 if (named)
7716 regno = cum->regno + FIRST_SSE_REG;
7717 else
7719 rtx t1, t2;
7721 /* Unnamed floating parameters are passed in both the
7722 SSE and integer registers. */
7723 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7724 t2 = gen_rtx_REG (mode, regno);
7725 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7726 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7727 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7730 /* Handle aggregate types passed in registers. */
7731 if (orig_mode == BLKmode)
7733 if (bytes > 0 && bytes <= 8)
7734 mode = (bytes > 4 ? DImode : SImode);
7735 if (mode == BLKmode)
7736 mode = DImode;
7739 return gen_reg_or_parallel (mode, orig_mode, regno);
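/* Rough summary of the Microsoft x64 convention handled above: the first
   four arguments go in RCX, RDX, R8 and R9 (or in XMM0-XMM3 when the
   argument is SFmode/DFmode), each argument consumes one positional slot
   regardless of its class, and aggregates whose size is not 1, 2, 4 or 8
   bytes are passed by reference (see ix86_pass_by_reference below).  */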
7742 /* Return where to put the arguments to a function.
7743 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7745 MODE is the argument's machine mode. TYPE is the data type of the
7746 argument. It is null for libcalls where that information may not be
7747 available. CUM gives information about the preceding args and about
7748 the function being called. NAMED is nonzero if this argument is a
7749 named parameter (otherwise it is an extra parameter matching an
7750 ellipsis). */
7752 static rtx
7753 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7754 const_tree type, bool named)
7756 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7757 machine_mode mode = omode;
7758 HOST_WIDE_INT bytes, words;
7759 rtx arg;
7761 /* All pointer bounds arguments are handled separately here. */
7762 if ((type && POINTER_BOUNDS_TYPE_P (type))
7763 || POINTER_BOUNDS_MODE_P (mode))
7765 /* Return NULL if bounds are forced to go in Bounds Table. */
7766 if (cum->bnds_in_bt)
7767 arg = NULL;
7768 /* Return the next available bound reg if any. */
7769 else if (cum->bnd_regno <= LAST_BND_REG)
7770 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7771 /* Return the next special slot number otherwise. */
7772 else
7773 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7775 return arg;
7778 if (mode == BLKmode)
7779 bytes = int_size_in_bytes (type);
7780 else
7781 bytes = GET_MODE_SIZE (mode);
7782 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7784 /* To simplify the code below, represent vector types with a vector mode
7785 even if MMX/SSE are not active. */
7786 if (type && TREE_CODE (type) == VECTOR_TYPE)
7787 mode = type_natural_mode (type, cum, false);
7789 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7790 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7791 else if (TARGET_64BIT)
7792 arg = function_arg_64 (cum, mode, omode, type, named);
7793 else
7794 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7796 return arg;
7799 /* A C expression that indicates when an argument must be passed by
7800 reference. If nonzero for an argument, a copy of that argument is
7801 made in memory and a pointer to the argument is passed instead of
7802 the argument itself. The pointer is passed in whatever way is
7803 appropriate for passing a pointer to that type. */
7805 static bool
7806 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7807 const_tree type, bool)
7809 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7811 /* See Windows x64 Software Convention. */
7812 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7814 int msize = (int) GET_MODE_SIZE (mode);
7815 if (type)
7817 /* Arrays are passed by reference. */
7818 if (TREE_CODE (type) == ARRAY_TYPE)
7819 return true;
7821 if (AGGREGATE_TYPE_P (type))
7823 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7824 are passed by reference. */
7825 msize = int_size_in_bytes (type);
7829 /* __m128 is passed by reference. */
7830 switch (msize) {
7831 case 1: case 2: case 4: case 8:
7832 break;
7833 default:
7834 return true;
7837 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7838 return 1;
7840 return 0;
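/* Example under the Windows x64 rules above (hypothetical types):

     struct small { int a, b; };       8 bytes  -> passed by value
     struct big   { int a, b, c; };   12 bytes  -> passed by reference

   On 64-bit SysV only variable-sized types take the pass-by-reference
   path here; fixed-size aggregates are handled by the register
   classification instead.  */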
7843 /* Return true when TYPE should be 128bit aligned for 32bit argument
7844 passing ABI. XXX: This function is obsolete and is only used for
7845 checking psABI compatibility with previous versions of GCC. */
7847 static bool
7848 ix86_compat_aligned_value_p (const_tree type)
7850 machine_mode mode = TYPE_MODE (type);
7851 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7852 || mode == TDmode
7853 || mode == TFmode
7854 || mode == TCmode)
7855 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7856 return true;
7857 if (TYPE_ALIGN (type) < 128)
7858 return false;
7860 if (AGGREGATE_TYPE_P (type))
7862 /* Walk the aggregates recursively. */
7863 switch (TREE_CODE (type))
7865 case RECORD_TYPE:
7866 case UNION_TYPE:
7867 case QUAL_UNION_TYPE:
7869 tree field;
7871 /* Walk all the structure fields. */
7872 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7874 if (TREE_CODE (field) == FIELD_DECL
7875 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7876 return true;
7878 break;
7881 case ARRAY_TYPE:
7882 /* Just in case some languages pass arrays by value. */
7883 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7884 return true;
7885 break;
7887 default:
7888 gcc_unreachable ();
7891 return false;
7894 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7895 XXX: This function is obsolete and is only used for checking psABI
7896 compatibility with previous versions of GCC. */
7898 static unsigned int
7899 ix86_compat_function_arg_boundary (machine_mode mode,
7900 const_tree type, unsigned int align)
7902 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7903 natural boundaries. */
7904 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7906 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7907 make an exception for SSE modes since these require 128bit
7908 alignment.
7910 The handling here differs from field_alignment. ICC aligns MMX
7911 arguments to 4 byte boundaries, while structure fields are aligned
7912 to 8 byte boundaries. */
7913 if (!type)
7915 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7916 align = PARM_BOUNDARY;
7918 else
7920 if (!ix86_compat_aligned_value_p (type))
7921 align = PARM_BOUNDARY;
7924 if (align > BIGGEST_ALIGNMENT)
7925 align = BIGGEST_ALIGNMENT;
7926 return align;
7929 /* Return true when TYPE should be 128bit aligned for 32bit argument
7930 passing ABI. */
7932 static bool
7933 ix86_contains_aligned_value_p (const_tree type)
7935 machine_mode mode = TYPE_MODE (type);
7937 if (mode == XFmode || mode == XCmode)
7938 return false;
7940 if (TYPE_ALIGN (type) < 128)
7941 return false;
7943 if (AGGREGATE_TYPE_P (type))
7945 /* Walk the aggregates recursively. */
7946 switch (TREE_CODE (type))
7948 case RECORD_TYPE:
7949 case UNION_TYPE:
7950 case QUAL_UNION_TYPE:
7952 tree field;
7954 /* Walk all the structure fields. */
7955 for (field = TYPE_FIELDS (type);
7956 field;
7957 field = DECL_CHAIN (field))
7959 if (TREE_CODE (field) == FIELD_DECL
7960 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7961 return true;
7963 break;
7966 case ARRAY_TYPE:
7967 /* Just in case some languages pass arrays by value. */
7968 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7969 return true;
7970 break;
7972 default:
7973 gcc_unreachable ();
7976 else
7977 return TYPE_ALIGN (type) >= 128;
7979 return false;
7982 /* Gives the alignment boundary, in bits, of an argument with the
7983 specified mode and type. */
7985 static unsigned int
7986 ix86_function_arg_boundary (machine_mode mode, const_tree type)
7988 unsigned int align;
7989 if (type)
7991 /* Since the main variant type is used for the call, convert the
7992 type to its main variant. */
7993 type = TYPE_MAIN_VARIANT (type);
7994 align = TYPE_ALIGN (type);
7996 else
7997 align = GET_MODE_ALIGNMENT (mode);
7998 if (align < PARM_BOUNDARY)
7999 align = PARM_BOUNDARY;
8000 else
8002 static bool warned;
8003 unsigned int saved_align = align;
8005 if (!TARGET_64BIT)
8007 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8008 if (!type)
8010 if (mode == XFmode || mode == XCmode)
8011 align = PARM_BOUNDARY;
8013 else if (!ix86_contains_aligned_value_p (type))
8014 align = PARM_BOUNDARY;
8016 if (align < 128)
8017 align = PARM_BOUNDARY;
8020 if (warn_psabi
8021 && !warned
8022 && align != ix86_compat_function_arg_boundary (mode, type,
8023 saved_align))
8025 warned = true;
8026 inform (input_location,
8027 "The ABI for passing parameters with %d-byte"
8028 " alignment has changed in GCC 4.6",
8029 align / BITS_PER_UNIT);
8033 return align;
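/* For instance, on 32-bit targets a plain double argument ends up with
   PARM_BOUNDARY (32-bit) alignment, whereas an __m128 argument, whose
   type is 16-byte aligned, keeps its 128-bit boundary; the -Wpsabi note
   above fires when this result differs from what GCC versions before
   4.6 would have computed.  */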
8036 /* Return true if N is a possible register number of function value. */
8038 static bool
8039 ix86_function_value_regno_p (const unsigned int regno)
8041 switch (regno)
8043 case AX_REG:
8044 return true;
8045 case DX_REG:
8046 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8047 case DI_REG:
8048 case SI_REG:
8049 return TARGET_64BIT && ix86_abi != MS_ABI;
8051 case FIRST_BND_REG:
8052 return chkp_function_instrumented_p (current_function_decl);
8054 /* Complex values are returned in %st(0)/%st(1) pair. */
8055 case ST0_REG:
8056 case ST1_REG:
8057 /* TODO: The function should depend on current function ABI but
8058 builtins.c would need updating then. Therefore we use the
8059 default ABI. */
8060 if (TARGET_64BIT && ix86_abi == MS_ABI)
8061 return false;
8062 return TARGET_FLOAT_RETURNS_IN_80387;
8064 /* Complex values are returned in %xmm0/%xmm1 pair. */
8065 case XMM0_REG:
8066 case XMM1_REG:
8067 return TARGET_SSE;
8069 case MM0_REG:
8070 if (TARGET_MACHO || TARGET_64BIT)
8071 return false;
8072 return TARGET_MMX;
8075 return false;
8078 /* Define how to find the value returned by a function.
8079 VALTYPE is the data type of the value (as a tree).
8080 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8081 otherwise, FUNC is 0. */
8083 static rtx
8084 function_value_32 (machine_mode orig_mode, machine_mode mode,
8085 const_tree fntype, const_tree fn)
8087 unsigned int regno;
8089 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8090 we normally prevent this case when mmx is not available. However
8091 some ABIs may require the result to be returned like DImode. */
8092 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8093 regno = FIRST_MMX_REG;
8095 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8096 we prevent this case when sse is not available. However some ABIs
8097 may require the result to be returned like integer TImode. */
8098 else if (mode == TImode
8099 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8100 regno = FIRST_SSE_REG;
8102 /* 32-byte vector modes in %ymm0. */
8103 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8104 regno = FIRST_SSE_REG;
8106 /* 64-byte vector modes in %zmm0. */
8107 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8108 regno = FIRST_SSE_REG;
8110 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8111 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8112 regno = FIRST_FLOAT_REG;
8113 else
8114 /* Most things go in %eax. */
8115 regno = AX_REG;
8117 /* Override FP return register with %xmm0 for local functions when
8118 SSE math is enabled or for functions with sseregparm attribute. */
8119 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8121 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8122 if ((sse_level >= 1 && mode == SFmode)
8123 || (sse_level == 2 && mode == DFmode))
8124 regno = FIRST_SSE_REG;
8127 /* OImode shouldn't be used directly. */
8128 gcc_assert (mode != OImode);
8130 return gen_rtx_REG (orig_mode, regno);
8133 static rtx
8134 function_value_64 (machine_mode orig_mode, machine_mode mode,
8135 const_tree valtype)
8137 rtx ret;
8139 /* Handle libcalls, which don't provide a type node. */
8140 if (valtype == NULL)
8142 unsigned int regno;
8144 switch (mode)
8146 case SFmode:
8147 case SCmode:
8148 case DFmode:
8149 case DCmode:
8150 case TFmode:
8151 case SDmode:
8152 case DDmode:
8153 case TDmode:
8154 regno = FIRST_SSE_REG;
8155 break;
8156 case XFmode:
8157 case XCmode:
8158 regno = FIRST_FLOAT_REG;
8159 break;
8160 case TCmode:
8161 return NULL;
8162 default:
8163 regno = AX_REG;
8166 return gen_rtx_REG (mode, regno);
8168 else if (POINTER_TYPE_P (valtype))
8170 /* Pointers are always returned in word_mode. */
8171 mode = word_mode;
8174 ret = construct_container (mode, orig_mode, valtype, 1,
8175 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8176 x86_64_int_return_registers, 0);
8178 /* For zero-sized structures, construct_container returns NULL, but we
8179 need to keep the rest of the compiler happy by returning a meaningful value. */
8180 if (!ret)
8181 ret = gen_rtx_REG (orig_mode, AX_REG);
8183 return ret;
8186 static rtx
8187 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8188 const_tree valtype)
8190 unsigned int regno = AX_REG;
8192 if (TARGET_SSE)
8194 switch (GET_MODE_SIZE (mode))
8196 case 16:
8197 if (valtype != NULL_TREE
8198 && !VECTOR_INTEGER_TYPE_P (valtype)
8200 && !INTEGRAL_TYPE_P (valtype)
8201 && !VECTOR_FLOAT_TYPE_P (valtype))
8202 break;
8203 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8204 && !COMPLEX_MODE_P (mode))
8205 regno = FIRST_SSE_REG;
8206 break;
8207 case 8:
8208 case 4:
8209 if (mode == SFmode || mode == DFmode)
8210 regno = FIRST_SSE_REG;
8211 break;
8212 default:
8213 break;
8216 return gen_rtx_REG (orig_mode, regno);
8219 static rtx
8220 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8221 machine_mode orig_mode, machine_mode mode)
8223 const_tree fn, fntype;
8225 fn = NULL_TREE;
8226 if (fntype_or_decl && DECL_P (fntype_or_decl))
8227 fn = fntype_or_decl;
8228 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8230 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8231 || POINTER_BOUNDS_MODE_P (mode))
8232 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8233 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8234 return function_value_ms_64 (orig_mode, mode, valtype);
8235 else if (TARGET_64BIT)
8236 return function_value_64 (orig_mode, mode, valtype);
8237 else
8238 return function_value_32 (orig_mode, mode, fntype, fn);
8241 static rtx
8242 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8244 machine_mode mode, orig_mode;
8246 orig_mode = TYPE_MODE (valtype);
8247 mode = type_natural_mode (valtype, NULL, true);
8248 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8251 /* Return an RTX representing a place where a function returns
8252 or receives pointer bounds, or NULL if no bounds are returned.
8254 VALTYPE is a data type of a value returned by the function.
8256 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8257 or FUNCTION_TYPE of the function.
8259 If OUTGOING is false, return a place in which the caller will
8260 see the return value. Otherwise, return a place where a
8261 function returns a value. */
8263 static rtx
8264 ix86_function_value_bounds (const_tree valtype,
8265 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8266 bool outgoing ATTRIBUTE_UNUSED)
8268 rtx res = NULL_RTX;
8270 if (BOUNDED_TYPE_P (valtype))
8271 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8272 else if (chkp_type_has_pointer (valtype))
8274 bitmap slots;
8275 rtx bounds[2];
8276 bitmap_iterator bi;
8277 unsigned i, bnd_no = 0;
8279 bitmap_obstack_initialize (NULL);
8280 slots = BITMAP_ALLOC (NULL);
8281 chkp_find_bound_slots (valtype, slots);
8283 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8285 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8286 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8287 gcc_assert (bnd_no < 2);
8288 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8291 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8293 BITMAP_FREE (slots);
8294 bitmap_obstack_release (NULL);
8296 else
8297 res = NULL_RTX;
8299 return res;
8302 /* Pointer function arguments and return values are promoted to
8303 word_mode. */
8305 static machine_mode
8306 ix86_promote_function_mode (const_tree type, machine_mode mode,
8307 int *punsignedp, const_tree fntype,
8308 int for_return)
8310 if (type != NULL_TREE && POINTER_TYPE_P (type))
8312 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8313 return word_mode;
8315 return default_promote_function_mode (type, mode, punsignedp, fntype,
8316 for_return);
8319 /* Return true if a structure, union or array with MODE containing FIELD
8320 should be accessed using BLKmode. */
8322 static bool
8323 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8325 /* Union with XFmode must be in BLKmode. */
8326 return (mode == XFmode
8327 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8328 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8332 ix86_libcall_value (machine_mode mode)
8334 return ix86_function_value_1 (NULL, NULL, mode, mode);
8337 /* Return true iff type is returned in memory. */
8339 static bool
8340 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8342 #ifdef SUBTARGET_RETURN_IN_MEMORY
8343 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8344 #else
8345 const machine_mode mode = type_natural_mode (type, NULL, true);
8346 HOST_WIDE_INT size;
8348 if (POINTER_BOUNDS_TYPE_P (type))
8349 return false;
8351 if (TARGET_64BIT)
8353 if (ix86_function_type_abi (fntype) == MS_ABI)
8355 size = int_size_in_bytes (type);
8357 /* __m128 is returned in xmm0. */
8358 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8359 || INTEGRAL_TYPE_P (type)
8360 || VECTOR_FLOAT_TYPE_P (type))
8361 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8362 && !COMPLEX_MODE_P (mode)
8363 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8364 return false;
8366 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8367 return size != 1 && size != 2 && size != 4 && size != 8;
8369 else
8371 int needed_intregs, needed_sseregs;
8373 return examine_argument (mode, type, 1,
8374 &needed_intregs, &needed_sseregs);
8377 else
8379 if (mode == BLKmode)
8380 return true;
8382 size = int_size_in_bytes (type);
8384 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8385 return false;
8387 if (VECTOR_MODE_P (mode) || mode == TImode)
8389 /* User-created vectors small enough to fit in EAX. */
8390 if (size < 8)
8391 return false;
8393 /* Unless the ABI prescribes otherwise,
8394 MMX/3dNow values are returned in MM0 if available. */
8396 if (size == 8)
8397 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8399 /* SSE values are returned in XMM0 if available. */
8400 if (size == 16)
8401 return !TARGET_SSE;
8403 /* AVX values are returned in YMM0 if available. */
8404 if (size == 32)
8405 return !TARGET_AVX;
8407 /* AVX512F values are returned in ZMM0 if available. */
8408 if (size == 64)
8409 return !TARGET_AVX512F;
8412 if (mode == XFmode)
8413 return false;
8415 if (size > 12)
8416 return true;
8418 /* OImode shouldn't be used directly. */
8419 gcc_assert (mode != OImode);
8421 return false;
8423 #endif
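/* Examples for the 64-bit SysV path above (hypothetical types):

     struct p { long a, b; };      16 bytes -> returned in RAX:RDX
     struct q { long a, b, c; };   24 bytes -> returned in memory
     __m128                        -> returned in XMM0, not memory

   Under the MS ABI, only aggregates of 1, 2, 4 or 8 bytes and the
   16-byte vector case avoid the memory return.  */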
8427 /* Create the va_list data type. */
8429 /* Returns the calling-convention-specific va_list data type.
8430 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8432 static tree
8433 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8435 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8437 /* For i386 we use plain pointer to argument area. */
8438 if (!TARGET_64BIT || abi == MS_ABI)
8439 return build_pointer_type (char_type_node);
8441 record = lang_hooks.types.make_type (RECORD_TYPE);
8442 type_decl = build_decl (BUILTINS_LOCATION,
8443 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8445 f_gpr = build_decl (BUILTINS_LOCATION,
8446 FIELD_DECL, get_identifier ("gp_offset"),
8447 unsigned_type_node);
8448 f_fpr = build_decl (BUILTINS_LOCATION,
8449 FIELD_DECL, get_identifier ("fp_offset"),
8450 unsigned_type_node);
8451 f_ovf = build_decl (BUILTINS_LOCATION,
8452 FIELD_DECL, get_identifier ("overflow_arg_area"),
8453 ptr_type_node);
8454 f_sav = build_decl (BUILTINS_LOCATION,
8455 FIELD_DECL, get_identifier ("reg_save_area"),
8456 ptr_type_node);
8458 va_list_gpr_counter_field = f_gpr;
8459 va_list_fpr_counter_field = f_fpr;
8461 DECL_FIELD_CONTEXT (f_gpr) = record;
8462 DECL_FIELD_CONTEXT (f_fpr) = record;
8463 DECL_FIELD_CONTEXT (f_ovf) = record;
8464 DECL_FIELD_CONTEXT (f_sav) = record;
8466 TYPE_STUB_DECL (record) = type_decl;
8467 TYPE_NAME (record) = type_decl;
8468 TYPE_FIELDS (record) = f_gpr;
8469 DECL_CHAIN (f_gpr) = f_fpr;
8470 DECL_CHAIN (f_fpr) = f_ovf;
8471 DECL_CHAIN (f_ovf) = f_sav;
8473 layout_type (record);
8475 /* The correct type is an array type of one element. */
8476 return build_array_type (record, build_index_type (size_zero_node));
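/* The record built above corresponds to the familiar SysV definition,
   sketched here in plain C for reference:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];

   whereas the 32-bit and MS ABI variants are just a char pointer.  */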
8479 /* Setup the builtin va_list data type and for 64-bit the additional
8480 calling convention specific va_list data types. */
8482 static tree
8483 ix86_build_builtin_va_list (void)
8485 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8487 /* Initialize abi specific va_list builtin types. */
8488 if (TARGET_64BIT)
8490 tree t;
8491 if (ix86_abi == MS_ABI)
8493 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8494 if (TREE_CODE (t) != RECORD_TYPE)
8495 t = build_variant_type_copy (t);
8496 sysv_va_list_type_node = t;
8498 else
8500 t = ret;
8501 if (TREE_CODE (t) != RECORD_TYPE)
8502 t = build_variant_type_copy (t);
8503 sysv_va_list_type_node = t;
8505 if (ix86_abi != MS_ABI)
8507 t = ix86_build_builtin_va_list_abi (MS_ABI);
8508 if (TREE_CODE (t) != RECORD_TYPE)
8509 t = build_variant_type_copy (t);
8510 ms_va_list_type_node = t;
8512 else
8514 t = ret;
8515 if (TREE_CODE (t) != RECORD_TYPE)
8516 t = build_variant_type_copy (t);
8517 ms_va_list_type_node = t;
8521 return ret;
8524 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8526 static void
8527 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8529 rtx save_area, mem;
8530 alias_set_type set;
8531 int i, max;
8533 /* GPR size of varargs save area. */
8534 if (cfun->va_list_gpr_size)
8535 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8536 else
8537 ix86_varargs_gpr_size = 0;
8539 /* FPR size of varargs save area. We don't need it if we don't pass
8540 anything in SSE registers. */
8541 if (TARGET_SSE && cfun->va_list_fpr_size)
8542 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8543 else
8544 ix86_varargs_fpr_size = 0;
8546 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8547 return;
8549 save_area = frame_pointer_rtx;
8550 set = get_varargs_alias_set ();
8552 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8553 if (max > X86_64_REGPARM_MAX)
8554 max = X86_64_REGPARM_MAX;
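  /* Spill the integer argument registers that may hold varargs values into
     the register save area so that va_arg can later read them from memory.  */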
8556 for (i = cum->regno; i < max; i++)
8558 mem = gen_rtx_MEM (word_mode,
8559 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8560 MEM_NOTRAP_P (mem) = 1;
8561 set_mem_alias_set (mem, set);
8562 emit_move_insn (mem,
8563 gen_rtx_REG (word_mode,
8564 x86_64_int_parameter_registers[i]));
8567 if (ix86_varargs_fpr_size)
8569 machine_mode smode;
8570 rtx_code_label *label;
8571 rtx test;
8573 /* Now emit code to save SSE registers. The AX parameter contains number
8574 of SSE parameter registers used to call this function, though all we
8575 actually check here is the zero/non-zero status. */
8577 label = gen_label_rtx ();
8578 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8579 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8580 label));
8582 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8583 we used movdqa (i.e. TImode) instead? Perhaps even better would
8584 be if we could determine the real mode of the data, via a hook
8585 into pass_stdarg. Ignore all that for now. */
8586 smode = V4SFmode;
8587 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8588 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8590 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8591 if (max > X86_64_SSE_REGPARM_MAX)
8592 max = X86_64_SSE_REGPARM_MAX;
8594 for (i = cum->sse_regno; i < max; ++i)
8596 mem = plus_constant (Pmode, save_area,
8597 i * 16 + ix86_varargs_gpr_size);
8598 mem = gen_rtx_MEM (smode, mem);
8599 MEM_NOTRAP_P (mem) = 1;
8600 set_mem_alias_set (mem, set);
8601 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8603 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8606 emit_label (label);
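/* Worker for TARGET_SETUP_INCOMING_VARARGS for the 64-bit MS ABI: store the
   register-passed arguments into the incoming argument area (the MS ABI home
   slots) so that va_arg can address them on the stack.  */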
8610 static void
8611 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8613 alias_set_type set = get_varargs_alias_set ();
8614 int i;
8616 /* Reset to zero, as there might have been a sysv va_arg used
8617 before. */
8618 ix86_varargs_gpr_size = 0;
8619 ix86_varargs_fpr_size = 0;
8621 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8623 rtx reg, mem;
8625 mem = gen_rtx_MEM (Pmode,
8626 plus_constant (Pmode, virtual_incoming_args_rtx,
8627 i * UNITS_PER_WORD));
8628 MEM_NOTRAP_P (mem) = 1;
8629 set_mem_alias_set (mem, set);
8631 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8632 emit_move_insn (mem, reg);
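/* Target hook for TARGET_SETUP_INCOMING_VARARGS.  On 64-bit targets this
   dispatches to the MS or SysV worker above; 32-bit targets need no register
   save area and return immediately.  */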
8636 static void
8637 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8638 tree type, int *, int no_rtl)
8640 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8641 CUMULATIVE_ARGS next_cum;
8642 tree fntype;
8644 /* This argument doesn't appear to be used anymore. Which is good,
8645 because the old code here didn't suppress rtl generation. */
8646 gcc_assert (!no_rtl);
8648 if (!TARGET_64BIT)
8649 return;
8651 fntype = TREE_TYPE (current_function_decl);
8653 /* For varargs, we do not want to skip the dummy va_dcl argument.
8654 For stdargs, we do want to skip the last named argument. */
8655 next_cum = *cum;
8656 if (stdarg_p (fntype))
8657 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8658 true);
8660 if (cum->call_abi == MS_ABI)
8661 setup_incoming_varargs_ms_64 (&next_cum);
8662 else
8663 setup_incoming_varargs_64 (&next_cum);
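/* Companion of the routine above for Pointer Bounds Checker (chkp)
   instrumented functions: store the bounds of the register-passed varargs
   with BNDSTX next to their values in the register save area, taking them
   from the bound registers or, once those are exhausted, reloading them
   with BNDLDX from the argument area.  */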
8666 static void
8667 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8668 enum machine_mode mode,
8669 tree type,
8670 int *pretend_size ATTRIBUTE_UNUSED,
8671 int no_rtl)
8673 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8674 CUMULATIVE_ARGS next_cum;
8675 tree fntype;
8676 rtx save_area;
8677 int bnd_reg, i, max;
8679 gcc_assert (!no_rtl);
8681 /* Do nothing if we use plain pointer to argument area. */
8682 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8683 return;
8685 fntype = TREE_TYPE (current_function_decl);
8687 /* For varargs, we do not want to skip the dummy va_dcl argument.
8688 For stdargs, we do want to skip the last named argument. */
8689 next_cum = *cum;
8690 if (stdarg_p (fntype))
8691 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8692 true);
8693 save_area = frame_pointer_rtx;
8695 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8696 if (max > X86_64_REGPARM_MAX)
8697 max = X86_64_REGPARM_MAX;
8699 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8700 if (chkp_function_instrumented_p (current_function_decl))
8701 for (i = cum->regno; i < max; i++)
8703 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8704 rtx reg = gen_rtx_REG (DImode,
8705 x86_64_int_parameter_registers[i]);
8706 rtx ptr = reg;
8707 rtx bounds;
8709 if (bnd_reg <= LAST_BND_REG)
8710 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8711 else
8713 rtx ldx_addr =
8714 plus_constant (Pmode, arg_pointer_rtx,
8715 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8716 bounds = gen_reg_rtx (BNDmode);
8717 emit_insn (BNDmode == BND64mode
8718 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8719 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8722 emit_insn (BNDmode == BND64mode
8723 ? gen_bnd64_stx (addr, ptr, bounds)
8724 : gen_bnd32_stx (addr, ptr, bounds));
8726 bnd_reg++;
8731 /* Check whether TYPE is the plain char * kind of va_list. */
8733 static bool
8734 is_va_list_char_pointer (tree type)
8736 tree canonic;
8738 /* For 32-bit it is always true. */
8739 if (!TARGET_64BIT)
8740 return true;
8741 canonic = ix86_canonical_va_list_type (type);
8742 return (canonic == ms_va_list_type_node
8743 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8746 /* Implement va_start. */
8748 static void
8749 ix86_va_start (tree valist, rtx nextarg)
8751 HOST_WIDE_INT words, n_gpr, n_fpr;
8752 tree f_gpr, f_fpr, f_ovf, f_sav;
8753 tree gpr, fpr, ovf, sav, t;
8754 tree type;
8755 rtx ovf_rtx;
8757 if (flag_split_stack
8758 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8760 unsigned int scratch_regno;
8762 /* When we are splitting the stack, we can't refer to the stack
8763 arguments using internal_arg_pointer, because they may be on
8764 the old stack. The split stack prologue will arrange to
8765 leave a pointer to the old stack arguments in a scratch
8766 register, which we here copy to a pseudo-register. The split
8767 stack prologue can't set the pseudo-register directly because
8768 it (the prologue) runs before any registers have been saved. */
8770 scratch_regno = split_stack_prologue_scratch_regno ();
8771 if (scratch_regno != INVALID_REGNUM)
8773 rtx reg;
8774 rtx_insn *seq;
8776 reg = gen_reg_rtx (Pmode);
8777 cfun->machine->split_stack_varargs_pointer = reg;
8779 start_sequence ();
8780 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8781 seq = get_insns ();
8782 end_sequence ();
8784 push_topmost_sequence ();
8785 emit_insn_after (seq, entry_of_function ());
8786 pop_topmost_sequence ();
8790 /* Only 64bit target needs something special. */
8791 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8793 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8794 std_expand_builtin_va_start (valist, nextarg);
8795 else
8797 rtx va_r, next;
8799 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8800 next = expand_binop (ptr_mode, add_optab,
8801 cfun->machine->split_stack_varargs_pointer,
8802 crtl->args.arg_offset_rtx,
8803 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8804 convert_move (va_r, next, 0);
8806 /* Store zero bounds for va_list. */
8807 if (chkp_function_instrumented_p (current_function_decl))
8808 chkp_expand_bounds_reset_for_mem (valist,
8809 make_tree (TREE_TYPE (valist),
8810 next));
8813 return;
8816 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8817 f_fpr = DECL_CHAIN (f_gpr);
8818 f_ovf = DECL_CHAIN (f_fpr);
8819 f_sav = DECL_CHAIN (f_ovf);
8821 valist = build_simple_mem_ref (valist);
8822 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8823 /* The following should be folded into the MEM_REF offset. */
8824 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8825 f_gpr, NULL_TREE);
8826 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8827 f_fpr, NULL_TREE);
8828 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8829 f_ovf, NULL_TREE);
8830 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8831 f_sav, NULL_TREE);
8833 /* Count number of gp and fp argument registers used. */
8834 words = crtl->args.info.words;
8835 n_gpr = crtl->args.info.regno;
8836 n_fpr = crtl->args.info.sse_regno;
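  /* Layout of the register save area: X86_64_REGPARM_MAX integer registers
     in 8-byte slots followed by the SSE registers in 16-byte slots.  The
     gp_offset and fp_offset fields set below are byte offsets into it.  */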
8838 if (cfun->va_list_gpr_size)
8840 type = TREE_TYPE (gpr);
8841 t = build2 (MODIFY_EXPR, type,
8842 gpr, build_int_cst (type, n_gpr * 8));
8843 TREE_SIDE_EFFECTS (t) = 1;
8844 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8847 if (TARGET_SSE && cfun->va_list_fpr_size)
8849 type = TREE_TYPE (fpr);
8850 t = build2 (MODIFY_EXPR, type, fpr,
8851 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8852 TREE_SIDE_EFFECTS (t) = 1;
8853 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8856 /* Find the overflow area. */
8857 type = TREE_TYPE (ovf);
8858 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8859 ovf_rtx = crtl->args.internal_arg_pointer;
8860 else
8861 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8862 t = make_tree (type, ovf_rtx);
8863 if (words != 0)
8864 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8866 /* Store zero bounds for overflow area pointer. */
8867 if (chkp_function_instrumented_p (current_function_decl))
8868 chkp_expand_bounds_reset_for_mem (ovf, t);
8870 t = build2 (MODIFY_EXPR, type, ovf, t);
8871 TREE_SIDE_EFFECTS (t) = 1;
8872 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8874 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8876 /* Find the register save area.
8877 The prologue of the function saves it right above the stack frame. */
8878 type = TREE_TYPE (sav);
8879 t = make_tree (type, frame_pointer_rtx);
8880 if (!ix86_varargs_gpr_size)
8881 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8883 /* Store zero bounds for save area pointer. */
8884 if (chkp_function_instrumented_p (current_function_decl))
8885 chkp_expand_bounds_reset_for_mem (sav, t);
8887 t = build2 (MODIFY_EXPR, type, sav, t);
8888 TREE_SIDE_EFFECTS (t) = 1;
8889 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8893 /* Implement va_arg. */
8895 static tree
8896 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8897 gimple_seq *post_p)
8899 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8900 tree f_gpr, f_fpr, f_ovf, f_sav;
8901 tree gpr, fpr, ovf, sav, t;
8902 int size, rsize;
8903 tree lab_false, lab_over = NULL_TREE;
8904 tree addr, t2;
8905 rtx container;
8906 int indirect_p = 0;
8907 tree ptrtype;
8908 machine_mode nat_mode;
8909 unsigned int arg_boundary;
8911 /* Only 64bit target needs something special. */
8912 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8913 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8915 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8916 f_fpr = DECL_CHAIN (f_gpr);
8917 f_ovf = DECL_CHAIN (f_fpr);
8918 f_sav = DECL_CHAIN (f_ovf);
8920 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8921 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8922 valist = build_va_arg_indirect_ref (valist);
8923 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8924 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8925 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8927 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8928 if (indirect_p)
8929 type = build_pointer_type (type);
8930 size = int_size_in_bytes (type);
8931 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8933 nat_mode = type_natural_mode (type, NULL, false);
8934 switch (nat_mode)
8936 case V8SFmode:
8937 case V8SImode:
8938 case V32QImode:
8939 case V16HImode:
8940 case V4DFmode:
8941 case V4DImode:
8942 case V16SFmode:
8943 case V16SImode:
8944 case V64QImode:
8945 case V32HImode:
8946 case V8DFmode:
8947 case V8DImode:
8948 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8949 if (!TARGET_64BIT_MS_ABI)
8951 container = NULL;
8952 break;
8955 default:
8956 container = construct_container (nat_mode, TYPE_MODE (type),
8957 type, 0, X86_64_REGPARM_MAX,
8958 X86_64_SSE_REGPARM_MAX, intreg,
8960 break;
8963 /* Pull the value out of the saved registers. */
8965 addr = create_tmp_var (ptr_type_node, "addr");
8967 if (container)
8969 int needed_intregs, needed_sseregs;
8970 bool need_temp;
8971 tree int_addr, sse_addr;
8973 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8974 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8976 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
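      /* A temporary is needed when the value does not live in a single
	 register and is over-aligned for the save area; it is also forced
	 below when the pieces are not consecutive in the save area.  */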
8978 need_temp = (!REG_P (container)
8979 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8980 || TYPE_ALIGN (type) > 128));
8982 /* In case we are passing a structure, verify that it is a consecutive block
8983 in the register save area. If not, we need to do moves. */
8984 if (!need_temp && !REG_P (container))
8986 /* Verify that all registers are strictly consecutive. */
8987 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8989 int i;
8991 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8993 rtx slot = XVECEXP (container, 0, i);
8994 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8995 || INTVAL (XEXP (slot, 1)) != i * 16)
8996 need_temp = 1;
8999 else
9001 int i;
9003 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9005 rtx slot = XVECEXP (container, 0, i);
9006 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9007 || INTVAL (XEXP (slot, 1)) != i * 8)
9008 need_temp = 1;
9012 if (!need_temp)
9014 int_addr = addr;
9015 sse_addr = addr;
9017 else
9019 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9020 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9023 /* First ensure that we fit completely in registers. */
9024 if (needed_intregs)
9026 t = build_int_cst (TREE_TYPE (gpr),
9027 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9028 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9029 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9030 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9031 gimplify_and_add (t, pre_p);
9033 if (needed_sseregs)
9035 t = build_int_cst (TREE_TYPE (fpr),
9036 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9037 + X86_64_REGPARM_MAX * 8);
9038 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9039 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9040 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9041 gimplify_and_add (t, pre_p);
9044 /* Compute index to start of area used for integer regs. */
9045 if (needed_intregs)
9047 /* int_addr = gpr + sav; */
9048 t = fold_build_pointer_plus (sav, gpr);
9049 gimplify_assign (int_addr, t, pre_p);
9051 if (needed_sseregs)
9053 /* sse_addr = fpr + sav; */
9054 t = fold_build_pointer_plus (sav, fpr);
9055 gimplify_assign (sse_addr, t, pre_p);
9057 if (need_temp)
9059 int i, prev_size = 0;
9060 tree temp = create_tmp_var (type, "va_arg_tmp");
9062 /* addr = &temp; */
9063 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9064 gimplify_assign (addr, t, pre_p);
9066 for (i = 0; i < XVECLEN (container, 0); i++)
9068 rtx slot = XVECEXP (container, 0, i);
9069 rtx reg = XEXP (slot, 0);
9070 machine_mode mode = GET_MODE (reg);
9071 tree piece_type;
9072 tree addr_type;
9073 tree daddr_type;
9074 tree src_addr, src;
9075 int src_offset;
9076 tree dest_addr, dest;
9077 int cur_size = GET_MODE_SIZE (mode);
9079 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9080 prev_size = INTVAL (XEXP (slot, 1));
9081 if (prev_size + cur_size > size)
9083 cur_size = size - prev_size;
9084 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9085 if (mode == BLKmode)
9086 mode = QImode;
9088 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9089 if (mode == GET_MODE (reg))
9090 addr_type = build_pointer_type (piece_type);
9091 else
9092 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9093 true);
9094 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9095 true);
9097 if (SSE_REGNO_P (REGNO (reg)))
9099 src_addr = sse_addr;
9100 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9102 else
9104 src_addr = int_addr;
9105 src_offset = REGNO (reg) * 8;
9107 src_addr = fold_convert (addr_type, src_addr);
9108 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9110 dest_addr = fold_convert (daddr_type, addr);
9111 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9112 if (cur_size == GET_MODE_SIZE (mode))
9114 src = build_va_arg_indirect_ref (src_addr);
9115 dest = build_va_arg_indirect_ref (dest_addr);
9117 gimplify_assign (dest, src, pre_p);
9119 else
9121 tree copy
9122 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9123 3, dest_addr, src_addr,
9124 size_int (cur_size));
9125 gimplify_and_add (copy, pre_p);
9127 prev_size += cur_size;
9131 if (needed_intregs)
9133 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9134 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9135 gimplify_assign (gpr, t, pre_p);
9138 if (needed_sseregs)
9140 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9141 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9142 gimplify_assign (fpr, t, pre_p);
9145 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9147 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9150 /* ... otherwise out of the overflow area. */
9152 /* When we align a parameter on the stack for the caller, if its
9153 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9154 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
9155 here with the caller. */
9156 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9157 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9158 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9160 /* Care for on-stack alignment if needed. */
9161 if (arg_boundary <= 64 || size == 0)
9162 t = ovf;
9163 else
9165 HOST_WIDE_INT align = arg_boundary / 8;
9166 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9167 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9168 build_int_cst (TREE_TYPE (t), -align));
9171 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9172 gimplify_assign (addr, t, pre_p);
9174 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9175 gimplify_assign (unshare_expr (ovf), t, pre_p);
9177 if (container)
9178 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9180 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9181 addr = fold_convert (ptrtype, addr);
9183 if (indirect_p)
9184 addr = build_va_arg_indirect_ref (addr);
9185 return build_va_arg_indirect_ref (addr);
9188 /* Return true if OPNUM's MEM should be matched
9189 in movabs* patterns. */
9191 bool
9192 ix86_check_movabs (rtx insn, int opnum)
9194 rtx set, mem;
9196 set = PATTERN (insn);
9197 if (GET_CODE (set) == PARALLEL)
9198 set = XVECEXP (set, 0, 0);
9199 gcc_assert (GET_CODE (set) == SET);
9200 mem = XEXP (set, opnum);
9201 while (GET_CODE (mem) == SUBREG)
9202 mem = SUBREG_REG (mem);
9203 gcc_assert (MEM_P (mem));
9204 return volatile_ok || !MEM_VOLATILE_P (mem);
9207 /* Initialize the table of extra 80387 mathematical constants. */
9209 static void
9210 init_ext_80387_constants (void)
9212 static const char * cst[5] =
9214 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9215 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9216 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9217 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9218 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9220 int i;
9222 for (i = 0; i < 5; i++)
9224 real_from_string (&ext_80387_constants_table[i], cst[i]);
9225 /* Ensure each constant is rounded to XFmode precision. */
9226 real_convert (&ext_80387_constants_table[i],
9227 XFmode, &ext_80387_constants_table[i]);
9230 ext_80387_constants_init = 1;
9233 /* Return non-zero if the constant is something that
9234 can be loaded with a special instruction. */
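/* The return value encodes the load: -1 if X is not an 80387 constant at
   all, 0 if no special instruction is available, 1 fldz, 2 fld1, 3 fldlg2,
   4 fldln2, 5 fldl2e, 6 fldl2t, 7 fldpi, and 8/9 for -0.0/-1.0, which are
   split as fldz;fchs and fld1;fchs.  */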
9237 standard_80387_constant_p (rtx x)
9239 machine_mode mode = GET_MODE (x);
9241 REAL_VALUE_TYPE r;
9243 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9244 return -1;
9246 if (x == CONST0_RTX (mode))
9247 return 1;
9248 if (x == CONST1_RTX (mode))
9249 return 2;
9251 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9253 /* For XFmode constants, try to find a special 80387 instruction when
9254 optimizing for size or on those CPUs that benefit from them. */
9255 if (mode == XFmode
9256 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9258 int i;
9260 if (! ext_80387_constants_init)
9261 init_ext_80387_constants ();
9263 for (i = 0; i < 5; i++)
9264 if (real_identical (&r, &ext_80387_constants_table[i]))
9265 return i + 3;
9268 /* Load of the constant -0.0 or -1.0 will be split as
9269 fldz;fchs or fld1;fchs sequence. */
9270 if (real_isnegzero (&r))
9271 return 8;
9272 if (real_identical (&r, &dconstm1))
9273 return 9;
9275 return 0;
9278 /* Return the opcode of the special instruction to be used to load
9279 the constant X. */
9281 const char *
9282 standard_80387_constant_opcode (rtx x)
9284 switch (standard_80387_constant_p (x))
9286 case 1:
9287 return "fldz";
9288 case 2:
9289 return "fld1";
9290 case 3:
9291 return "fldlg2";
9292 case 4:
9293 return "fldln2";
9294 case 5:
9295 return "fldl2e";
9296 case 6:
9297 return "fldl2t";
9298 case 7:
9299 return "fldpi";
9300 case 8:
9301 case 9:
9302 return "#";
9303 default:
9304 gcc_unreachable ();
9308 /* Return the CONST_DOUBLE representing the 80387 constant that is
9309 loaded by the specified special instruction. The argument IDX
9310 matches the return value from standard_80387_constant_p. */
9313 standard_80387_constant_rtx (int idx)
9315 int i;
9317 if (! ext_80387_constants_init)
9318 init_ext_80387_constants ();
9320 switch (idx)
9322 case 3:
9323 case 4:
9324 case 5:
9325 case 6:
9326 case 7:
9327 i = idx - 3;
9328 break;
9330 default:
9331 gcc_unreachable ();
9334 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9335 XFmode);
9338 /* Return 1 if X is all 0s and 2 if X is all 1s
9339 in a supported SSE/AVX vector mode. */
9342 standard_sse_constant_p (rtx x)
9344 machine_mode mode = GET_MODE (x);
9346 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9347 return 1;
9348 if (vector_all_ones_operand (x, mode))
9349 switch (mode)
9351 case V16QImode:
9352 case V8HImode:
9353 case V4SImode:
9354 case V2DImode:
9355 if (TARGET_SSE2)
9356 return 2;
9357 case V32QImode:
9358 case V16HImode:
9359 case V8SImode:
9360 case V4DImode:
9361 if (TARGET_AVX2)
9362 return 2;
9363 case V64QImode:
9364 case V32HImode:
9365 case V16SImode:
9366 case V8DImode:
9367 if (TARGET_AVX512F)
9368 return 2;
9369 default:
9370 break;
9373 return 0;
9376 /* Return the opcode of the special instruction to be used to load
9377 the constant X. */
9379 const char *
9380 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9382 switch (standard_sse_constant_p (x))
9384 case 1:
9385 switch (get_attr_mode (insn))
9387 case MODE_XI:
9388 return "vpxord\t%g0, %g0, %g0";
9389 case MODE_V16SF:
9390 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9391 : "vpxord\t%g0, %g0, %g0";
9392 case MODE_V8DF:
9393 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9394 : "vpxorq\t%g0, %g0, %g0";
9395 case MODE_TI:
9396 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9397 : "%vpxor\t%0, %d0";
9398 case MODE_V2DF:
9399 return "%vxorpd\t%0, %d0";
9400 case MODE_V4SF:
9401 return "%vxorps\t%0, %d0";
9403 case MODE_OI:
9404 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9405 : "vpxor\t%x0, %x0, %x0";
9406 case MODE_V4DF:
9407 return "vxorpd\t%x0, %x0, %x0";
9408 case MODE_V8SF:
9409 return "vxorps\t%x0, %x0, %x0";
9411 default:
9412 break;
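    /* An all-ones constant: materialized with vpternlogd on 512-bit or
       AVX512VL targets, vpcmpeqd with AVX, and pcmpeqd otherwise.  */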
9415 case 2:
9416 if (TARGET_AVX512VL
9417 || get_attr_mode (insn) == MODE_XI
9418 || get_attr_mode (insn) == MODE_V8DF
9419 || get_attr_mode (insn) == MODE_V16SF)
9420 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9421 if (TARGET_AVX)
9422 return "vpcmpeqd\t%0, %0, %0";
9423 else
9424 return "pcmpeqd\t%0, %0";
9426 default:
9427 break;
9429 gcc_unreachable ();
9432 /* Returns true if OP contains a symbol reference. */
9434 bool
9435 symbolic_reference_mentioned_p (rtx op)
9437 const char *fmt;
9438 int i;
9440 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9441 return true;
9443 fmt = GET_RTX_FORMAT (GET_CODE (op));
9444 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9446 if (fmt[i] == 'E')
9448 int j;
9450 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9451 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9452 return true;
9455 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9456 return true;
9459 return false;
9462 /* Return true if it is appropriate to emit `ret' instructions in the
9463 body of a function. Do this only if the epilogue is simple, needing a
9464 couple of insns. Prior to reloading, we can't tell how many registers
9465 must be saved, so return false then. Return false if there is no frame
9466 marker to de-allocate. */
9468 bool
9469 ix86_can_use_return_insn_p (void)
9471 struct ix86_frame frame;
9473 if (! reload_completed || frame_pointer_needed)
9474 return 0;
9476 /* Don't allow more than 32k pop, since that's all we can do
9477 with one instruction. */
9478 if (crtl->args.pops_args && crtl->args.size >= 32768)
9479 return 0;
9481 ix86_compute_frame_layout (&frame);
9482 return (frame.stack_pointer_offset == UNITS_PER_WORD
9483 && (frame.nregs + frame.nsseregs) == 0);
9486 /* Value should be nonzero if functions must have frame pointers.
9487 Zero means the frame pointer need not be set up (and parms may
9488 be accessed via the stack pointer) in functions that seem suitable. */
9490 static bool
9491 ix86_frame_pointer_required (void)
9493 /* If we accessed previous frames, then the generated code expects
9494 to be able to access the saved ebp value in our frame. */
9495 if (cfun->machine->accesses_prev_frame)
9496 return true;
9498 /* Several x86 OSes need a frame pointer for other reasons,
9499 usually pertaining to setjmp. */
9500 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9501 return true;
9503 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9504 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9505 return true;
9507 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
9508 stack allocation is 4GB. */
9509 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9510 return true;
9512 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9513 turns off the frame pointer by default. Turn it back on now if
9514 we've not got a leaf function. */
9515 if (TARGET_OMIT_LEAF_FRAME_POINTER
9516 && (!crtl->is_leaf
9517 || ix86_current_function_calls_tls_descriptor))
9518 return true;
9520 if (crtl->profile && !flag_fentry)
9521 return true;
9523 return false;
9526 /* Record that the current function accesses previous call frames. */
9528 void
9529 ix86_setup_frame_addresses (void)
9531 cfun->machine->accesses_prev_frame = 1;
9534 #ifndef USE_HIDDEN_LINKONCE
9535 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9536 # define USE_HIDDEN_LINKONCE 1
9537 # else
9538 # define USE_HIDDEN_LINKONCE 0
9539 # endif
9540 #endif
9542 static int pic_labels_used;
9544 /* Fills in the label name that should be used for a pc thunk for
9545 the given register. */
9547 static void
9548 get_pc_thunk_name (char name[32], unsigned int regno)
9550 gcc_assert (!TARGET_64BIT);
9552 if (USE_HIDDEN_LINKONCE)
9553 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9554 else
9555 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9559 /* This function generates code for -fpic that loads %ebx with
9560 the return address of the caller and then returns. */
9562 static void
9563 ix86_code_end (void)
9565 rtx xops[2];
9566 int regno;
9568 for (regno = AX_REG; regno <= SP_REG; regno++)
9570 char name[32];
9571 tree decl;
9573 if (!(pic_labels_used & (1 << regno)))
9574 continue;
9576 get_pc_thunk_name (name, regno);
9578 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9579 get_identifier (name),
9580 build_function_type_list (void_type_node, NULL_TREE));
9581 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9582 NULL_TREE, void_type_node);
9583 TREE_PUBLIC (decl) = 1;
9584 TREE_STATIC (decl) = 1;
9585 DECL_IGNORED_P (decl) = 1;
9587 #if TARGET_MACHO
9588 if (TARGET_MACHO)
9590 switch_to_section (darwin_sections[text_coal_section]);
9591 fputs ("\t.weak_definition\t", asm_out_file);
9592 assemble_name (asm_out_file, name);
9593 fputs ("\n\t.private_extern\t", asm_out_file);
9594 assemble_name (asm_out_file, name);
9595 putc ('\n', asm_out_file);
9596 ASM_OUTPUT_LABEL (asm_out_file, name);
9597 DECL_WEAK (decl) = 1;
9599 else
9600 #endif
9601 if (USE_HIDDEN_LINKONCE)
9603 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9605 targetm.asm_out.unique_section (decl, 0);
9606 switch_to_section (get_named_section (decl, NULL, 0));
9608 targetm.asm_out.globalize_label (asm_out_file, name);
9609 fputs ("\t.hidden\t", asm_out_file);
9610 assemble_name (asm_out_file, name);
9611 putc ('\n', asm_out_file);
9612 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9614 else
9616 switch_to_section (text_section);
9617 ASM_OUTPUT_LABEL (asm_out_file, name);
9620 DECL_INITIAL (decl) = make_node (BLOCK);
9621 current_function_decl = decl;
9622 init_function_start (decl);
9623 first_function_block_is_cold = false;
9624 /* Make sure unwind info is emitted for the thunk if needed. */
9625 final_start_function (emit_barrier (), asm_out_file, 1);
9627 /* Pad stack IP move with 4 instructions (two NOPs count
9628 as one instruction). */
9629 if (TARGET_PAD_SHORT_FUNCTION)
9631 int i = 8;
9633 while (i--)
9634 fputs ("\tnop\n", asm_out_file);
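      /* The thunk body: load the return address, which is the word at the
	 top of the stack, into the target register and return.  */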
9637 xops[0] = gen_rtx_REG (Pmode, regno);
9638 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9639 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9640 output_asm_insn ("%!ret", NULL);
9641 final_end_function ();
9642 init_insn_lengths ();
9643 free_after_compilation (cfun);
9644 set_cfun (NULL);
9645 current_function_decl = NULL;
9648 if (flag_split_stack)
9649 file_end_indicate_split_stack ();
9652 /* Emit code for the SET_GOT patterns. */
9654 const char *
9655 output_set_got (rtx dest, rtx label)
9657 rtx xops[3];
9659 xops[0] = dest;
9661 if (TARGET_VXWORKS_RTP && flag_pic)
9663 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9664 xops[2] = gen_rtx_MEM (Pmode,
9665 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9666 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9668 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9669 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9670 an unadorned address. */
9671 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9672 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9673 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9674 return "";
9677 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9679 if (!flag_pic)
9681 if (TARGET_MACHO)
9682 /* We don't need a pic base, we're not producing pic. */
9683 gcc_unreachable ();
9685 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9686 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9687 targetm.asm_out.internal_label (asm_out_file, "L",
9688 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9690 else
9692 char name[32];
9693 get_pc_thunk_name (name, REGNO (dest));
9694 pic_labels_used |= 1 << REGNO (dest);
9696 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9697 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9698 output_asm_insn ("%!call\t%X2", xops);
9700 #if TARGET_MACHO
9701 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9702 This is what will be referenced by the Mach-O PIC subsystem. */
9703 if (machopic_should_output_picbase_label () || !label)
9704 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9706 /* When we are restoring the pic base at the site of a nonlocal label,
9707 and we decided to emit the pic base above, we will still output a
9708 local label used for calculating the correction offset (even though
9709 the offset will be 0 in that case). */
9710 if (label)
9711 targetm.asm_out.internal_label (asm_out_file, "L",
9712 CODE_LABEL_NUMBER (label));
9713 #endif
9716 if (!TARGET_MACHO)
9717 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9719 return "";
9722 /* Generate an "push" pattern for input ARG. */
9724 static rtx
9725 gen_push (rtx arg)
9727 struct machine_function *m = cfun->machine;
9729 if (m->fs.cfa_reg == stack_pointer_rtx)
9730 m->fs.cfa_offset += UNITS_PER_WORD;
9731 m->fs.sp_offset += UNITS_PER_WORD;
9733 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9734 arg = gen_rtx_REG (word_mode, REGNO (arg));
9736 return gen_rtx_SET (VOIDmode,
9737 gen_rtx_MEM (word_mode,
9738 gen_rtx_PRE_DEC (Pmode,
9739 stack_pointer_rtx)),
9740 arg);
9743 /* Generate an "pop" pattern for input ARG. */
9745 static rtx
9746 gen_pop (rtx arg)
9748 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9749 arg = gen_rtx_REG (word_mode, REGNO (arg));
9751 return gen_rtx_SET (VOIDmode,
9752 arg,
9753 gen_rtx_MEM (word_mode,
9754 gen_rtx_POST_INC (Pmode,
9755 stack_pointer_rtx)));
9758 /* Return >= 0 if there is an unused call-clobbered register available
9759 for the entire function. */
9761 static unsigned int
9762 ix86_select_alt_pic_regnum (void)
9764 if (ix86_use_pseudo_pic_reg ())
9765 return INVALID_REGNUM;
9767 if (crtl->is_leaf
9768 && !crtl->profile
9769 && !ix86_current_function_calls_tls_descriptor)
9771 int i, drap;
9772 /* Can't use the same register for both PIC and DRAP. */
9773 if (crtl->drap_reg)
9774 drap = REGNO (crtl->drap_reg);
9775 else
9776 drap = -1;
9777 for (i = 2; i >= 0; --i)
9778 if (i != drap && !df_regs_ever_live_p (i))
9779 return i;
9782 return INVALID_REGNUM;
9785 /* Return TRUE if we need to save REGNO. */
9787 static bool
9788 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9790 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9791 && pic_offset_table_rtx)
9793 if (ix86_use_pseudo_pic_reg ())
9795 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
9796 _mcount in the prologue. */
9797 if (!TARGET_64BIT && flag_pic && crtl->profile)
9798 return true;
9800 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9801 || crtl->profile
9802 || crtl->calls_eh_return
9803 || crtl->uses_const_pool
9804 || cfun->has_nonlocal_label)
9805 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9808 if (crtl->calls_eh_return && maybe_eh_return)
9810 unsigned i;
9811 for (i = 0; ; i++)
9813 unsigned test = EH_RETURN_DATA_REGNO (i);
9814 if (test == INVALID_REGNUM)
9815 break;
9816 if (test == regno)
9817 return true;
9821 if (crtl->drap_reg
9822 && regno == REGNO (crtl->drap_reg)
9823 && !cfun->machine->no_drap_save_restore)
9824 return true;
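  /* Otherwise save the register if it is live somewhere in the function,
     is callee-saved and not fixed, and is not the hard frame pointer while
     a frame pointer is in use.  */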
9826 return (df_regs_ever_live_p (regno)
9827 && !call_used_regs[regno]
9828 && !fixed_regs[regno]
9829 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9832 /* Return the number of saved general purpose registers. */
9834 static int
9835 ix86_nsaved_regs (void)
9837 int nregs = 0;
9838 int regno;
9840 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9841 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9842 nregs ++;
9843 return nregs;
9846 /* Return the number of saved SSE registers. */
9848 static int
9849 ix86_nsaved_sseregs (void)
9851 int nregs = 0;
9852 int regno;
9854 if (!TARGET_64BIT_MS_ABI)
9855 return 0;
9856 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9857 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9858 nregs ++;
9859 return nregs;
9862 /* Given FROM and TO register numbers, say whether this elimination is
9863 allowed. If stack alignment is needed, we can only replace argument
9864 pointer with hard frame pointer, or replace frame pointer with stack
9865 pointer. Otherwise, frame pointer elimination is automatically
9866 handled and all other eliminations are valid. */
9868 static bool
9869 ix86_can_eliminate (const int from, const int to)
9871 if (stack_realign_fp)
9872 return ((from == ARG_POINTER_REGNUM
9873 && to == HARD_FRAME_POINTER_REGNUM)
9874 || (from == FRAME_POINTER_REGNUM
9875 && to == STACK_POINTER_REGNUM));
9876 else
9877 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9880 /* Return the offset between two registers, one to be eliminated, and the other
9881 its replacement, at the start of a routine. */
9883 HOST_WIDE_INT
9884 ix86_initial_elimination_offset (int from, int to)
9886 struct ix86_frame frame;
9887 ix86_compute_frame_layout (&frame);
9889 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9890 return frame.hard_frame_pointer_offset;
9891 else if (from == FRAME_POINTER_REGNUM
9892 && to == HARD_FRAME_POINTER_REGNUM)
9893 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9894 else
9896 gcc_assert (to == STACK_POINTER_REGNUM);
9898 if (from == ARG_POINTER_REGNUM)
9899 return frame.stack_pointer_offset;
9901 gcc_assert (from == FRAME_POINTER_REGNUM);
9902 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9906 /* In a dynamically-aligned function, we can't know the offset from
9907 stack pointer to frame pointer, so we must ensure that setjmp
9908 eliminates fp against the hard fp (%ebp) rather than trying to
9909 index from %esp up to the top of the frame across a gap that is
9910 of unknown (at compile-time) size. */
9911 static rtx
9912 ix86_builtin_setjmp_frame_value (void)
9914 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9917 /* When using -fsplit-stack, the allocation routines set a field in
9918 the TCB to the bottom of the stack plus this much space, measured
9919 in bytes. */
9921 #define SPLIT_STACK_AVAILABLE 256
9923 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
9925 static void
9926 ix86_compute_frame_layout (struct ix86_frame *frame)
9928 unsigned HOST_WIDE_INT stack_alignment_needed;
9929 HOST_WIDE_INT offset;
9930 unsigned HOST_WIDE_INT preferred_alignment;
9931 HOST_WIDE_INT size = get_frame_size ();
9932 HOST_WIDE_INT to_allocate;
9934 frame->nregs = ix86_nsaved_regs ();
9935 frame->nsseregs = ix86_nsaved_sseregs ();
9937 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9938 in function prologues and leaf functions. */
9939 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9940 && (!crtl->is_leaf || cfun->calls_alloca != 0
9941 || ix86_current_function_calls_tls_descriptor))
9943 crtl->preferred_stack_boundary = 128;
9944 crtl->stack_alignment_needed = 128;
9946 /* preferred_stack_boundary is never updated for calls
9947 expanded from a TLS descriptor. Update it here. We don't update it in
9948 the expand stage because, according to the comments before
9949 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9950 away. */
9951 else if (ix86_current_function_calls_tls_descriptor
9952 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9954 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9955 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9956 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9959 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9960 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9962 gcc_assert (!size || stack_alignment_needed);
9963 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9964 gcc_assert (preferred_alignment <= stack_alignment_needed);
9966 /* For SEH we have to limit the amount of code movement into the prologue.
9967 At present we do this via a BLOCKAGE, at which point there's very little
9968 scheduling that can be done, which means that there's very little point
9969 in doing anything except PUSHs. */
9970 if (TARGET_SEH)
9971 cfun->machine->use_fast_prologue_epilogue = false;
9973 /* During the reload iteration the number of registers saved can change.
9974 Recompute the value as needed. Do not recompute when the number of registers
9975 didn't change, as reload does multiple calls to the function and does not
9976 expect the decision to change within a single iteration. */
9977 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9978 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9980 int count = frame->nregs;
9981 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9983 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9985 /* The fast prologue uses move instead of push to save registers. This
9986 is significantly longer, but also executes faster as modern hardware
9987 can execute the moves in parallel, but can't do that for push/pop.
9989 Be careful about choosing what prologue to emit: when the function takes
9990 many instructions to execute we may use the slow version, as well as when
9991 the function is known to be outside a hot spot (this is known with
9992 feedback only). Weight the size of the function by the number of registers
9993 to save, as it is cheap to use one or two push instructions but very
9994 slow to use many of them. */
9995 if (count)
9996 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9997 if (node->frequency < NODE_FREQUENCY_NORMAL
9998 || (flag_branch_probabilities
9999 && node->frequency < NODE_FREQUENCY_HOT))
10000 cfun->machine->use_fast_prologue_epilogue = false;
10001 else
10002 cfun->machine->use_fast_prologue_epilogue
10003 = !expensive_function_p (count);
10006 frame->save_regs_using_mov
10007 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10008 /* If static stack checking is enabled and done with probes,
10009 the registers need to be saved before allocating the frame. */
10010 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10012 /* Skip return address. */
10013 offset = UNITS_PER_WORD;
10015 /* Skip pushed static chain. */
10016 if (ix86_static_chain_on_stack)
10017 offset += UNITS_PER_WORD;
10019 /* Skip saved base pointer. */
10020 if (frame_pointer_needed)
10021 offset += UNITS_PER_WORD;
10022 frame->hfp_save_offset = offset;
10024 /* The traditional frame pointer location is at the top of the frame. */
10025 frame->hard_frame_pointer_offset = offset;
10027 /* Register save area */
10028 offset += frame->nregs * UNITS_PER_WORD;
10029 frame->reg_save_offset = offset;
10031 /* On SEH target, registers are pushed just before the frame pointer
10032 location. */
10033 if (TARGET_SEH)
10034 frame->hard_frame_pointer_offset = offset;
10036 /* Align and set SSE register save area. */
10037 if (frame->nsseregs)
10039 /* The only ABI that has saved SSE registers (Win64) also has a
10040 16-byte aligned default stack, and thus we don't need to be
10041 within the re-aligned local stack frame to save them. */
10042 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10043 offset = (offset + 16 - 1) & -16;
10044 offset += frame->nsseregs * 16;
10046 frame->sse_reg_save_offset = offset;
10048 /* The re-aligned stack starts here. Values before this point are not
10049 directly comparable with values below this point. In order to make
10050 sure that no value happens to be the same before and after, force
10051 the alignment computation below to add a non-zero value. */
10052 if (stack_realign_fp)
10053 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10055 /* Va-arg area */
10056 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10057 offset += frame->va_arg_size;
10059 /* Align start of frame for local function. */
10060 if (stack_realign_fp
10061 || offset != frame->sse_reg_save_offset
10062 || size != 0
10063 || !crtl->is_leaf
10064 || cfun->calls_alloca
10065 || ix86_current_function_calls_tls_descriptor)
10066 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10068 /* Frame pointer points here. */
10069 frame->frame_pointer_offset = offset;
10071 offset += size;
10073 /* Add the outgoing arguments area. Can be skipped if we eliminated
10074 all the function calls as dead code.
10075 Skipping is however impossible when the function calls alloca, as the alloca
10076 expander assumes that the last crtl->outgoing_args_size bytes
10077 of the stack frame are unused. */
10078 if (ACCUMULATE_OUTGOING_ARGS
10079 && (!crtl->is_leaf || cfun->calls_alloca
10080 || ix86_current_function_calls_tls_descriptor))
10082 offset += crtl->outgoing_args_size;
10083 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10085 else
10086 frame->outgoing_arguments_size = 0;
10088 /* Align stack boundary. Only needed if we're calling another function
10089 or using alloca. */
10090 if (!crtl->is_leaf || cfun->calls_alloca
10091 || ix86_current_function_calls_tls_descriptor)
10092 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10094 /* We've reached end of stack frame. */
10095 frame->stack_pointer_offset = offset;
10097 /* Size prologue needs to allocate. */
10098 to_allocate = offset - frame->sse_reg_save_offset;
10100 if ((!to_allocate && frame->nregs <= 1)
10101 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10102 frame->save_regs_using_mov = false;
10104 if (ix86_using_red_zone ()
10105 && crtl->sp_is_unchanging
10106 && crtl->is_leaf
10107 && !ix86_current_function_calls_tls_descriptor)
10109 frame->red_zone_size = to_allocate;
10110 if (frame->save_regs_using_mov)
10111 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10112 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10113 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10115 else
10116 frame->red_zone_size = 0;
10117 frame->stack_pointer_offset -= frame->red_zone_size;
10119 /* The SEH frame pointer location is near the bottom of the frame.
10120 This is enforced by the fact that the difference between the
10121 stack pointer and the frame pointer is limited to 240 bytes in
10122 the unwind data structure. */
10123 if (TARGET_SEH)
10125 HOST_WIDE_INT diff;
10127 /* If we can leave the frame pointer where it is, do so. Also, returns
10128 the establisher frame for __builtin_frame_address (0). */
10129 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10130 if (diff <= SEH_MAX_FRAME_SIZE
10131 && (diff > 240 || (diff & 15) != 0)
10132 && !crtl->accesses_prior_frames)
10134 /* Ideally we'd determine what portion of the local stack frame
10135 (within the constraint of the lowest 240) is most heavily used.
10136 But without that complication, simply bias the frame pointer
10137 by 128 bytes so as to maximize the amount of the local stack
10138 frame that is addressable with 8-bit offsets. */
10139 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10144 /* This is semi-inlined memory_address_length, but simplified
10145 since we know that we're always dealing with reg+offset, and
10146 to avoid having to create and discard all that rtl. */
10148 static inline int
10149 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10151 int len = 4;
10153 if (offset == 0)
10155 /* EBP and R13 cannot be encoded without an offset. */
10156 len = (regno == BP_REG || regno == R13_REG);
10158 else if (IN_RANGE (offset, -128, 127))
10159 len = 1;
10161 /* ESP and R12 must be encoded with a SIB byte. */
10162 if (regno == SP_REG || regno == R12_REG)
10163 len++;
10165 return len;
10168 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10169 The valid base registers are taken from CFUN->MACHINE->FS. */
10171 static rtx
10172 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10174 const struct machine_function *m = cfun->machine;
10175 rtx base_reg = NULL;
10176 HOST_WIDE_INT base_offset = 0;
10178 if (m->use_fast_prologue_epilogue)
10180 /* Choose the base register most likely to allow the most scheduling
10181 opportunities. Generally FP is valid throughout the function,
10182 while DRAP must be reloaded within the epilogue. But choose either
10183 over the SP due to increased encoding size. */
10185 if (m->fs.fp_valid)
10187 base_reg = hard_frame_pointer_rtx;
10188 base_offset = m->fs.fp_offset - cfa_offset;
10190 else if (m->fs.drap_valid)
10192 base_reg = crtl->drap_reg;
10193 base_offset = 0 - cfa_offset;
10195 else if (m->fs.sp_valid)
10197 base_reg = stack_pointer_rtx;
10198 base_offset = m->fs.sp_offset - cfa_offset;
10201 else
10203 HOST_WIDE_INT toffset;
10204 int len = 16, tlen;
10206 /* Choose the base register with the smallest address encoding.
10207 With a tie, choose FP > DRAP > SP. */
10208 if (m->fs.sp_valid)
10210 base_reg = stack_pointer_rtx;
10211 base_offset = m->fs.sp_offset - cfa_offset;
10212 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10214 if (m->fs.drap_valid)
10216 toffset = 0 - cfa_offset;
10217 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10218 if (tlen <= len)
10220 base_reg = crtl->drap_reg;
10221 base_offset = toffset;
10222 len = tlen;
10225 if (m->fs.fp_valid)
10227 toffset = m->fs.fp_offset - cfa_offset;
10228 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10229 if (tlen <= len)
10231 base_reg = hard_frame_pointer_rtx;
10232 base_offset = toffset;
10233 len = tlen;
10237 gcc_assert (base_reg != NULL);
10239 return plus_constant (Pmode, base_reg, base_offset);
10242 /* Emit code to save registers in the prologue. */
10244 static void
10245 ix86_emit_save_regs (void)
10247 unsigned int regno;
10248 rtx insn;
10250 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10251 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10253 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10254 RTX_FRAME_RELATED_P (insn) = 1;
10258 /* Emit a single register save at CFA - CFA_OFFSET. */
10260 static void
10261 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10262 HOST_WIDE_INT cfa_offset)
10264 struct machine_function *m = cfun->machine;
10265 rtx reg = gen_rtx_REG (mode, regno);
10266 rtx mem, addr, base, insn;
10268 addr = choose_baseaddr (cfa_offset);
10269 mem = gen_frame_mem (mode, addr);
10271 /* For SSE saves, we need to indicate the 128-bit alignment. */
10272 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10274 insn = emit_move_insn (mem, reg);
10275 RTX_FRAME_RELATED_P (insn) = 1;
10277 base = addr;
10278 if (GET_CODE (base) == PLUS)
10279 base = XEXP (base, 0);
10280 gcc_checking_assert (REG_P (base));
10282 /* When saving registers into a re-aligned local stack frame, avoid
10283 any tricky guessing by dwarf2out. */
10284 if (m->fs.realigned)
10286 gcc_checking_assert (stack_realign_drap);
10288 if (regno == REGNO (crtl->drap_reg))
10290 /* A bit of a hack. We force the DRAP register to be saved in
10291 the re-aligned stack frame, which provides us with a copy
10292 of the CFA that will last past the prologue. Install it. */
10293 gcc_checking_assert (cfun->machine->fs.fp_valid);
10294 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10295 cfun->machine->fs.fp_offset - cfa_offset);
10296 mem = gen_rtx_MEM (mode, addr);
10297 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10299 else
10301 /* The frame pointer is a stable reference within the
10302 aligned frame. Use it. */
10303 gcc_checking_assert (cfun->machine->fs.fp_valid);
10304 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10305 cfun->machine->fs.fp_offset - cfa_offset);
10306 mem = gen_rtx_MEM (mode, addr);
10307 add_reg_note (insn, REG_CFA_EXPRESSION,
10308 gen_rtx_SET (VOIDmode, mem, reg));
10312 /* The memory may not be relative to the current CFA register,
10313 which means that we may need to generate a new pattern for
10314 use by the unwind info. */
10315 else if (base != m->fs.cfa_reg)
10317 addr = plus_constant (Pmode, m->fs.cfa_reg,
10318 m->fs.cfa_offset - cfa_offset);
10319 mem = gen_rtx_MEM (mode, addr);
10320 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10324 /* Emit code to save registers using MOV insns.
10325 First register is stored at CFA - CFA_OFFSET. */
10326 static void
10327 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10329 unsigned int regno;
10331 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10332 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10334 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10335 cfa_offset -= UNITS_PER_WORD;
10339 /* Emit code to save SSE registers using MOV insns.
10340 First register is stored at CFA - CFA_OFFSET. */
10341 static void
10342 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10344 unsigned int regno;
10346 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10347 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10349 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10350 cfa_offset -= 16;
10354 static GTY(()) rtx queued_cfa_restores;
10356 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10357 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10358 Don't add the note if the previously saved value will be left untouched
10359 within stack red-zone till return, as unwinders can find the same value
10360 in the register and on the stack. */
10362 static void
10363 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10365 if (!crtl->shrink_wrapped
10366 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10367 return;
10369 if (insn)
10371 add_reg_note (insn, REG_CFA_RESTORE, reg);
10372 RTX_FRAME_RELATED_P (insn) = 1;
10374 else
10375 queued_cfa_restores
10376 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10379 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10381 static void
10382 ix86_add_queued_cfa_restore_notes (rtx insn)
10384 rtx last;
10385 if (!queued_cfa_restores)
10386 return;
10387 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10389 XEXP (last, 1) = REG_NOTES (insn);
10390 REG_NOTES (insn) = queued_cfa_restores;
10391 queued_cfa_restores = NULL_RTX;
10392 RTX_FRAME_RELATED_P (insn) = 1;
10395 /* Expand prologue or epilogue stack adjustment.
10396 The pattern exists to put a dependency on all ebp-based memory accesses.
10397 STYLE should be negative if instructions should be marked as frame related,
10398 zero if the %r11 register is live and cannot be freely used, and positive
10399 otherwise. */
10401 static void
10402 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10403 int style, bool set_cfa)
10405 struct machine_function *m = cfun->machine;
10406 rtx insn;
10407 bool add_frame_related_expr = false;
10409 if (Pmode == SImode)
10410 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10411 else if (x86_64_immediate_operand (offset, DImode))
10412 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10413 else
10415 rtx tmp;
10416 /* r11 is used by indirect sibcall return as well, set before the
10417 epilogue and used after the epilogue. */
10418 if (style)
10419 tmp = gen_rtx_REG (DImode, R11_REG);
10420 else
10422 gcc_assert (src != hard_frame_pointer_rtx
10423 && dest != hard_frame_pointer_rtx);
10424 tmp = hard_frame_pointer_rtx;
10426 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10427 if (style < 0)
10428 add_frame_related_expr = true;
10430 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10433 insn = emit_insn (insn);
10434 if (style >= 0)
10435 ix86_add_queued_cfa_restore_notes (insn);
10437 if (set_cfa)
10439 rtx r;
10441 gcc_assert (m->fs.cfa_reg == src);
10442 m->fs.cfa_offset += INTVAL (offset);
10443 m->fs.cfa_reg = dest;
10445 r = gen_rtx_PLUS (Pmode, src, offset);
10446 r = gen_rtx_SET (VOIDmode, dest, r);
10447 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10448 RTX_FRAME_RELATED_P (insn) = 1;
10450 else if (style < 0)
10452 RTX_FRAME_RELATED_P (insn) = 1;
10453 if (add_frame_related_expr)
10455 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10456 r = gen_rtx_SET (VOIDmode, dest, r);
10457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10461 if (dest == stack_pointer_rtx)
10463 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10464 bool valid = m->fs.sp_valid;
10466 if (src == hard_frame_pointer_rtx)
10468 valid = m->fs.fp_valid;
10469 ooffset = m->fs.fp_offset;
10471 else if (src == crtl->drap_reg)
10473 valid = m->fs.drap_valid;
10474 ooffset = 0;
10476 else
10478 /* Else there are two possibilities: SP itself, which we set
10479 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10480 taken care of by hand along the eh_return path. */
10481 gcc_checking_assert (src == stack_pointer_rtx
10482 || offset == const0_rtx);
10485 m->fs.sp_offset = ooffset - INTVAL (offset);
10486 m->fs.sp_valid = valid;
10490 /* Find an available register to be used as the dynamic realign argument
10491 pointer register. Such a register will be written in the prologue and
10492 used at the beginning of the body, so it must not be
10493 1. parameter passing register.
10494 2. GOT pointer.
10495 We reuse the static-chain register if it is available. Otherwise, we
10496 use DI for i386 and R13 for x86-64. We chose R13 since it has
10497 a shorter encoding.
10499 Return: the regno of the chosen register. */
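/* (DRAP = dynamic realign argument pointer.  The register chosen here is
   copied into a virtual register, vDRAP, by ix86_get_drap_rtx below.)  */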
10501 static unsigned int
10502 find_drap_reg (void)
10504 tree decl = cfun->decl;
10506 if (TARGET_64BIT)
10508 /* Use R13 for a nested function or a function that needs a static chain.
10509 Since a function with a tail call may use any caller-saved
10510 register in the epilogue, DRAP must not use a caller-saved
10511 register in such a case. */
10512 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10513 return R13_REG;
10515 return R10_REG;
10517 else
10519 /* Use DI for a nested function or a function that needs a static chain.
10520 Since a function with a tail call may use any caller-saved
10521 register in the epilogue, DRAP must not use a caller-saved
10522 register in such a case. */
10523 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10524 return DI_REG;
10526 /* Reuse static chain register if it isn't used for parameter
10527 passing. */
10528 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10530 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10531 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10532 return CX_REG;
10534 return DI_REG;
10538 /* Return minimum incoming stack alignment. */
10540 static unsigned int
10541 ix86_minimum_incoming_stack_boundary (bool sibcall)
10543 unsigned int incoming_stack_boundary;
10545 /* Prefer the one specified at command line. */
10546 if (ix86_user_incoming_stack_boundary)
10547 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10548 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10549 if -mstackrealign is used, this isn't a sibcall check, and the
10550 estimated stack alignment is 128 bits. */
10551 else if (!sibcall
10552 && !TARGET_64BIT
10553 && ix86_force_align_arg_pointer
10554 && crtl->stack_alignment_estimated == 128)
10555 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10556 else
10557 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10559 /* Incoming stack alignment can be changed on individual functions
10560 via force_align_arg_pointer attribute. We use the smallest
10561 incoming stack boundary. */
10562 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10563 && lookup_attribute (ix86_force_align_arg_pointer_string,
10564 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10565 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10567 /* The incoming stack frame has to be aligned at least at
10568 parm_stack_boundary. */
10569 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10570 incoming_stack_boundary = crtl->parm_stack_boundary;
10572 /* The stack at the entry of main is aligned by the runtime. We use the
10573 smallest incoming stack boundary. */
10574 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10575 && DECL_NAME (current_function_decl)
10576 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10577 && DECL_FILE_SCOPE_P (current_function_decl))
10578 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10580 return incoming_stack_boundary;
10583 /* Update incoming stack boundary and estimated stack alignment. */
10585 static void
10586 ix86_update_stack_boundary (void)
10588 ix86_incoming_stack_boundary
10589 = ix86_minimum_incoming_stack_boundary (false);
10591 /* x86_64 varargs need 16-byte stack alignment for the register save
10592 area. */
10593 if (TARGET_64BIT
10594 && cfun->stdarg
10595 && crtl->stack_alignment_estimated < 128)
10596 crtl->stack_alignment_estimated = 128;
10599 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10600 needed or an rtx for DRAP otherwise. */
10602 static rtx
10603 ix86_get_drap_rtx (void)
10605 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10606 crtl->need_drap = true;
10608 if (stack_realign_drap)
10610 /* Assign DRAP to vDRAP and return vDRAP. */
10611 unsigned int regno = find_drap_reg ();
10612 rtx drap_vreg;
10613 rtx arg_ptr;
10614 rtx_insn *seq, *insn;
10616 arg_ptr = gen_rtx_REG (Pmode, regno);
10617 crtl->drap_reg = arg_ptr;
10619 start_sequence ();
10620 drap_vreg = copy_to_reg (arg_ptr);
10621 seq = get_insns ();
10622 end_sequence ();
10624 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10625 if (!optimize)
10627 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10628 RTX_FRAME_RELATED_P (insn) = 1;
10630 return drap_vreg;
10632 else
10633 return NULL;
10636 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10638 static rtx
10639 ix86_internal_arg_pointer (void)
10641 return virtual_incoming_args_rtx;
10644 struct scratch_reg {
10645 rtx reg;
10646 bool saved;
10649 /* Return a short-lived scratch register for use on function entry.
10650 In 32-bit mode, it is valid only after the registers are saved
10651 in the prologue. This register must be released by means of
10652 release_scratch_register_on_entry once it is dead. */
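/* The expected usage pattern, as in ix86_adjust_stack_and_probe and
   ix86_emit_probe_stack_range below, is:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg ...
     release_scratch_register_on_entry (&sr);  */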
10654 static void
10655 get_scratch_register_on_entry (struct scratch_reg *sr)
10657 int regno;
10659 sr->saved = false;
10661 if (TARGET_64BIT)
10663 /* We always use R11 in 64-bit mode. */
10664 regno = R11_REG;
10666 else
10668 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10669 bool fastcall_p
10670 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10671 bool thiscall_p
10672 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10673 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10674 int regparm = ix86_function_regparm (fntype, decl);
10675 int drap_regno
10676 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10678 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10679 for the static chain register. */
10680 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10681 && drap_regno != AX_REG)
10682 regno = AX_REG;
10683 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10684 for the static chain register. */
10685 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10686 regno = AX_REG;
10687 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10688 regno = DX_REG;
10689 /* ecx is the static chain register. */
10690 else if (regparm < 3 && !fastcall_p && !thiscall_p
10691 && !static_chain_p
10692 && drap_regno != CX_REG)
10693 regno = CX_REG;
10694 else if (ix86_save_reg (BX_REG, true))
10695 regno = BX_REG;
10696 /* esi is the static chain register. */
10697 else if (!(regparm == 3 && static_chain_p)
10698 && ix86_save_reg (SI_REG, true))
10699 regno = SI_REG;
10700 else if (ix86_save_reg (DI_REG, true))
10701 regno = DI_REG;
10702 else
10704 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10705 sr->saved = true;
10709 sr->reg = gen_rtx_REG (Pmode, regno);
10710 if (sr->saved)
10712 rtx insn = emit_insn (gen_push (sr->reg));
10713 RTX_FRAME_RELATED_P (insn) = 1;
10717 /* Release a scratch register obtained from the preceding function. */
10719 static void
10720 release_scratch_register_on_entry (struct scratch_reg *sr)
10722 if (sr->saved)
10724 struct machine_function *m = cfun->machine;
10725 rtx x, insn = emit_insn (gen_pop (sr->reg));
10727 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10728 RTX_FRAME_RELATED_P (insn) = 1;
10729 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10730 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10731 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10732 m->fs.sp_offset -= UNITS_PER_WORD;
10736 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10738 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10740 static void
10741 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10743 /* We skip the probe for the first interval + a small dope of 4 words and
10744 probe that many bytes past the specified size to maintain a protection
10745 area at the bottom of the stack. */
10746 const int dope = 4 * UNITS_PER_WORD;
10747 rtx size_rtx = GEN_INT (size), last;
10749 /* See if we have a constant small number of probes to generate. If so,
10750 that's the easy case. The run-time loop is made up of 11 insns in the
10751 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10752 for n # of intervals. */
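      /* A rough count for illustration: with SIZE of 3 intervals the
	 unrolled sequence is about 3 + 2*(3-1) = 7 insns, still cheaper
	 than the 11-insn run-time loop; at 5 intervals (3 + 2*4 = 11)
	 the two break even, hence the threshold below.  */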
10753 if (size <= 5 * PROBE_INTERVAL)
10755 HOST_WIDE_INT i, adjust;
10756 bool first_probe = true;
10758 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10759 values of N from 1 until it exceeds SIZE. If only one probe is
10760 needed, this will not generate any code. Then adjust and probe
10761 to PROBE_INTERVAL + SIZE. */
10762 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10764 if (first_probe)
10766 adjust = 2 * PROBE_INTERVAL + dope;
10767 first_probe = false;
10769 else
10770 adjust = PROBE_INTERVAL;
10772 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10773 plus_constant (Pmode, stack_pointer_rtx,
10774 -adjust)));
10775 emit_stack_probe (stack_pointer_rtx);
10778 if (first_probe)
10779 adjust = size + PROBE_INTERVAL + dope;
10780 else
10781 adjust = size + PROBE_INTERVAL - i;
10783 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10784 plus_constant (Pmode, stack_pointer_rtx,
10785 -adjust)));
10786 emit_stack_probe (stack_pointer_rtx);
10788 /* Adjust back to account for the additional first interval. */
10789 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10790 plus_constant (Pmode, stack_pointer_rtx,
10791 PROBE_INTERVAL + dope)));
10794 /* Otherwise, do the same as above, but in a loop. Note that we must be
10795 extra careful with variables wrapping around because we might be at
10796 the very top (or the very bottom) of the address space and we have
10797 to be able to handle this case properly; in particular, we use an
10798 equality test for the loop condition. */
10799 else
10801 HOST_WIDE_INT rounded_size;
10802 struct scratch_reg sr;
10804 get_scratch_register_on_entry (&sr);
10807 /* Step 1: round SIZE to the previous multiple of the interval. */
10809 rounded_size = size & -PROBE_INTERVAL;
10812 /* Step 2: compute initial and final value of the loop counter. */
10814 /* SP = SP_0 + PROBE_INTERVAL. */
10815 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10816 plus_constant (Pmode, stack_pointer_rtx,
10817 - (PROBE_INTERVAL + dope))));
10819 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10820 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10821 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10822 gen_rtx_PLUS (Pmode, sr.reg,
10823 stack_pointer_rtx)));
10826 /* Step 3: the loop
10828 while (SP != LAST_ADDR)
10830 SP = SP + PROBE_INTERVAL
10831 probe at SP
10834 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10835 values of N from 1 until it is equal to ROUNDED_SIZE. */
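      /* The assembly for this loop is produced by
	 output_adjust_stack_and_probe below (cmp/je, sub, or-probe, jmp).  */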
10837 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10840 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10841 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10843 if (size != rounded_size)
10845 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10846 plus_constant (Pmode, stack_pointer_rtx,
10847 rounded_size - size)));
10848 emit_stack_probe (stack_pointer_rtx);
10851 /* Adjust back to account for the additional first interval. */
10852 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10853 plus_constant (Pmode, stack_pointer_rtx,
10854 PROBE_INTERVAL + dope)));
10856 release_scratch_register_on_entry (&sr);
10859 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10861 /* Even if the stack pointer isn't the CFA register, we need to correctly
10862 describe the adjustments made to it, in particular differentiate the
10863 frame-related ones from the frame-unrelated ones. */
10864 if (size > 0)
10866 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10867 XVECEXP (expr, 0, 0)
10868 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10869 plus_constant (Pmode, stack_pointer_rtx, -size));
10870 XVECEXP (expr, 0, 1)
10871 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10872 plus_constant (Pmode, stack_pointer_rtx,
10873 PROBE_INTERVAL + dope + size));
10874 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10875 RTX_FRAME_RELATED_P (last) = 1;
10877 cfun->machine->fs.sp_offset += size;
10880 /* Make sure nothing is scheduled before we are done. */
10881 emit_insn (gen_blockage ());
10884 /* Adjust the stack pointer up to REG while probing it. */
10886 const char *
10887 output_adjust_stack_and_probe (rtx reg)
10889 static int labelno = 0;
10890 char loop_lab[32], end_lab[32];
10891 rtx xops[2];
10893 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10894 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10896 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10898 /* Jump to END_LAB if SP == LAST_ADDR. */
10899 xops[0] = stack_pointer_rtx;
10900 xops[1] = reg;
10901 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10902 fputs ("\tje\t", asm_out_file);
10903 assemble_name_raw (asm_out_file, end_lab);
10904 fputc ('\n', asm_out_file);
10906 /* SP = SP + PROBE_INTERVAL. */
10907 xops[1] = GEN_INT (PROBE_INTERVAL);
10908 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10910 /* Probe at SP. */
10911 xops[1] = const0_rtx;
10912 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10914 fprintf (asm_out_file, "\tjmp\t");
10915 assemble_name_raw (asm_out_file, loop_lab);
10916 fputc ('\n', asm_out_file);
10918 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10920 return "";
10923 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10924 inclusive. These are offsets from the current stack pointer. */
10926 static void
10927 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10929 /* See if we have a constant small number of probes to generate. If so,
10930 that's the easy case. The run-time loop is made up of 7 insns in the
10931 generic case while the compile-time loop is made up of n insns for n #
10932 of intervals. */
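  /* A rough count for illustration: probing 3 intervals inline takes
     3 probe insns versus the 7-insn run-time loop; at 7 intervals the
     two break even, hence the threshold below.  */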
10933 if (size <= 7 * PROBE_INTERVAL)
10935 HOST_WIDE_INT i;
10937 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10938 it exceeds SIZE. If only one probe is needed, this will not
10939 generate any code. Then probe at FIRST + SIZE. */
10940 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10941 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10942 -(first + i)));
10944 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10945 -(first + size)));
10948 /* Otherwise, do the same as above, but in a loop. Note that we must be
10949 extra careful with variables wrapping around because we might be at
10950 the very top (or the very bottom) of the address space and we have
10951 to be able to handle this case properly; in particular, we use an
10952 equality test for the loop condition. */
10953 else
10955 HOST_WIDE_INT rounded_size, last;
10956 struct scratch_reg sr;
10958 get_scratch_register_on_entry (&sr);
10961 /* Step 1: round SIZE to the previous multiple of the interval. */
10963 rounded_size = size & -PROBE_INTERVAL;
10966 /* Step 2: compute initial and final value of the loop counter. */
10968 /* TEST_OFFSET = FIRST. */
10969 emit_move_insn (sr.reg, GEN_INT (-first));
10971 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10972 last = first + rounded_size;
10975 /* Step 3: the loop
10977 while (TEST_ADDR != LAST_ADDR)
10979 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10980 probe at TEST_ADDR
10983 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10984 until it is equal to ROUNDED_SIZE. */
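      /* The assembly for this loop is produced by output_probe_stack_range
	 below.  */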
10986 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10989 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10990 that SIZE is equal to ROUNDED_SIZE. */
10992 if (size != rounded_size)
10993 emit_stack_probe (plus_constant (Pmode,
10994 gen_rtx_PLUS (Pmode,
10995 stack_pointer_rtx,
10996 sr.reg),
10997 rounded_size - size));
10999 release_scratch_register_on_entry (&sr);
11002 /* Make sure nothing is scheduled before we are done. */
11003 emit_insn (gen_blockage ());
11006 /* Probe a range of stack addresses from REG to END, inclusive. These are
11007 offsets from the current stack pointer. */
11009 const char *
11010 output_probe_stack_range (rtx reg, rtx end)
11012 static int labelno = 0;
11013 char loop_lab[32], end_lab[32];
11014 rtx xops[3];
11016 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11017 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11019 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11021 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11022 xops[0] = reg;
11023 xops[1] = end;
11024 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11025 fputs ("\tje\t", asm_out_file);
11026 assemble_name_raw (asm_out_file, end_lab);
11027 fputc ('\n', asm_out_file);
11029 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11030 xops[1] = GEN_INT (PROBE_INTERVAL);
11031 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11033 /* Probe at TEST_ADDR. */
11034 xops[0] = stack_pointer_rtx;
11035 xops[1] = reg;
11036 xops[2] = const0_rtx;
11037 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11039 fprintf (asm_out_file, "\tjmp\t");
11040 assemble_name_raw (asm_out_file, loop_lab);
11041 fputc ('\n', asm_out_file);
11043 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11045 return "";
11048 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11049 to be generated in correct form. */
11050 static void
11051 ix86_finalize_stack_realign_flags (void)
11053 /* Check if stack realignment is really needed after reload, and
11054 store the result in cfun. */
11055 unsigned int incoming_stack_boundary
11056 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11057 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11058 unsigned int stack_realign = (incoming_stack_boundary
11059 < (crtl->is_leaf
11060 ? crtl->max_used_stack_slot_alignment
11061 : crtl->stack_alignment_needed));
11063 if (crtl->stack_realign_finalized)
11065 /* After stack_realign_needed is finalized, we can no longer
11066 change it. */
11067 gcc_assert (crtl->stack_realign_needed == stack_realign);
11068 return;
11071 /* If the only reason for frame_pointer_needed is that we conservatively
11072 assumed stack realignment might be needed, but in the end nothing that
11073 needed the stack alignment had been spilled, clear frame_pointer_needed
11074 and say we don't need stack realignment. */
11075 if (stack_realign
11076 && frame_pointer_needed
11077 && crtl->is_leaf
11078 && flag_omit_frame_pointer
11079 && crtl->sp_is_unchanging
11080 && !ix86_current_function_calls_tls_descriptor
11081 && !crtl->accesses_prior_frames
11082 && !cfun->calls_alloca
11083 && !crtl->calls_eh_return
11084 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11085 && !ix86_frame_pointer_required ()
11086 && get_frame_size () == 0
11087 && ix86_nsaved_sseregs () == 0
11088 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11090 HARD_REG_SET set_up_by_prologue, prologue_used;
11091 basic_block bb;
11093 CLEAR_HARD_REG_SET (prologue_used);
11094 CLEAR_HARD_REG_SET (set_up_by_prologue);
11095 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11096 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11097 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11098 HARD_FRAME_POINTER_REGNUM);
11099 FOR_EACH_BB_FN (bb, cfun)
11101 rtx_insn *insn;
11102 FOR_BB_INSNS (bb, insn)
11103 if (NONDEBUG_INSN_P (insn)
11104 && requires_stack_frame_p (insn, prologue_used,
11105 set_up_by_prologue))
11107 crtl->stack_realign_needed = stack_realign;
11108 crtl->stack_realign_finalized = true;
11109 return;
11113 /* If drap has been set, but it actually isn't live at the start
11114 of the function, there is no reason to set it up. */
11115 if (crtl->drap_reg)
11117 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11118 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11120 crtl->drap_reg = NULL_RTX;
11121 crtl->need_drap = false;
11124 else
11125 cfun->machine->no_drap_save_restore = true;
11127 frame_pointer_needed = false;
11128 stack_realign = false;
11129 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11130 crtl->stack_alignment_needed = incoming_stack_boundary;
11131 crtl->stack_alignment_estimated = incoming_stack_boundary;
11132 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11133 crtl->preferred_stack_boundary = incoming_stack_boundary;
11134 df_finish_pass (true);
11135 df_scan_alloc (NULL);
11136 df_scan_blocks ();
11137 df_compute_regs_ever_live (true);
11138 df_analyze ();
11141 crtl->stack_realign_needed = stack_realign;
11142 crtl->stack_realign_finalized = true;
11145 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11147 static void
11148 ix86_elim_entry_set_got (rtx reg)
11150 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11151 rtx_insn *c_insn = BB_HEAD (bb);
11152 if (!NONDEBUG_INSN_P (c_insn))
11153 c_insn = next_nonnote_nondebug_insn (c_insn);
11154 if (c_insn && NONJUMP_INSN_P (c_insn))
11156 rtx pat = PATTERN (c_insn);
11157 if (GET_CODE (pat) == PARALLEL)
11159 rtx vec = XVECEXP (pat, 0, 0);
11160 if (GET_CODE (vec) == SET
11161 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11162 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11163 delete_insn (c_insn);
11168 /* Expand the prologue into a bunch of separate insns. */
11170 void
11171 ix86_expand_prologue (void)
11173 struct machine_function *m = cfun->machine;
11174 rtx insn, t;
11175 struct ix86_frame frame;
11176 HOST_WIDE_INT allocate;
11177 bool int_registers_saved;
11178 bool sse_registers_saved;
11180 ix86_finalize_stack_realign_flags ();
11182 /* DRAP should not coexist with stack_realign_fp */
11183 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11185 memset (&m->fs, 0, sizeof (m->fs));
11187 /* Initialize CFA state for before the prologue. */
11188 m->fs.cfa_reg = stack_pointer_rtx;
11189 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11191 /* Track SP offset to the CFA. We continue tracking this after we've
11192 swapped the CFA register away from SP. In the case of re-alignment
11193 this is fudged; we're interested in offsets within the local frame. */
11194 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11195 m->fs.sp_valid = true;
11197 ix86_compute_frame_layout (&frame);
11199 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11201 /* We should have already generated an error for any use of
11202 ms_hook on a nested function. */
11203 gcc_checking_assert (!ix86_static_chain_on_stack);
11205 /* Check if profiling is active and we shall use the profiling-before-
11206 prologue variant. If so, issue a sorry. */
11207 if (crtl->profile && flag_fentry != 0)
11208 sorry ("ms_hook_prologue attribute isn%'t compatible "
11209 "with -mfentry for 32-bit");
11211 /* In ix86_asm_output_function_label we emitted:
11212 8b ff movl.s %edi,%edi
11213 55 push %ebp
11214 8b ec movl.s %esp,%ebp
11216 This matches the hookable function prologue in Win32 API
11217 functions in Microsoft Windows XP Service Pack 2 and newer.
11218 Wine uses this to enable Windows apps to hook the Win32 API
11219 functions provided by Wine.
11221 What that means is that we've already set up the frame pointer. */
11223 if (frame_pointer_needed
11224 && !(crtl->drap_reg && crtl->stack_realign_needed))
11226 rtx push, mov;
11228 /* We've decided to use the frame pointer already set up.
11229 Describe this to the unwinder by pretending that both
11230 push and mov insns happen right here.
11232 Putting the unwind info here at the end of the ms_hook
11233 is done so that we can make absolutely certain we get
11234 the required byte sequence at the start of the function,
11235 rather than relying on an assembler that can produce
11236 the exact encoding required.
11238 However it does mean (in the unpatched case) that we have
11239 a 1 insn window where the asynchronous unwind info is
11240 incorrect. However, if we placed the unwind info at
11241 its correct location we would have incorrect unwind info
11242 in the patched case. Which is probably all moot since
11243 I don't expect Wine generates dwarf2 unwind info for the
11244 system libraries that use this feature. */
11246 insn = emit_insn (gen_blockage ());
11248 push = gen_push (hard_frame_pointer_rtx);
11249 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11250 stack_pointer_rtx);
11251 RTX_FRAME_RELATED_P (push) = 1;
11252 RTX_FRAME_RELATED_P (mov) = 1;
11254 RTX_FRAME_RELATED_P (insn) = 1;
11255 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11256 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11258 /* Note that gen_push incremented m->fs.cfa_offset, even
11259 though we didn't emit the push insn here. */
11260 m->fs.cfa_reg = hard_frame_pointer_rtx;
11261 m->fs.fp_offset = m->fs.cfa_offset;
11262 m->fs.fp_valid = true;
11264 else
11266 /* The frame pointer is not needed so pop %ebp again.
11267 This leaves us with a pristine state. */
11268 emit_insn (gen_pop (hard_frame_pointer_rtx));
11272 /* The first insn of a function that accepts its static chain on the
11273 stack is to push the register that would be filled in by a direct
11274 call. This insn will be skipped by the trampoline. */
11275 else if (ix86_static_chain_on_stack)
11277 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11278 emit_insn (gen_blockage ());
11280 /* We don't want to interpret this push insn as a register save,
11281 only as a stack adjustment. The real copy of the register as
11282 a save will be done later, if needed. */
11283 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11284 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11285 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11286 RTX_FRAME_RELATED_P (insn) = 1;
11289 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11290 DRAP is needed and stack realignment is really needed after reload. */
11291 if (stack_realign_drap)
11293 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11295 /* Only need to push the parameter pointer reg if it is callee saved. */
11296 if (!call_used_regs[REGNO (crtl->drap_reg)])
11298 /* Push arg pointer reg */
11299 insn = emit_insn (gen_push (crtl->drap_reg));
11300 RTX_FRAME_RELATED_P (insn) = 1;
11303 /* Grab the argument pointer. */
11304 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11305 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11306 RTX_FRAME_RELATED_P (insn) = 1;
11307 m->fs.cfa_reg = crtl->drap_reg;
11308 m->fs.cfa_offset = 0;
11310 /* Align the stack. */
11311 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11312 stack_pointer_rtx,
11313 GEN_INT (-align_bytes)));
11314 RTX_FRAME_RELATED_P (insn) = 1;
11316 /* Replicate the return address on the stack so that the return
11317 address can be reached via the (argp - 1) slot. This is needed
11318 to implement macro RETURN_ADDR_RTX and intrinsic function
11319 expand_builtin_return_addr etc. */
11320 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11321 t = gen_frame_mem (word_mode, t);
11322 insn = emit_insn (gen_push (t));
11323 RTX_FRAME_RELATED_P (insn) = 1;
11325 /* For the purposes of frame and register save area addressing,
11326 we've started over with a new frame. */
11327 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11328 m->fs.realigned = true;
11331 int_registers_saved = (frame.nregs == 0);
11332 sse_registers_saved = (frame.nsseregs == 0);
11334 if (frame_pointer_needed && !m->fs.fp_valid)
11336 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11337 slower on all targets. Also sdb doesn't like it. */
11338 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11339 RTX_FRAME_RELATED_P (insn) = 1;
11341 /* Push registers now, before setting the frame pointer
11342 on SEH target. */
11343 if (!int_registers_saved
11344 && TARGET_SEH
11345 && !frame.save_regs_using_mov)
11347 ix86_emit_save_regs ();
11348 int_registers_saved = true;
11349 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11352 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11354 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11355 RTX_FRAME_RELATED_P (insn) = 1;
11357 if (m->fs.cfa_reg == stack_pointer_rtx)
11358 m->fs.cfa_reg = hard_frame_pointer_rtx;
11359 m->fs.fp_offset = m->fs.sp_offset;
11360 m->fs.fp_valid = true;
11364 if (!int_registers_saved)
11366 /* If saving registers via PUSH, do so now. */
11367 if (!frame.save_regs_using_mov)
11369 ix86_emit_save_regs ();
11370 int_registers_saved = true;
11371 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11374 /* When using the red zone we may start register saving before allocating
11375 the stack frame, saving one cycle of the prologue. However, avoid
11376 doing this if we have to probe the stack; at least on x86_64 the
11377 stack probe can turn into a call that clobbers a red zone location. */
11378 else if (ix86_using_red_zone ()
11379 && (! TARGET_STACK_PROBE
11380 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11382 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11383 int_registers_saved = true;
11387 if (stack_realign_fp)
11389 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11390 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11392 /* The computation of the size of the re-aligned stack frame means
11393 that we must allocate the size of the register save area before
11394 performing the actual alignment. Otherwise we cannot guarantee
11395 that there's enough storage above the realignment point. */
11396 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11397 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11398 GEN_INT (m->fs.sp_offset
11399 - frame.sse_reg_save_offset),
11400 -1, false);
11402 /* Align the stack. */
11403 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11404 stack_pointer_rtx,
11405 GEN_INT (-align_bytes)));
11407 /* For the purposes of register save area addressing, the stack
11408 pointer is no longer valid. As for the value of sp_offset,
11409 see ix86_compute_frame_layout, which we need to match in order
11410 to pass verification of stack_pointer_offset at the end. */
11411 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11412 m->fs.sp_valid = false;
11415 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11417 if (flag_stack_usage_info)
11419 /* We start to count from ARG_POINTER. */
11420 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11422 /* If it was realigned, take into account the fake frame. */
11423 if (stack_realign_drap)
11425 if (ix86_static_chain_on_stack)
11426 stack_size += UNITS_PER_WORD;
11428 if (!call_used_regs[REGNO (crtl->drap_reg)])
11429 stack_size += UNITS_PER_WORD;
11431 /* This over-estimates by 1 minimal-stack-alignment-unit but
11432 mitigates that by counting in the new return address slot. */
11433 current_function_dynamic_stack_size
11434 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11437 current_function_static_stack_size = stack_size;
11440 /* On SEH target with very large frame size, allocate an area to save
11441 SSE registers (as the very large allocation won't be described). */
11442 if (TARGET_SEH
11443 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11444 && !sse_registers_saved)
11446 HOST_WIDE_INT sse_size =
11447 frame.sse_reg_save_offset - frame.reg_save_offset;
11449 gcc_assert (int_registers_saved);
11451 /* No need to do stack checking as the area will be immediately
11452 written. */
11453 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11454 GEN_INT (-sse_size), -1,
11455 m->fs.cfa_reg == stack_pointer_rtx);
11456 allocate -= sse_size;
11457 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11458 sse_registers_saved = true;
11461 /* The stack has already been decremented by the instruction calling us
11462 so probe if the size is non-negative to preserve the protection area. */
11463 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11465 /* We expect the registers to be saved when probes are used. */
11466 gcc_assert (int_registers_saved);
11468 if (STACK_CHECK_MOVING_SP)
11470 if (!(crtl->is_leaf && !cfun->calls_alloca
11471 && allocate <= PROBE_INTERVAL))
11473 ix86_adjust_stack_and_probe (allocate);
11474 allocate = 0;
11477 else
11479 HOST_WIDE_INT size = allocate;
11481 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11482 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11484 if (TARGET_STACK_PROBE)
11486 if (crtl->is_leaf && !cfun->calls_alloca)
11488 if (size > PROBE_INTERVAL)
11489 ix86_emit_probe_stack_range (0, size);
11491 else
11492 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11494 else
11496 if (crtl->is_leaf && !cfun->calls_alloca)
11498 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11499 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11500 size - STACK_CHECK_PROTECT);
11502 else
11503 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11508 if (allocate == 0)
11510 else if (!ix86_target_stack_probe ()
11511 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11513 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11514 GEN_INT (-allocate), -1,
11515 m->fs.cfa_reg == stack_pointer_rtx);
11517 else
11519 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11520 rtx r10 = NULL;
11521 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11522 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11523 bool eax_live = ix86_eax_live_at_start_p ();
11524 bool r10_live = false;
11526 if (TARGET_64BIT)
11527 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11529 if (eax_live)
11531 insn = emit_insn (gen_push (eax));
11532 allocate -= UNITS_PER_WORD;
11533 /* Note that SEH directives need to continue tracking the stack
11534 pointer even after the frame pointer has been set up. */
11535 if (sp_is_cfa_reg || TARGET_SEH)
11537 if (sp_is_cfa_reg)
11538 m->fs.cfa_offset += UNITS_PER_WORD;
11539 RTX_FRAME_RELATED_P (insn) = 1;
11543 if (r10_live)
11545 r10 = gen_rtx_REG (Pmode, R10_REG);
11546 insn = emit_insn (gen_push (r10));
11547 allocate -= UNITS_PER_WORD;
11548 if (sp_is_cfa_reg || TARGET_SEH)
11550 if (sp_is_cfa_reg)
11551 m->fs.cfa_offset += UNITS_PER_WORD;
11552 RTX_FRAME_RELATED_P (insn) = 1;
11556 emit_move_insn (eax, GEN_INT (allocate));
11557 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11559 /* Use the fact that AX still contains ALLOCATE. */
11560 adjust_stack_insn = (Pmode == DImode
11561 ? gen_pro_epilogue_adjust_stack_di_sub
11562 : gen_pro_epilogue_adjust_stack_si_sub);
11564 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11565 stack_pointer_rtx, eax));
11567 if (sp_is_cfa_reg || TARGET_SEH)
11569 if (sp_is_cfa_reg)
11570 m->fs.cfa_offset += allocate;
11571 RTX_FRAME_RELATED_P (insn) = 1;
11572 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11573 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11574 plus_constant (Pmode, stack_pointer_rtx,
11575 -allocate)));
11577 m->fs.sp_offset += allocate;
11579 /* Use stack_pointer_rtx for relative addressing so that code
11580 works for realigned stack, too. */
11581 if (r10_live && eax_live)
11583 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11584 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11585 gen_frame_mem (word_mode, t));
11586 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11587 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11588 gen_frame_mem (word_mode, t));
11590 else if (eax_live || r10_live)
11592 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11593 emit_move_insn (gen_rtx_REG (word_mode,
11594 (eax_live ? AX_REG : R10_REG)),
11595 gen_frame_mem (word_mode, t));
11598 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11600 /* If we haven't already set up the frame pointer, do so now. */
11601 if (frame_pointer_needed && !m->fs.fp_valid)
11603 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11604 GEN_INT (frame.stack_pointer_offset
11605 - frame.hard_frame_pointer_offset));
11606 insn = emit_insn (insn);
11607 RTX_FRAME_RELATED_P (insn) = 1;
11608 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11610 if (m->fs.cfa_reg == stack_pointer_rtx)
11611 m->fs.cfa_reg = hard_frame_pointer_rtx;
11612 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11613 m->fs.fp_valid = true;
11616 if (!int_registers_saved)
11617 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11618 if (!sse_registers_saved)
11619 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11621 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11622 in the prologue. */
11623 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11625 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11626 insn = emit_insn (gen_set_got (pic));
11627 RTX_FRAME_RELATED_P (insn) = 1;
11628 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11629 emit_insn (gen_prologue_use (pic));
11630 /* Delete an already emitted SET_GOT if it exists and is allocated to
11631 REAL_PIC_OFFSET_TABLE_REGNUM. */
11632 ix86_elim_entry_set_got (pic);
11635 if (crtl->drap_reg && !crtl->stack_realign_needed)
11637 /* vDRAP is set up, but after reload it turns out stack realignment
11638 isn't necessary; here we emit prologue code to set up DRAP
11639 without the stack realignment adjustment. */
11640 t = choose_baseaddr (0);
11641 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11644 /* Prevent instructions from being scheduled into the register save push
11645 sequence when access to the red-zone area is done through the frame
11646 pointer. The offset between the frame pointer and the stack pointer is
11647 calculated relative to the value of the stack pointer at the end of the
11648 function prologue, and moving instructions that access the red-zone area
11649 via the frame pointer inside the push sequence violates this assumption. */
11650 if (frame_pointer_needed && frame.red_zone_size)
11651 emit_insn (gen_memory_blockage ());
11653 /* Emit cld instruction if stringops are used in the function. */
11654 if (TARGET_CLD && ix86_current_function_needs_cld)
11655 emit_insn (gen_cld ());
11657 /* SEH requires that the prologue end within 256 bytes of the start of
11658 the function. Prevent instruction schedules that would extend that.
11659 Further, prevent alloca modifications to the stack pointer from being
11660 combined with prologue modifications. */
11661 if (TARGET_SEH)
11662 emit_insn (gen_prologue_use (stack_pointer_rtx));
11665 /* Emit code to restore REG using a POP insn. */
11667 static void
11668 ix86_emit_restore_reg_using_pop (rtx reg)
11670 struct machine_function *m = cfun->machine;
11671 rtx insn = emit_insn (gen_pop (reg));
11673 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11674 m->fs.sp_offset -= UNITS_PER_WORD;
11676 if (m->fs.cfa_reg == crtl->drap_reg
11677 && REGNO (reg) == REGNO (crtl->drap_reg))
11679 /* Previously we'd represented the CFA as an expression
11680 like *(%ebp - 8). We've just popped that value from
11681 the stack, which means we need to reset the CFA to
11682 the drap register. This will remain until we restore
11683 the stack pointer. */
11684 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11685 RTX_FRAME_RELATED_P (insn) = 1;
11687 /* This means that the DRAP register is valid for addressing too. */
11688 m->fs.drap_valid = true;
11689 return;
11692 if (m->fs.cfa_reg == stack_pointer_rtx)
11694 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11695 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11696 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11697 RTX_FRAME_RELATED_P (insn) = 1;
11699 m->fs.cfa_offset -= UNITS_PER_WORD;
11702 /* When the frame pointer is the CFA, and we pop it, we are
11703 swapping back to the stack pointer as the CFA. This happens
11704 for stack frames that don't allocate other data, so we assume
11705 the stack pointer is now pointing at the return address, i.e.
11706 the function entry state, which makes the offset 1 word. */
11707 if (reg == hard_frame_pointer_rtx)
11709 m->fs.fp_valid = false;
11710 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11712 m->fs.cfa_reg = stack_pointer_rtx;
11713 m->fs.cfa_offset -= UNITS_PER_WORD;
11715 add_reg_note (insn, REG_CFA_DEF_CFA,
11716 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11717 GEN_INT (m->fs.cfa_offset)));
11718 RTX_FRAME_RELATED_P (insn) = 1;
11723 /* Emit code to restore saved registers using POP insns. */
11725 static void
11726 ix86_emit_restore_regs_using_pop (void)
11728 unsigned int regno;
11730 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11731 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11732 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11735 /* Emit code and notes for the LEAVE instruction. */
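/* (LEAVE is equivalent to "mov %ebp, %esp; pop %ebp", so after it the
   stack pointer is valid again, one word above where the frame pointer
   pointed, which is what the fs updates below record.)  */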
11737 static void
11738 ix86_emit_leave (void)
11740 struct machine_function *m = cfun->machine;
11741 rtx insn = emit_insn (ix86_gen_leave ());
11743 ix86_add_queued_cfa_restore_notes (insn);
11745 gcc_assert (m->fs.fp_valid);
11746 m->fs.sp_valid = true;
11747 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11748 m->fs.fp_valid = false;
11750 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11752 m->fs.cfa_reg = stack_pointer_rtx;
11753 m->fs.cfa_offset = m->fs.sp_offset;
11755 add_reg_note (insn, REG_CFA_DEF_CFA,
11756 plus_constant (Pmode, stack_pointer_rtx,
11757 m->fs.sp_offset));
11758 RTX_FRAME_RELATED_P (insn) = 1;
11760 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11761 m->fs.fp_offset);
11764 /* Emit code to restore saved registers using MOV insns.
11765 First register is restored from CFA - CFA_OFFSET. */
11766 static void
11767 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11768 bool maybe_eh_return)
11770 struct machine_function *m = cfun->machine;
11771 unsigned int regno;
11773 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11774 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11776 rtx reg = gen_rtx_REG (word_mode, regno);
11777 rtx insn, mem;
11779 mem = choose_baseaddr (cfa_offset);
11780 mem = gen_frame_mem (word_mode, mem);
11781 insn = emit_move_insn (reg, mem);
11783 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11785 /* Previously we'd represented the CFA as an expression
11786 like *(%ebp - 8). We've just loaded that value from
11787 the stack, which means we need to reset the CFA to
11788 the drap register. This will remain until we restore
11789 the stack pointer. */
11790 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11791 RTX_FRAME_RELATED_P (insn) = 1;
11793 /* This means that the DRAP register is valid for addressing. */
11794 m->fs.drap_valid = true;
11796 else
11797 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11799 cfa_offset -= UNITS_PER_WORD;
11803 /* Emit code to restore saved SSE registers using MOV insns.
11804 First register is restored from CFA - CFA_OFFSET. */
11805 static void
11806 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11807 bool maybe_eh_return)
11809 unsigned int regno;
11811 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11812 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11814 rtx reg = gen_rtx_REG (V4SFmode, regno);
11815 rtx mem;
11817 mem = choose_baseaddr (cfa_offset);
11818 mem = gen_rtx_MEM (V4SFmode, mem);
11819 set_mem_align (mem, 128);
11820 emit_move_insn (reg, mem);
11822 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11824 cfa_offset -= 16;
11828 /* Restore function stack, frame, and registers. */
11830 void
11831 ix86_expand_epilogue (int style)
11833 struct machine_function *m = cfun->machine;
11834 struct machine_frame_state frame_state_save = m->fs;
11835 struct ix86_frame frame;
11836 bool restore_regs_via_mov;
11837 bool using_drap;
11839 ix86_finalize_stack_realign_flags ();
11840 ix86_compute_frame_layout (&frame);
11842 m->fs.sp_valid = (!frame_pointer_needed
11843 || (crtl->sp_is_unchanging
11844 && !stack_realign_fp));
11845 gcc_assert (!m->fs.sp_valid
11846 || m->fs.sp_offset == frame.stack_pointer_offset);
11848 /* The FP must be valid if the frame pointer is present. */
11849 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11850 gcc_assert (!m->fs.fp_valid
11851 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11853 /* We must have *some* valid pointer to the stack frame. */
11854 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11856 /* The DRAP is never valid at this point. */
11857 gcc_assert (!m->fs.drap_valid);
11859 /* See the comment about red zone and frame
11860 pointer usage in ix86_expand_prologue. */
11861 if (frame_pointer_needed && frame.red_zone_size)
11862 emit_insn (gen_memory_blockage ());
11864 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11865 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11867 /* Determine the CFA offset of the end of the red-zone. */
11868 m->fs.red_zone_offset = 0;
11869 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11871 /* The red-zone begins below the return address. */
11872 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11874 /* When the register save area is in the aligned portion of
11875 the stack, determine the maximum runtime displacement that
11876 matches up with the aligned frame. */
11877 if (stack_realign_drap)
11878 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11879 + UNITS_PER_WORD);
11882 /* Special care must be taken for the normal return case of a function
11883 using eh_return: the eax and edx registers are marked as saved, but
11884 not restored along this path. Adjust the save location to match. */
11885 if (crtl->calls_eh_return && style != 2)
11886 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11888 /* EH_RETURN requires the use of moves to function properly. */
11889 if (crtl->calls_eh_return)
11890 restore_regs_via_mov = true;
11891 /* SEH requires the use of pops to identify the epilogue. */
11892 else if (TARGET_SEH)
11893 restore_regs_via_mov = false;
11894 /* If we're only restoring one register and sp is not valid, then
11895 use a move instruction to restore the register, since it's
11896 less work than reloading sp and popping the register. */
11897 else if (!m->fs.sp_valid && frame.nregs <= 1)
11898 restore_regs_via_mov = true;
11899 else if (TARGET_EPILOGUE_USING_MOVE
11900 && cfun->machine->use_fast_prologue_epilogue
11901 && (frame.nregs > 1
11902 || m->fs.sp_offset != frame.reg_save_offset))
11903 restore_regs_via_mov = true;
11904 else if (frame_pointer_needed
11905 && !frame.nregs
11906 && m->fs.sp_offset != frame.reg_save_offset)
11907 restore_regs_via_mov = true;
11908 else if (frame_pointer_needed
11909 && TARGET_USE_LEAVE
11910 && cfun->machine->use_fast_prologue_epilogue
11911 && frame.nregs == 1)
11912 restore_regs_via_mov = true;
11913 else
11914 restore_regs_via_mov = false;
11916 if (restore_regs_via_mov || frame.nsseregs)
11918 /* Ensure that the entire register save area is addressable via
11919 the stack pointer, if we will restore via sp. */
11920 if (TARGET_64BIT
11921 && m->fs.sp_offset > 0x7fffffff
11922 && !(m->fs.fp_valid || m->fs.drap_valid)
11923 && (frame.nsseregs + frame.nregs) != 0)
11925 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11926 GEN_INT (m->fs.sp_offset
11927 - frame.sse_reg_save_offset),
11928 style,
11929 m->fs.cfa_reg == stack_pointer_rtx);
11933 /* If there are any SSE registers to restore, then we have to do it
11934 via moves, since there's obviously no pop for SSE regs. */
11935 if (frame.nsseregs)
11936 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11937 style == 2);
11939 if (restore_regs_via_mov)
11941 rtx t;
11943 if (frame.nregs)
11944 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11946 /* eh_return epilogues need %ecx added to the stack pointer. */
11947 if (style == 2)
11949 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11951 /* Stack align doesn't work with eh_return. */
11952 gcc_assert (!stack_realign_drap);
11953 /* Neither do regparm nested functions. */
11954 gcc_assert (!ix86_static_chain_on_stack);
11956 if (frame_pointer_needed)
11958 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11959 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11960 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11962 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11963 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11965 /* Note that we use SA as a temporary CFA, as the return
11966 address is at the proper place relative to it. We
11967 pretend this happens at the FP restore insn because
11968 prior to this insn the FP would be stored at the wrong
11969 offset relative to SA, and after this insn we have no
11970 other reasonable register to use for the CFA. We don't
11971 bother resetting the CFA to the SP for the duration of
11972 the return insn. */
11973 add_reg_note (insn, REG_CFA_DEF_CFA,
11974 plus_constant (Pmode, sa, UNITS_PER_WORD));
11975 ix86_add_queued_cfa_restore_notes (insn);
11976 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11977 RTX_FRAME_RELATED_P (insn) = 1;
11979 m->fs.cfa_reg = sa;
11980 m->fs.cfa_offset = UNITS_PER_WORD;
11981 m->fs.fp_valid = false;
11983 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11984 const0_rtx, style, false);
11986 else
11988 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11989 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11990 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11991 ix86_add_queued_cfa_restore_notes (insn);
11993 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11994 if (m->fs.cfa_offset != UNITS_PER_WORD)
11996 m->fs.cfa_offset = UNITS_PER_WORD;
11997 add_reg_note (insn, REG_CFA_DEF_CFA,
11998 plus_constant (Pmode, stack_pointer_rtx,
11999 UNITS_PER_WORD));
12000 RTX_FRAME_RELATED_P (insn) = 1;
12003 m->fs.sp_offset = UNITS_PER_WORD;
12004 m->fs.sp_valid = true;
12007 else
12009 /* SEH requires that the function end with (1) a stack adjustment
12010 if necessary, (2) a sequence of pops, and (3) a return or
12011 jump instruction. Prevent insns from the function body from
12012 being scheduled into this sequence. */
12013 if (TARGET_SEH)
12015 /* Prevent a catch region from being adjacent to the standard
12016 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12017 several other flags that would be interesting to test are
12018 yet set up. */
12019 if (flag_non_call_exceptions)
12020 emit_insn (gen_nops (const1_rtx));
12021 else
12022 emit_insn (gen_blockage ());
12025 /* The first step is to deallocate the stack frame so that we can
12026 pop the registers. Also do it on SEH targets for very large
12027 frames, as the emitted instructions aren't allowed by the ABI in
12028 epilogues. */
12029 if (!m->fs.sp_valid
12030 || (TARGET_SEH
12031 && (m->fs.sp_offset - frame.reg_save_offset
12032 >= SEH_MAX_FRAME_SIZE)))
12034 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12035 GEN_INT (m->fs.fp_offset
12036 - frame.reg_save_offset),
12037 style, false);
12039 else if (m->fs.sp_offset != frame.reg_save_offset)
12041 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12042 GEN_INT (m->fs.sp_offset
12043 - frame.reg_save_offset),
12044 style,
12045 m->fs.cfa_reg == stack_pointer_rtx);
12048 ix86_emit_restore_regs_using_pop ();
12051 /* If we used a frame pointer and haven't already got rid of it,
12052 then do so now. */
12053 if (m->fs.fp_valid)
12055 /* If the stack pointer is valid and pointing at the frame
12056 pointer store address, then we only need a pop. */
12057 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12058 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12059 /* Leave results in shorter dependency chains on CPUs that are
12060 able to grok it fast. */
12061 else if (TARGET_USE_LEAVE
12062 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12063 || !cfun->machine->use_fast_prologue_epilogue)
12064 ix86_emit_leave ();
12065 else
12067 pro_epilogue_adjust_stack (stack_pointer_rtx,
12068 hard_frame_pointer_rtx,
12069 const0_rtx, style, !using_drap);
12070 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12074 if (using_drap)
12076 int param_ptr_offset = UNITS_PER_WORD;
12077 rtx insn;
12079 gcc_assert (stack_realign_drap);
12081 if (ix86_static_chain_on_stack)
12082 param_ptr_offset += UNITS_PER_WORD;
12083 if (!call_used_regs[REGNO (crtl->drap_reg)])
12084 param_ptr_offset += UNITS_PER_WORD;
12086 insn = emit_insn (gen_rtx_SET
12087 (VOIDmode, stack_pointer_rtx,
12088 gen_rtx_PLUS (Pmode,
12089 crtl->drap_reg,
12090 GEN_INT (-param_ptr_offset))));
12091 m->fs.cfa_reg = stack_pointer_rtx;
12092 m->fs.cfa_offset = param_ptr_offset;
12093 m->fs.sp_offset = param_ptr_offset;
12094 m->fs.realigned = false;
12096 add_reg_note (insn, REG_CFA_DEF_CFA,
12097 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12098 GEN_INT (param_ptr_offset)));
12099 RTX_FRAME_RELATED_P (insn) = 1;
12101 if (!call_used_regs[REGNO (crtl->drap_reg)])
12102 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12105 /* At this point the stack pointer must be valid, and we must have
12106 restored all of the registers. We may not have deallocated the
12107 entire stack frame. We've delayed this until now because it may
12108 be possible to merge the local stack deallocation with the
12109 deallocation forced by ix86_static_chain_on_stack. */
12110 gcc_assert (m->fs.sp_valid);
12111 gcc_assert (!m->fs.fp_valid);
12112 gcc_assert (!m->fs.realigned);
12113 if (m->fs.sp_offset != UNITS_PER_WORD)
12115 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12116 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12117 style, true);
12119 else
12120 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12122 /* Sibcall epilogues don't want a return instruction. */
12123 if (style == 0)
12125 m->fs = frame_state_save;
12126 return;
12129 if (crtl->args.pops_args && crtl->args.size)
12131 rtx popc = GEN_INT (crtl->args.pops_args);
12133 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12134 address, do explicit add, and jump indirectly to the caller. */
12136 if (crtl->args.pops_args >= 65536)
12138 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12139 rtx insn;
12141 /* There is no "pascal" calling convention in any 64bit ABI. */
12142 gcc_assert (!TARGET_64BIT);
12144 insn = emit_insn (gen_pop (ecx));
12145 m->fs.cfa_offset -= UNITS_PER_WORD;
12146 m->fs.sp_offset -= UNITS_PER_WORD;
12148 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12149 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12150 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12151 add_reg_note (insn, REG_CFA_REGISTER,
12152 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12153 RTX_FRAME_RELATED_P (insn) = 1;
12155 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12156 popc, -1, true);
12157 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12159 else
12160 emit_jump_insn (gen_simple_return_pop_internal (popc));
12162 else
12163 emit_jump_insn (gen_simple_return_internal ());
12165 /* Restore the state back to the state from the prologue,
12166 so that it's correct for the next epilogue. */
12167 m->fs = frame_state_save;
12170 /* Reset from the function's potential modifications. */
12172 static void
12173 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12175 if (pic_offset_table_rtx
12176 && !ix86_use_pseudo_pic_reg ())
12177 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12178 #if TARGET_MACHO
12179 /* Mach-O doesn't support labels at the end of objects, so if
12180 it looks like we might want one, insert a NOP. */
12182 rtx_insn *insn = get_last_insn ();
12183 rtx_insn *deleted_debug_label = NULL;
12184 while (insn
12185 && NOTE_P (insn)
12186 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12188 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12189 notes; instead set their CODE_LABEL_NUMBER to -1,
12190 otherwise there would be code generation differences
12191 between -g and -g0. */
12192 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12193 deleted_debug_label = insn;
12194 insn = PREV_INSN (insn);
12196 if (insn
12197 && (LABEL_P (insn)
12198 || (NOTE_P (insn)
12199 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12200 fputs ("\tnop\n", file);
12201 else if (deleted_debug_label)
12202 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12203 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12204 CODE_LABEL_NUMBER (insn) = -1;
12206 #endif
12210 /* Return a scratch register to use in the split stack prologue. The
12211 split stack prologue is used for -fsplit-stack. It consists of the first
12212 instructions in the function, emitted even before the regular prologue.
12213 The scratch register can be any caller-saved register which is not
12214 used for parameters or for the static chain. */
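/* A reader's summary of the selection logic below (the 32-bit cases follow
   the usual IA-32 argument-register conventions):
     64-bit       -> %r11
     fastcall     -> %eax; a nested function (static chain) is unsupported
     thiscall     -> %edx, or %eax if a static chain is live
     regparm < 3  -> %ecx, or %edx if a static chain is live (a static
                     chain plus 2 register parameters is unsupported)
     regparm == 3 -> no register is available, so this is unsupported.  */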
12216 static unsigned int
12217 split_stack_prologue_scratch_regno (void)
12219 if (TARGET_64BIT)
12220 return R11_REG;
12221 else
12223 bool is_fastcall, is_thiscall;
12224 int regparm;
12226 is_fastcall = (lookup_attribute ("fastcall",
12227 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12228 != NULL);
12229 is_thiscall = (lookup_attribute ("thiscall",
12230 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12231 != NULL);
12232 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12234 if (is_fastcall)
12236 if (DECL_STATIC_CHAIN (cfun->decl))
12238 sorry ("-fsplit-stack does not support fastcall with "
12239 "nested function");
12240 return INVALID_REGNUM;
12242 return AX_REG;
12244 else if (is_thiscall)
12246 if (!DECL_STATIC_CHAIN (cfun->decl))
12247 return DX_REG;
12248 return AX_REG;
12250 else if (regparm < 3)
12252 if (!DECL_STATIC_CHAIN (cfun->decl))
12253 return CX_REG;
12254 else
12256 if (regparm >= 2)
12258 sorry ("-fsplit-stack does not support 2 register "
12259 "parameters for a nested function");
12260 return INVALID_REGNUM;
12262 return DX_REG;
12265 else
12267 /* FIXME: We could make this work by pushing a register
12268 around the addition and comparison. */
12269 sorry ("-fsplit-stack does not support 3 register parameters");
12270 return INVALID_REGNUM;
12275 /* A SYMBOL_REF for the function which allocates new stack space for
12276 -fsplit-stack. */
12278 static GTY(()) rtx split_stack_fn;
12280 /* A SYMBOL_REF for the more stack function when using the large
12281 model. */
12283 static GTY(()) rtx split_stack_fn_large;
12285 /* Handle -fsplit-stack. These are the first instructions in the
12286 function, even before the regular prologue. */
12288 void
12289 ix86_expand_split_stack_prologue (void)
12291 struct ix86_frame frame;
12292 HOST_WIDE_INT allocate;
12293 unsigned HOST_WIDE_INT args_size;
12294 rtx_code_label *label;
12295 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12296 rtx scratch_reg = NULL_RTX;
12297 rtx_code_label *varargs_label = NULL;
12298 rtx fn;
12300 gcc_assert (flag_split_stack && reload_completed);
12302 ix86_finalize_stack_realign_flags ();
12303 ix86_compute_frame_layout (&frame);
12304 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12306 /* This is the label we will branch to if we have enough stack
12307 space. We expect the basic block reordering pass to reverse this
12308 branch if optimizing, so that we branch in the unlikely case. */
12309 label = gen_label_rtx ();
12311 /* We need to compare the stack pointer minus the frame size with
12312 the stack boundary in the TCB. The stack boundary always gives
12313 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12314 can compare directly. Otherwise we need to do an addition. */
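/* Shape of the emitted check, as a rough sketch (the segment register
   and the guard offset within the TCB are target details not repeated
   here):
     small frame:  compare the stack pointer against the TCB limit and
                   branch to the label below if it is above or equal;
     large frame:  scratch = stack pointer - frame size (add or lea),
                   then the same unsigned compare and branch.  */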
12316 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12317 UNSPEC_STACK_CHECK);
12318 limit = gen_rtx_CONST (Pmode, limit);
12319 limit = gen_rtx_MEM (Pmode, limit);
12320 if (allocate < SPLIT_STACK_AVAILABLE)
12321 current = stack_pointer_rtx;
12322 else
12324 unsigned int scratch_regno;
12325 rtx offset;
12327 /* We need a scratch register to hold the stack pointer minus
12328 the required frame size. Since this is the very start of the
12329 function, the scratch register can be any caller-saved
12330 register which is not used for parameters. */
12331 offset = GEN_INT (- allocate);
12332 scratch_regno = split_stack_prologue_scratch_regno ();
12333 if (scratch_regno == INVALID_REGNUM)
12334 return;
12335 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12336 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12338 /* We don't use ix86_gen_add3 in this case because it will
12339 want to split to lea, but when not optimizing the insn
12340 will not be split after this point. */
12341 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12342 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12343 offset)));
12345 else
12347 emit_move_insn (scratch_reg, offset);
12348 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12349 stack_pointer_rtx));
12351 current = scratch_reg;
12354 ix86_expand_branch (GEU, current, limit, label);
12355 jump_insn = get_last_insn ();
12356 JUMP_LABEL (jump_insn) = label;
12358 /* Mark the jump as very likely to be taken. */
12359 add_int_reg_note (jump_insn, REG_BR_PROB,
12360 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12362 if (split_stack_fn == NULL_RTX)
12364 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12365 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12367 fn = split_stack_fn;
12369 /* Get more stack space. We pass in the desired stack space and the
12370 size of the arguments to copy to the new stack. In 32-bit mode
12371 we push the parameters; __morestack will return on a new stack
12372 anyhow. In 64-bit mode we pass the parameters in r10 and
12373 r11. */
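/* For the common 64-bit small-model case this amounts to roughly
       movq  $FRAME_BYTES, %r10
       movq  $ARG_BYTES, %r11
       call  __morestack
   where FRAME_BYTES and ARG_BYTES stand for the 'allocate' and
   'args_size' values computed here; the large-model path below
   packs both into %r10 instead.  */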
12374 allocate_rtx = GEN_INT (allocate);
12375 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12376 call_fusage = NULL_RTX;
12377 if (TARGET_64BIT)
12379 rtx reg10, reg11;
12381 reg10 = gen_rtx_REG (Pmode, R10_REG);
12382 reg11 = gen_rtx_REG (Pmode, R11_REG);
12384 /* If this function uses a static chain, it will be in %r10.
12385 Preserve it across the call to __morestack. */
12386 if (DECL_STATIC_CHAIN (cfun->decl))
12388 rtx rax;
12390 rax = gen_rtx_REG (word_mode, AX_REG);
12391 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12392 use_reg (&call_fusage, rax);
12395 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12396 && !TARGET_PECOFF)
12398 HOST_WIDE_INT argval;
12400 gcc_assert (Pmode == DImode);
12401 /* When using the large model we need to load the address
12402 into a register, and we've run out of registers. So we
12403 switch to a different calling convention, and we call a
12404 different function: __morestack_large. We pass the
12405 argument size in the upper 32 bits of r10 and pass the
12406 frame size in the lower 32 bits. */
12407 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12408 gcc_assert ((args_size & 0xffffffff) == args_size);
12410 if (split_stack_fn_large == NULL_RTX)
12412 split_stack_fn_large =
12413 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12414 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12416 if (ix86_cmodel == CM_LARGE_PIC)
12418 rtx_code_label *label;
12419 rtx x;
12421 label = gen_label_rtx ();
12422 emit_label (label);
12423 LABEL_PRESERVE_P (label) = 1;
12424 emit_insn (gen_set_rip_rex64 (reg10, label));
12425 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12426 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12427 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12428 UNSPEC_GOT);
12429 x = gen_rtx_CONST (Pmode, x);
12430 emit_move_insn (reg11, x);
12431 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12432 x = gen_const_mem (Pmode, x);
12433 emit_move_insn (reg11, x);
12435 else
12436 emit_move_insn (reg11, split_stack_fn_large);
12438 fn = reg11;
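/* Worked example of the packing done just below: with (made-up values)
   args_size == 0x20 and allocate == 0x1000, argval becomes
   (0x20 << 32) + 0x1000 == 0x0000002000001000, i.e. the argument size
   in the upper 32 bits of %r10 and the frame size in the lower 32.  */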
12440 argval = ((args_size << 16) << 16) + allocate;
12441 emit_move_insn (reg10, GEN_INT (argval));
12443 else
12445 emit_move_insn (reg10, allocate_rtx);
12446 emit_move_insn (reg11, GEN_INT (args_size));
12447 use_reg (&call_fusage, reg11);
12450 use_reg (&call_fusage, reg10);
12452 else
12454 emit_insn (gen_push (GEN_INT (args_size)));
12455 emit_insn (gen_push (allocate_rtx));
12457 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12458 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12459 NULL_RTX, false);
12460 add_function_usage_to (call_insn, call_fusage);
12462 /* In order to make call/return prediction work right, we now need
12463 to execute a return instruction. See
12464 libgcc/config/i386/morestack.S for the details on how this works.
12466 For flow purposes gcc must not see this as a return
12467 instruction--we need control flow to continue at the subsequent
12468 label. Therefore, we use an unspec. */
12469 gcc_assert (crtl->args.pops_args < 65536);
12470 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12472 /* If we are in 64-bit mode and this function uses a static chain,
12473 we saved %r10 in %rax before calling __morestack. */
12474 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12475 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12476 gen_rtx_REG (word_mode, AX_REG));
12478 /* If this function calls va_start, we need to store a pointer to
12479 the arguments on the old stack, because they may not have been
12480 all copied to the new stack. At this point the old stack can be
12481 found at the frame pointer value used by __morestack, because
12482 __morestack has set that up before calling back to us. Here we
12483 store that pointer in a scratch register, and in
12484 ix86_expand_prologue we store the scratch register in a stack
12485 slot. */
12486 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12488 unsigned int scratch_regno;
12489 rtx frame_reg;
12490 int words;
12492 scratch_regno = split_stack_prologue_scratch_regno ();
12493 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12494 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12496 /* 64-bit:
12497 fp -> old fp value
12498 return address within this function
12499 return address of caller of this function
12500 stack arguments
12501 So we add three words to get to the stack arguments.
12503 32-bit:
12504 fp -> old fp value
12505 return address within this function
12506 first argument to __morestack
12507 second argument to __morestack
12508 return address of caller of this function
12509 stack arguments
12510 So we add five words to get to the stack arguments. */
12512 words = TARGET_64BIT ? 3 : 5;
12513 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12514 gen_rtx_PLUS (Pmode, frame_reg,
12515 GEN_INT (words * UNITS_PER_WORD))));
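/* E.g. in 64-bit mode (UNITS_PER_WORD == 8) the insn just emitted sets
   the scratch register to the frame pointer value plus 24, which by the
   layout described above is the address of the first stack argument.  */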
12517 varargs_label = gen_label_rtx ();
12518 emit_jump_insn (gen_jump (varargs_label));
12519 JUMP_LABEL (get_last_insn ()) = varargs_label;
12521 emit_barrier ();
12524 emit_label (label);
12525 LABEL_NUSES (label) = 1;
12527 /* If this function calls va_start, we now have to set the scratch
12528 register for the case where we do not call __morestack. In this
12529 case we need to set it based on the stack pointer. */
12530 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12532 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12533 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12534 GEN_INT (UNITS_PER_WORD))));
12536 emit_label (varargs_label);
12537 LABEL_NUSES (varargs_label) = 1;
12541 /* We may have to tell the dataflow pass that the split stack prologue
12542 is initializing a scratch register. */
12544 static void
12545 ix86_live_on_entry (bitmap regs)
12547 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12549 gcc_assert (flag_split_stack);
12550 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12554 /* Extract the parts of an RTL expression that is a valid memory address
12555 for an instruction. Return 0 if the structure of the address is
12556 grossly off. Return -1 if the address contains ASHIFT, so it is not
12557 strictly valid, but is still used for computing the length of the lea instruction. */
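/* For example (a sketch, not an exhaustive list of accepted forms):
     (plus:SI (plus:SI (reg:SI bx) (mult:SI (reg:SI cx) (const_int 4)))
              (const_int 16))
   decomposes into base = bx, index = cx, scale = 4, disp = 16, while a
   top-level (ashift:SI (reg:SI cx) (const_int 3)) is accepted for lea
   with index = cx, scale = 8 and a return value of -1.  */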
12559 int
12560 ix86_decompose_address (rtx addr, struct ix86_address *out)
12562 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12563 rtx base_reg, index_reg;
12564 HOST_WIDE_INT scale = 1;
12565 rtx scale_rtx = NULL_RTX;
12566 rtx tmp;
12567 int retval = 1;
12568 enum ix86_address_seg seg = SEG_DEFAULT;
12570 /* Allow zero-extended SImode addresses,
12571 they will be emitted with addr32 prefix. */
12572 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12574 if (GET_CODE (addr) == ZERO_EXTEND
12575 && GET_MODE (XEXP (addr, 0)) == SImode)
12577 addr = XEXP (addr, 0);
12578 if (CONST_INT_P (addr))
12579 return 0;
12581 else if (GET_CODE (addr) == AND
12582 && const_32bit_mask (XEXP (addr, 1), DImode))
12584 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12585 if (addr == NULL_RTX)
12586 return 0;
12588 if (CONST_INT_P (addr))
12589 return 0;
12593 /* Allow SImode subregs of DImode addresses,
12594 they will be emitted with addr32 prefix. */
12595 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12597 if (GET_CODE (addr) == SUBREG
12598 && GET_MODE (SUBREG_REG (addr)) == DImode)
12600 addr = SUBREG_REG (addr);
12601 if (CONST_INT_P (addr))
12602 return 0;
12606 if (REG_P (addr))
12607 base = addr;
12608 else if (GET_CODE (addr) == SUBREG)
12610 if (REG_P (SUBREG_REG (addr)))
12611 base = addr;
12612 else
12613 return 0;
12615 else if (GET_CODE (addr) == PLUS)
12617 rtx addends[4], op;
12618 int n = 0, i;
12620 op = addr;
12621 do
12623 if (n >= 4)
12624 return 0;
12625 addends[n++] = XEXP (op, 1);
12626 op = XEXP (op, 0);
12628 while (GET_CODE (op) == PLUS);
12629 if (n >= 4)
12630 return 0;
12631 addends[n] = op;
12633 for (i = n; i >= 0; --i)
12635 op = addends[i];
12636 switch (GET_CODE (op))
12638 case MULT:
12639 if (index)
12640 return 0;
12641 index = XEXP (op, 0);
12642 scale_rtx = XEXP (op, 1);
12643 break;
12645 case ASHIFT:
12646 if (index)
12647 return 0;
12648 index = XEXP (op, 0);
12649 tmp = XEXP (op, 1);
12650 if (!CONST_INT_P (tmp))
12651 return 0;
12652 scale = INTVAL (tmp);
12653 if ((unsigned HOST_WIDE_INT) scale > 3)
12654 return 0;
12655 scale = 1 << scale;
12656 break;
12658 case ZERO_EXTEND:
12659 op = XEXP (op, 0);
12660 if (GET_CODE (op) != UNSPEC)
12661 return 0;
12662 /* FALLTHRU */
12664 case UNSPEC:
12665 if (XINT (op, 1) == UNSPEC_TP
12666 && TARGET_TLS_DIRECT_SEG_REFS
12667 && seg == SEG_DEFAULT)
12668 seg = DEFAULT_TLS_SEG_REG;
12669 else
12670 return 0;
12671 break;
12673 case SUBREG:
12674 if (!REG_P (SUBREG_REG (op)))
12675 return 0;
12676 /* FALLTHRU */
12678 case REG:
12679 if (!base)
12680 base = op;
12681 else if (!index)
12682 index = op;
12683 else
12684 return 0;
12685 break;
12687 case CONST:
12688 case CONST_INT:
12689 case SYMBOL_REF:
12690 case LABEL_REF:
12691 if (disp)
12692 return 0;
12693 disp = op;
12694 break;
12696 default:
12697 return 0;
12701 else if (GET_CODE (addr) == MULT)
12703 index = XEXP (addr, 0); /* index*scale */
12704 scale_rtx = XEXP (addr, 1);
12706 else if (GET_CODE (addr) == ASHIFT)
12708 /* We're called for lea too, which implements ashift on occasion. */
12709 index = XEXP (addr, 0);
12710 tmp = XEXP (addr, 1);
12711 if (!CONST_INT_P (tmp))
12712 return 0;
12713 scale = INTVAL (tmp);
12714 if ((unsigned HOST_WIDE_INT) scale > 3)
12715 return 0;
12716 scale = 1 << scale;
12717 retval = -1;
12719 else
12720 disp = addr; /* displacement */
12722 if (index)
12724 if (REG_P (index))
12726 else if (GET_CODE (index) == SUBREG
12727 && REG_P (SUBREG_REG (index)))
12729 else
12730 return 0;
12733 /* Extract the integral value of scale. */
12734 if (scale_rtx)
12736 if (!CONST_INT_P (scale_rtx))
12737 return 0;
12738 scale = INTVAL (scale_rtx);
12741 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12742 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12744 /* Avoid useless 0 displacement. */
12745 if (disp == const0_rtx && (base || index))
12746 disp = NULL_RTX;
12748 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12749 if (base_reg && index_reg && scale == 1
12750 && (index_reg == arg_pointer_rtx
12751 || index_reg == frame_pointer_rtx
12752 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12754 std::swap (base, index);
12755 std::swap (base_reg, index_reg);
12758 /* Special case: %ebp cannot be encoded as a base without a displacement.
12759 Similarly %r13. */
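/* (In the ModR/M encoding, mod == 00 with a base of 101 does not mean
   "[ebp]"/"[r13]" but selects a disp32 or RIP-relative form, so these
   bases always need at least an explicit zero displacement.)  */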
12760 if (!disp
12761 && base_reg
12762 && (base_reg == hard_frame_pointer_rtx
12763 || base_reg == frame_pointer_rtx
12764 || base_reg == arg_pointer_rtx
12765 || (REG_P (base_reg)
12766 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12767 || REGNO (base_reg) == R13_REG))))
12768 disp = const0_rtx;
12770 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12771 Avoid this by transforming to [%esi+0].
12772 Reload calls address legitimization without cfun defined, so we need
12773 to test cfun for being non-NULL. */
12774 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12775 && base_reg && !index_reg && !disp
12776 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12777 disp = const0_rtx;
12779 /* Special case: encode reg+reg instead of reg*2. */
12780 if (!base && index && scale == 2)
12781 base = index, base_reg = index_reg, scale = 1;
12783 /* Special case: scaling cannot be encoded without base or displacement. */
12784 if (!base && !disp && index && scale != 1)
12785 disp = const0_rtx;
12787 out->base = base;
12788 out->index = index;
12789 out->disp = disp;
12790 out->scale = scale;
12791 out->seg = seg;
12793 return retval;
12796 /* Return cost of the memory address x.
12797 For i386, it is better to use a complex address than let gcc copy
12798 the address into a reg and make a new pseudo. But not if the address
12799 requires two regs - that would mean more pseudos with longer
12800 lifetimes. */
12801 static int
12802 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12804 struct ix86_address parts;
12805 int cost = 1;
12806 int ok = ix86_decompose_address (x, &parts);
12808 gcc_assert (ok);
12810 if (parts.base && GET_CODE (parts.base) == SUBREG)
12811 parts.base = SUBREG_REG (parts.base);
12812 if (parts.index && GET_CODE (parts.index) == SUBREG)
12813 parts.index = SUBREG_REG (parts.index);
12815 /* Attempt to minimize number of registers in the address. */
12816 if ((parts.base
12817 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12818 || (parts.index
12819 && (!REG_P (parts.index)
12820 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12821 cost++;
12823 /* When address base or index is "pic_offset_table_rtx" we don't increase
12824 address cost. When a memop with "pic_offset_table_rtx" is not invariant
12825 itself, it most likely means that the base or index is not invariant.
12826 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12827 profitable for x86. */
12828 if (parts.base
12829 && (!pic_offset_table_rtx
12830 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12831 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12832 && parts.index
12833 && (!pic_offset_table_rtx
12834 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12835 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12836 && parts.base != parts.index)
12837 cost++;
12839 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12840 since its predecode logic can't detect the length of instructions
12841 and it degenerates to vector decoding. Increase the cost of such
12842 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12843 to split such addresses or even refuse such addresses at all.
12845 Following addressing modes are affected:
12846 [base+scale*index]
12847 [scale*index+disp]
12848 [base+index]
12850 The first and last case may be avoidable by explicitly coding the zero in the
12851 memory address, but I don't have an AMD-K6 machine handy to check this
12852 theory. */
12854 if (TARGET_K6
12855 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12856 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12857 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12858 cost += 10;
12860 return cost;
12863 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12864 this is used to form addresses to local data when -fPIC is in
12865 use. */
12867 static bool
12868 darwin_local_data_pic (rtx disp)
12870 return (GET_CODE (disp) == UNSPEC
12871 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12874 /* Determine if a given RTX is a valid constant. We already know this
12875 satisfies CONSTANT_P. */
12877 static bool
12878 ix86_legitimate_constant_p (machine_mode, rtx x)
12880 /* Pointer bounds constants are not valid. */
12881 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12882 return false;
12884 switch (GET_CODE (x))
12886 case CONST:
12887 x = XEXP (x, 0);
12889 if (GET_CODE (x) == PLUS)
12891 if (!CONST_INT_P (XEXP (x, 1)))
12892 return false;
12893 x = XEXP (x, 0);
12896 if (TARGET_MACHO && darwin_local_data_pic (x))
12897 return true;
12899 /* Only some unspecs are valid as "constants". */
12900 if (GET_CODE (x) == UNSPEC)
12901 switch (XINT (x, 1))
12903 case UNSPEC_GOT:
12904 case UNSPEC_GOTOFF:
12905 case UNSPEC_PLTOFF:
12906 return TARGET_64BIT;
12907 case UNSPEC_TPOFF:
12908 case UNSPEC_NTPOFF:
12909 x = XVECEXP (x, 0, 0);
12910 return (GET_CODE (x) == SYMBOL_REF
12911 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12912 case UNSPEC_DTPOFF:
12913 x = XVECEXP (x, 0, 0);
12914 return (GET_CODE (x) == SYMBOL_REF
12915 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12916 default:
12917 return false;
12920 /* We must have drilled down to a symbol. */
12921 if (GET_CODE (x) == LABEL_REF)
12922 return true;
12923 if (GET_CODE (x) != SYMBOL_REF)
12924 return false;
12925 /* FALLTHRU */
12927 case SYMBOL_REF:
12928 /* TLS symbols are never valid. */
12929 if (SYMBOL_REF_TLS_MODEL (x))
12930 return false;
12932 /* DLLIMPORT symbols are never valid. */
12933 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12934 && SYMBOL_REF_DLLIMPORT_P (x))
12935 return false;
12937 #if TARGET_MACHO
12938 /* mdynamic-no-pic */
12939 if (MACHO_DYNAMIC_NO_PIC_P)
12940 return machopic_symbol_defined_p (x);
12941 #endif
12942 break;
12944 case CONST_DOUBLE:
12945 if (GET_MODE (x) == TImode
12946 && x != CONST0_RTX (TImode)
12947 && !TARGET_64BIT)
12948 return false;
12949 break;
12951 case CONST_VECTOR:
12952 if (!standard_sse_constant_p (x))
12953 return false;
12955 default:
12956 break;
12959 /* Otherwise we handle everything else in the move patterns. */
12960 return true;
12963 /* Determine if it's legal to put X into the constant pool. This
12964 is not possible for the address of thread-local symbols, which
12965 is checked above. */
12967 static bool
12968 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
12970 /* We can always put integral constants and vectors in memory. */
12971 switch (GET_CODE (x))
12973 case CONST_INT:
12974 case CONST_DOUBLE:
12975 case CONST_VECTOR:
12976 return false;
12978 default:
12979 break;
12981 return !ix86_legitimate_constant_p (mode, x);
12984 /* Nonzero if the symbol is marked as dllimport or as a stub-variable,
12985 otherwise zero. */
12987 static bool
12988 is_imported_p (rtx x)
12990 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12991 || GET_CODE (x) != SYMBOL_REF)
12992 return false;
12994 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12998 /* Nonzero if the constant value X is a legitimate general operand
12999 when generating PIC code. It is given that flag_pic is on and
13000 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13002 bool
13003 legitimate_pic_operand_p (rtx x)
13005 rtx inner;
13007 switch (GET_CODE (x))
13009 case CONST:
13010 inner = XEXP (x, 0);
13011 if (GET_CODE (inner) == PLUS
13012 && CONST_INT_P (XEXP (inner, 1)))
13013 inner = XEXP (inner, 0);
13015 /* Only some unspecs are valid as "constants". */
13016 if (GET_CODE (inner) == UNSPEC)
13017 switch (XINT (inner, 1))
13019 case UNSPEC_GOT:
13020 case UNSPEC_GOTOFF:
13021 case UNSPEC_PLTOFF:
13022 return TARGET_64BIT;
13023 case UNSPEC_TPOFF:
13024 x = XVECEXP (inner, 0, 0);
13025 return (GET_CODE (x) == SYMBOL_REF
13026 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13027 case UNSPEC_MACHOPIC_OFFSET:
13028 return legitimate_pic_address_disp_p (x);
13029 default:
13030 return false;
13032 /* FALLTHRU */
13034 case SYMBOL_REF:
13035 case LABEL_REF:
13036 return legitimate_pic_address_disp_p (x);
13038 default:
13039 return true;
13043 /* Determine if a given CONST RTX is a valid memory displacement
13044 in PIC mode. */
13046 bool
13047 legitimate_pic_address_disp_p (rtx disp)
13049 bool saw_plus;
13051 /* In 64bit mode we can allow direct addresses of symbols and labels
13052 when they are not dynamic symbols. */
13053 if (TARGET_64BIT)
13055 rtx op0 = disp, op1;
13057 switch (GET_CODE (disp))
13059 case LABEL_REF:
13060 return true;
13062 case CONST:
13063 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13064 break;
13065 op0 = XEXP (XEXP (disp, 0), 0);
13066 op1 = XEXP (XEXP (disp, 0), 1);
13067 if (!CONST_INT_P (op1)
13068 || INTVAL (op1) >= 16*1024*1024
13069 || INTVAL (op1) < -16*1024*1024)
13070 break;
13071 if (GET_CODE (op0) == LABEL_REF)
13072 return true;
13073 if (GET_CODE (op0) == CONST
13074 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13075 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13076 return true;
13077 if (GET_CODE (op0) == UNSPEC
13078 && XINT (op0, 1) == UNSPEC_PCREL)
13079 return true;
13080 if (GET_CODE (op0) != SYMBOL_REF)
13081 break;
13082 /* FALLTHRU */
13084 case SYMBOL_REF:
13085 /* TLS references should always be enclosed in UNSPEC.
13086 The dllimported symbol always needs to be resolved. */
13087 if (SYMBOL_REF_TLS_MODEL (op0)
13088 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13089 return false;
13091 if (TARGET_PECOFF)
13093 if (is_imported_p (op0))
13094 return true;
13096 if (SYMBOL_REF_FAR_ADDR_P (op0)
13097 || !SYMBOL_REF_LOCAL_P (op0))
13098 break;
13100 /* Function symbols need to be resolved only for the
13101 large model.
13102 For the small model we don't need to resolve anything
13103 here. */
13104 if ((ix86_cmodel != CM_LARGE_PIC
13105 && SYMBOL_REF_FUNCTION_P (op0))
13106 || ix86_cmodel == CM_SMALL_PIC)
13107 return true;
13108 /* Non-external symbols don't need to be resolved for
13109 the large and medium models. */
13110 if ((ix86_cmodel == CM_LARGE_PIC
13111 || ix86_cmodel == CM_MEDIUM_PIC)
13112 && !SYMBOL_REF_EXTERNAL_P (op0))
13113 return true;
13115 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13116 && (SYMBOL_REF_LOCAL_P (op0)
13117 || (HAVE_LD_PIE_COPYRELOC
13118 && flag_pie
13119 && !SYMBOL_REF_WEAK (op0)
13120 && !SYMBOL_REF_FUNCTION_P (op0)))
13121 && ix86_cmodel != CM_LARGE_PIC)
13122 return true;
13123 break;
13125 default:
13126 break;
13129 if (GET_CODE (disp) != CONST)
13130 return false;
13131 disp = XEXP (disp, 0);
13133 if (TARGET_64BIT)
13135 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
13136 of GOT tables. We should not need these anyway. */
13137 if (GET_CODE (disp) != UNSPEC
13138 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13139 && XINT (disp, 1) != UNSPEC_GOTOFF
13140 && XINT (disp, 1) != UNSPEC_PCREL
13141 && XINT (disp, 1) != UNSPEC_PLTOFF))
13142 return false;
13144 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13145 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13146 return false;
13147 return true;
13150 saw_plus = false;
13151 if (GET_CODE (disp) == PLUS)
13153 if (!CONST_INT_P (XEXP (disp, 1)))
13154 return false;
13155 disp = XEXP (disp, 0);
13156 saw_plus = true;
13159 if (TARGET_MACHO && darwin_local_data_pic (disp))
13160 return true;
13162 if (GET_CODE (disp) != UNSPEC)
13163 return false;
13165 switch (XINT (disp, 1))
13167 case UNSPEC_GOT:
13168 if (saw_plus)
13169 return false;
13170 /* We need to check for both symbols and labels because VxWorks loads
13171 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13172 details. */
13173 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13174 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13175 case UNSPEC_GOTOFF:
13176 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13177 While the ABI also specifies a 32bit relocation, we don't produce it in
13178 the small PIC model at all. */
13179 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13180 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13181 && !TARGET_64BIT)
13182 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13183 return false;
13184 case UNSPEC_GOTTPOFF:
13185 case UNSPEC_GOTNTPOFF:
13186 case UNSPEC_INDNTPOFF:
13187 if (saw_plus)
13188 return false;
13189 disp = XVECEXP (disp, 0, 0);
13190 return (GET_CODE (disp) == SYMBOL_REF
13191 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13192 case UNSPEC_NTPOFF:
13193 disp = XVECEXP (disp, 0, 0);
13194 return (GET_CODE (disp) == SYMBOL_REF
13195 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13196 case UNSPEC_DTPOFF:
13197 disp = XVECEXP (disp, 0, 0);
13198 return (GET_CODE (disp) == SYMBOL_REF
13199 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13202 return false;
13205 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13206 replace the input X, or the original X if no replacement is called for.
13207 The output parameter *WIN is 1 if the calling macro should goto WIN,
13208 0 if it should not. */
13210 bool
13211 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13212 int)
13214 /* Reload can generate:
13216 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13217 (reg:DI 97))
13218 (reg:DI 2 cx))
13220 This RTX is rejected by ix86_legitimate_address_p due to
13221 non-strictness of base register 97. Following this rejection,
13222 reload pushes all three components into separate registers,
13223 creating an invalid memory address RTX.
13225 The following code reloads only the invalid part of the
13226 memory address RTX. */
13228 if (GET_CODE (x) == PLUS
13229 && REG_P (XEXP (x, 1))
13230 && GET_CODE (XEXP (x, 0)) == PLUS
13231 && REG_P (XEXP (XEXP (x, 0), 1)))
13233 rtx base, index;
13234 bool something_reloaded = false;
13236 base = XEXP (XEXP (x, 0), 1);
13237 if (!REG_OK_FOR_BASE_STRICT_P (base))
13239 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13240 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13241 opnum, (enum reload_type) type);
13242 something_reloaded = true;
13245 index = XEXP (x, 1);
13246 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13248 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13249 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13250 opnum, (enum reload_type) type);
13251 something_reloaded = true;
13254 gcc_assert (something_reloaded);
13255 return true;
13258 return false;
13261 /* Determine if OP is a suitable RTX for an address register.
13262 Return the naked register if a register or a register subreg is
13263 found, otherwise return NULL_RTX. */
13265 static rtx
13266 ix86_validate_address_register (rtx op)
13268 machine_mode mode = GET_MODE (op);
13270 /* Only SImode or DImode registers can form the address. */
13271 if (mode != SImode && mode != DImode)
13272 return NULL_RTX;
13274 if (REG_P (op))
13275 return op;
13276 else if (GET_CODE (op) == SUBREG)
13278 rtx reg = SUBREG_REG (op);
13280 if (!REG_P (reg))
13281 return NULL_RTX;
13283 mode = GET_MODE (reg);
13285 /* Don't allow SUBREGs that span more than a word. It can
13286 lead to spill failures when the register is one word out
13287 of a two word structure. */
13288 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13289 return NULL_RTX;
13291 /* Allow only SUBREGs of non-eliminable hard registers. */
13292 if (register_no_elim_operand (reg, mode))
13293 return reg;
13296 /* Op is not a register. */
13297 return NULL_RTX;
13300 /* Recognizes RTL expressions that are valid memory addresses for an
13301 instruction. The MODE argument is the machine mode for the MEM
13302 expression that wants to use this address.
13304 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13305 convert common non-canonical forms to canonical form so that they will
13306 be recognized. */
13308 static bool
13309 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13311 struct ix86_address parts;
13312 rtx base, index, disp;
13313 HOST_WIDE_INT scale;
13314 enum ix86_address_seg seg;
13316 if (ix86_decompose_address (addr, &parts) <= 0)
13317 /* Decomposition failed. */
13318 return false;
13320 base = parts.base;
13321 index = parts.index;
13322 disp = parts.disp;
13323 scale = parts.scale;
13324 seg = parts.seg;
13326 /* Validate base register. */
13327 if (base)
13329 rtx reg = ix86_validate_address_register (base);
13331 if (reg == NULL_RTX)
13332 return false;
13334 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13335 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13336 /* Base is not valid. */
13337 return false;
13340 /* Validate index register. */
13341 if (index)
13343 rtx reg = ix86_validate_address_register (index);
13345 if (reg == NULL_RTX)
13346 return false;
13348 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13349 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13350 /* Index is not valid. */
13351 return false;
13354 /* Index and base should have the same mode. */
13355 if (base && index
13356 && GET_MODE (base) != GET_MODE (index))
13357 return false;
13359 /* Address override works only on the (%reg) part of %fs:(%reg). */
13360 if (seg != SEG_DEFAULT
13361 && ((base && GET_MODE (base) != word_mode)
13362 || (index && GET_MODE (index) != word_mode)))
13363 return false;
13365 /* Validate scale factor. */
13366 if (scale != 1)
13368 if (!index)
13369 /* Scale without index. */
13370 return false;
13372 if (scale != 2 && scale != 4 && scale != 8)
13373 /* Scale is not a valid multiplier. */
13374 return false;
13377 /* Validate displacement. */
13378 if (disp)
13380 if (GET_CODE (disp) == CONST
13381 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13382 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13383 switch (XINT (XEXP (disp, 0), 1))
13385 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13386 used. While the ABI also specifies 32bit relocations, we don't produce
13387 them at all and use IP-relative addressing instead. */
13388 case UNSPEC_GOT:
13389 case UNSPEC_GOTOFF:
13390 gcc_assert (flag_pic);
13391 if (!TARGET_64BIT)
13392 goto is_legitimate_pic;
13394 /* 64bit address unspec. */
13395 return false;
13397 case UNSPEC_GOTPCREL:
13398 case UNSPEC_PCREL:
13399 gcc_assert (flag_pic);
13400 goto is_legitimate_pic;
13402 case UNSPEC_GOTTPOFF:
13403 case UNSPEC_GOTNTPOFF:
13404 case UNSPEC_INDNTPOFF:
13405 case UNSPEC_NTPOFF:
13406 case UNSPEC_DTPOFF:
13407 break;
13409 case UNSPEC_STACK_CHECK:
13410 gcc_assert (flag_split_stack);
13411 break;
13413 default:
13414 /* Invalid address unspec. */
13415 return false;
13418 else if (SYMBOLIC_CONST (disp)
13419 && (flag_pic
13420 || (TARGET_MACHO
13421 #if TARGET_MACHO
13422 && MACHOPIC_INDIRECT
13423 && !machopic_operand_p (disp)
13424 #endif
13428 is_legitimate_pic:
13429 if (TARGET_64BIT && (index || base))
13431 /* foo@dtpoff(%rX) is ok. */
13432 if (GET_CODE (disp) != CONST
13433 || GET_CODE (XEXP (disp, 0)) != PLUS
13434 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13435 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13436 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13437 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13438 /* Non-constant pic memory reference. */
13439 return false;
13441 else if ((!TARGET_MACHO || flag_pic)
13442 && ! legitimate_pic_address_disp_p (disp))
13443 /* Displacement is an invalid pic construct. */
13444 return false;
13445 #if TARGET_MACHO
13446 else if (MACHO_DYNAMIC_NO_PIC_P
13447 && !ix86_legitimate_constant_p (Pmode, disp))
13448 /* Displacement must be referenced via non_lazy_pointer. */
13449 return false;
13450 #endif
13452 /* This code used to verify that a symbolic pic displacement
13453 includes the pic_offset_table_rtx register.
13455 While this is a good idea, unfortunately these constructs may
13456 be created by the "adds using lea" optimization for incorrect
13457 code like:
13459 int a;
13460 int foo(int i)
13462 return *(&a+i);
13465 This code is nonsensical, but results in addressing the
13466 GOT table with a pic_offset_table_rtx base. We can't
13467 just refuse it easily, since it gets matched by the
13468 "addsi3" pattern, which later gets split to lea in case
13469 the output register differs from the input. While this
13470 could be handled by a separate addsi pattern for this case
13471 that never results in lea, disabling this test seems to be
13472 the easier and correct fix for the crash. */
13474 else if (GET_CODE (disp) != LABEL_REF
13475 && !CONST_INT_P (disp)
13476 && (GET_CODE (disp) != CONST
13477 || !ix86_legitimate_constant_p (Pmode, disp))
13478 && (GET_CODE (disp) != SYMBOL_REF
13479 || !ix86_legitimate_constant_p (Pmode, disp)))
13480 /* Displacement is not constant. */
13481 return false;
13482 else if (TARGET_64BIT
13483 && !x86_64_immediate_operand (disp, VOIDmode))
13484 /* Displacement is out of range. */
13485 return false;
13486 /* In x32 mode, constant addresses are sign extended to 64bit, so
13487 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13488 else if (TARGET_X32 && !(index || base)
13489 && CONST_INT_P (disp)
13490 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13491 return false;
13494 /* Everything looks valid. */
13495 return true;
13498 /* Determine if a given RTX is a valid constant address. */
13500 bool
13501 constant_address_p (rtx x)
13503 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13506 /* Return a unique alias set for the GOT. */
13508 static alias_set_type
13509 ix86_GOT_alias_set (void)
13511 static alias_set_type set = -1;
13512 if (set == -1)
13513 set = new_alias_set ();
13514 return set;
13517 /* Set regs_ever_live for PIC base address register
13518 to true if required. */
13519 static void
13520 set_pic_reg_ever_live ()
13522 if (reload_in_progress)
13523 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13526 /* Return a legitimate reference for ORIG (an address) using the
13527 register REG. If REG is 0, a new pseudo is generated.
13529 There are two types of references that must be handled:
13531 1. Global data references must load the address from the GOT, via
13532 the PIC reg. An insn is emitted to do this load, and the reg is
13533 returned.
13535 2. Static data references, constant pool addresses, and code labels
13536 compute the address as an offset from the GOT, whose base is in
13537 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13538 differentiate them from global data objects. The returned
13539 address is the PIC reg + an unspec constant.
13541 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13542 reg also appears in the address. */
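/* As a sketch of the two cases for 32-bit ELF PIC (the 64-bit and
   PE-COFF paths below differ in detail):
     1. global data:  reg = *(pic_reg + foo@GOT)      -- a load
     2. local data:   addr = pic_reg + foo@GOTOFF     -- an addition  */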
13544 static rtx
13545 legitimize_pic_address (rtx orig, rtx reg)
13547 rtx addr = orig;
13548 rtx new_rtx = orig;
13550 #if TARGET_MACHO
13551 if (TARGET_MACHO && !TARGET_64BIT)
13553 if (reg == 0)
13554 reg = gen_reg_rtx (Pmode);
13555 /* Use the generic Mach-O PIC machinery. */
13556 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13558 #endif
13560 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13562 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13563 if (tmp)
13564 return tmp;
13567 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13568 new_rtx = addr;
13569 else if (TARGET_64BIT && !TARGET_PECOFF
13570 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13572 rtx tmpreg;
13573 /* This symbol may be referenced via a displacement from the PIC
13574 base address (@GOTOFF). */
13576 set_pic_reg_ever_live ();
13577 if (GET_CODE (addr) == CONST)
13578 addr = XEXP (addr, 0);
13579 if (GET_CODE (addr) == PLUS)
13581 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13582 UNSPEC_GOTOFF);
13583 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13585 else
13586 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13587 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13588 if (!reg)
13589 tmpreg = gen_reg_rtx (Pmode);
13590 else
13591 tmpreg = reg;
13592 emit_move_insn (tmpreg, new_rtx);
13594 if (reg != 0)
13596 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13597 tmpreg, 1, OPTAB_DIRECT);
13598 new_rtx = reg;
13600 else
13601 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13603 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13605 /* This symbol may be referenced via a displacement from the PIC
13606 base address (@GOTOFF). */
13608 set_pic_reg_ever_live ();
13609 if (GET_CODE (addr) == CONST)
13610 addr = XEXP (addr, 0);
13611 if (GET_CODE (addr) == PLUS)
13613 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13614 UNSPEC_GOTOFF);
13615 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13617 else
13618 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13619 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13620 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13622 if (reg != 0)
13624 emit_move_insn (reg, new_rtx);
13625 new_rtx = reg;
13628 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13629 /* We can't use @GOTOFF for text labels on VxWorks;
13630 see gotoff_operand. */
13631 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13633 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13634 if (tmp)
13635 return tmp;
13637 /* For x64 PE-COFF there is no GOT table, so we use the address
13638 directly. */
13639 if (TARGET_64BIT && TARGET_PECOFF)
13641 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13642 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13644 if (reg == 0)
13645 reg = gen_reg_rtx (Pmode);
13646 emit_move_insn (reg, new_rtx);
13647 new_rtx = reg;
13649 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13651 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13652 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13653 new_rtx = gen_const_mem (Pmode, new_rtx);
13654 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13656 if (reg == 0)
13657 reg = gen_reg_rtx (Pmode);
13658 /* Use gen_movsi directly, otherwise the address is loaded
13659 into a register for CSE. We don't want to CSE these addresses;
13660 instead we CSE addresses from the GOT table, so skip this. */
13661 emit_insn (gen_movsi (reg, new_rtx));
13662 new_rtx = reg;
13664 else
13666 /* This symbol must be referenced via a load from the
13667 Global Offset Table (@GOT). */
13669 set_pic_reg_ever_live ();
13670 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13671 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13672 if (TARGET_64BIT)
13673 new_rtx = force_reg (Pmode, new_rtx);
13674 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13675 new_rtx = gen_const_mem (Pmode, new_rtx);
13676 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13678 if (reg == 0)
13679 reg = gen_reg_rtx (Pmode);
13680 emit_move_insn (reg, new_rtx);
13681 new_rtx = reg;
13684 else
13686 if (CONST_INT_P (addr)
13687 && !x86_64_immediate_operand (addr, VOIDmode))
13689 if (reg)
13691 emit_move_insn (reg, addr);
13692 new_rtx = reg;
13694 else
13695 new_rtx = force_reg (Pmode, addr);
13697 else if (GET_CODE (addr) == CONST)
13699 addr = XEXP (addr, 0);
13701 /* We must match stuff we generate before. Assume the only
13702 unspecs that can get here are ours. Not that we could do
13703 anything with them anyway.... */
13704 if (GET_CODE (addr) == UNSPEC
13705 || (GET_CODE (addr) == PLUS
13706 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13707 return orig;
13708 gcc_assert (GET_CODE (addr) == PLUS);
13710 if (GET_CODE (addr) == PLUS)
13712 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13714 /* Check first to see if this is a constant offset from a @GOTOFF
13715 symbol reference. */
13716 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13717 && CONST_INT_P (op1))
13719 if (!TARGET_64BIT)
13721 set_pic_reg_ever_live ();
13722 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13723 UNSPEC_GOTOFF);
13724 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13725 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13726 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13728 if (reg != 0)
13730 emit_move_insn (reg, new_rtx);
13731 new_rtx = reg;
13734 else
13736 if (INTVAL (op1) < -16*1024*1024
13737 || INTVAL (op1) >= 16*1024*1024)
13739 if (!x86_64_immediate_operand (op1, Pmode))
13740 op1 = force_reg (Pmode, op1);
13741 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13745 else
13747 rtx base = legitimize_pic_address (op0, reg);
13748 machine_mode mode = GET_MODE (base);
13749 new_rtx
13750 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13752 if (CONST_INT_P (new_rtx))
13754 if (INTVAL (new_rtx) < -16*1024*1024
13755 || INTVAL (new_rtx) >= 16*1024*1024)
13757 if (!x86_64_immediate_operand (new_rtx, mode))
13758 new_rtx = force_reg (mode, new_rtx);
13759 new_rtx
13760 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13762 else
13763 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13765 else
13767 if (GET_CODE (new_rtx) == PLUS
13768 && CONSTANT_P (XEXP (new_rtx, 1)))
13770 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13771 new_rtx = XEXP (new_rtx, 1);
13773 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13778 return new_rtx;
13781 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13783 static rtx
13784 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13786 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13788 if (GET_MODE (tp) != tp_mode)
13790 gcc_assert (GET_MODE (tp) == SImode);
13791 gcc_assert (tp_mode == DImode);
13793 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13796 if (to_reg)
13797 tp = copy_to_mode_reg (tp_mode, tp);
13799 return tp;
13802 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13804 static GTY(()) rtx ix86_tls_symbol;
13806 static rtx
13807 ix86_tls_get_addr (void)
13809 if (!ix86_tls_symbol)
13811 const char *sym
13812 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13813 ? "___tls_get_addr" : "__tls_get_addr");
13815 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13818 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13820 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13821 UNSPEC_PLTOFF);
13822 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13823 gen_rtx_CONST (Pmode, unspec));
13826 return ix86_tls_symbol;
13829 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13831 static GTY(()) rtx ix86_tls_module_base_symbol;
13833 static rtx
13834 ix86_tls_module_base (void)
13836 if (!ix86_tls_module_base_symbol)
13838 ix86_tls_module_base_symbol
13839 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13841 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13842 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13845 return ix86_tls_module_base_symbol;
13848 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13849 false if we expect this to be used for a memory address and true if
13850 we expect to load the address into a register. */
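/* Very roughly, and ignoring the GNU2 (descriptor-based) and Sun
   variants handled below, the 64-bit sequences built here correspond to:
     global/local dynamic:  a call to __tls_get_addr
     initial exec:          movq  x@gottpoff(%rip), REG
                            addq  %fs:0, REG
     local exec:            movq  %fs:0, REG
                            addq  $x@tpoff, REG  */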
13852 static rtx
13853 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13855 rtx dest, base, off;
13856 rtx pic = NULL_RTX, tp = NULL_RTX;
13857 machine_mode tp_mode = Pmode;
13858 int type;
13860 /* Fall back to the global dynamic model if the toolchain cannot support
13861 local dynamic. */
13862 if (TARGET_SUN_TLS && !TARGET_64BIT
13863 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13864 && model == TLS_MODEL_LOCAL_DYNAMIC)
13865 model = TLS_MODEL_GLOBAL_DYNAMIC;
13867 switch (model)
13869 case TLS_MODEL_GLOBAL_DYNAMIC:
13870 dest = gen_reg_rtx (Pmode);
13872 if (!TARGET_64BIT)
13874 if (flag_pic && !TARGET_PECOFF)
13875 pic = pic_offset_table_rtx;
13876 else
13878 pic = gen_reg_rtx (Pmode);
13879 emit_insn (gen_set_got (pic));
13883 if (TARGET_GNU2_TLS)
13885 if (TARGET_64BIT)
13886 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13887 else
13888 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13890 tp = get_thread_pointer (Pmode, true);
13891 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13893 if (GET_MODE (x) != Pmode)
13894 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13896 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13898 else
13900 rtx caddr = ix86_tls_get_addr ();
13902 if (TARGET_64BIT)
13904 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13905 rtx_insn *insns;
13907 start_sequence ();
13908 emit_call_insn
13909 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13910 insns = get_insns ();
13911 end_sequence ();
13913 if (GET_MODE (x) != Pmode)
13914 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13916 RTL_CONST_CALL_P (insns) = 1;
13917 emit_libcall_block (insns, dest, rax, x);
13919 else
13920 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13922 break;
13924 case TLS_MODEL_LOCAL_DYNAMIC:
13925 base = gen_reg_rtx (Pmode);
13927 if (!TARGET_64BIT)
13929 if (flag_pic)
13930 pic = pic_offset_table_rtx;
13931 else
13933 pic = gen_reg_rtx (Pmode);
13934 emit_insn (gen_set_got (pic));
13938 if (TARGET_GNU2_TLS)
13940 rtx tmp = ix86_tls_module_base ();
13942 if (TARGET_64BIT)
13943 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13944 else
13945 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13947 tp = get_thread_pointer (Pmode, true);
13948 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13949 gen_rtx_MINUS (Pmode, tmp, tp));
13951 else
13953 rtx caddr = ix86_tls_get_addr ();
13955 if (TARGET_64BIT)
13957 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13958 rtx_insn *insns;
13959 rtx eqv;
13961 start_sequence ();
13962 emit_call_insn
13963 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13964 insns = get_insns ();
13965 end_sequence ();
13967 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13968 share the LD_BASE result with other LD model accesses. */
13969 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13970 UNSPEC_TLS_LD_BASE);
13972 RTL_CONST_CALL_P (insns) = 1;
13973 emit_libcall_block (insns, base, rax, eqv);
13975 else
13976 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13979 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13980 off = gen_rtx_CONST (Pmode, off);
13982 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13984 if (TARGET_GNU2_TLS)
13986 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13988 if (GET_MODE (x) != Pmode)
13989 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13991 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13993 break;
13995 case TLS_MODEL_INITIAL_EXEC:
13996 if (TARGET_64BIT)
13998 if (TARGET_SUN_TLS && !TARGET_X32)
14000 /* The Sun linker took the AMD64 TLS spec literally
14001 and can only handle %rax as the destination of the
14002 initial-exec code sequence. */
14004 dest = gen_reg_rtx (DImode);
14005 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14006 return dest;
14009 /* Generate DImode references to avoid %fs:(%reg32)
14010 problems and the linker IE->LE relaxation bug. */
14011 tp_mode = DImode;
14012 pic = NULL;
14013 type = UNSPEC_GOTNTPOFF;
14015 else if (flag_pic)
14017 set_pic_reg_ever_live ();
14018 pic = pic_offset_table_rtx;
14019 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14021 else if (!TARGET_ANY_GNU_TLS)
14023 pic = gen_reg_rtx (Pmode);
14024 emit_insn (gen_set_got (pic));
14025 type = UNSPEC_GOTTPOFF;
14027 else
14029 pic = NULL;
14030 type = UNSPEC_INDNTPOFF;
14033 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14034 off = gen_rtx_CONST (tp_mode, off);
14035 if (pic)
14036 off = gen_rtx_PLUS (tp_mode, pic, off);
14037 off = gen_const_mem (tp_mode, off);
14038 set_mem_alias_set (off, ix86_GOT_alias_set ());
14040 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14042 base = get_thread_pointer (tp_mode,
14043 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14044 off = force_reg (tp_mode, off);
14045 return gen_rtx_PLUS (tp_mode, base, off);
14047 else
14049 base = get_thread_pointer (Pmode, true);
14050 dest = gen_reg_rtx (Pmode);
14051 emit_insn (ix86_gen_sub3 (dest, base, off));
14053 break;
14055 case TLS_MODEL_LOCAL_EXEC:
14056 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14057 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14058 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14059 off = gen_rtx_CONST (Pmode, off);
14061 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14063 base = get_thread_pointer (Pmode,
14064 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14065 return gen_rtx_PLUS (Pmode, base, off);
14067 else
14069 base = get_thread_pointer (Pmode, true);
14070 dest = gen_reg_rtx (Pmode);
14071 emit_insn (ix86_gen_sub3 (dest, base, off));
14073 break;
14075 default:
14076 gcc_unreachable ();
14079 return dest;
14082 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14083 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14084 unique refptr-DECL symbol corresponding to symbol DECL. */
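/* E.g. a reference to a dllimported variable 'foo' is turned into a load
   through the import-table pointer ("*__imp_foo" as built below), while
   an external symbol under the medium/large PE-COFF models gets a local
   "refptr" indirection instead (a sketch; the exact prefixes depend on
   the user label prefix).  */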
14086 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14088 static inline hashval_t hash (tree_map *m) { return m->hash; }
14089 static inline bool
14090 equal (tree_map *a, tree_map *b)
14092 return a->base.from == b->base.from;
14095 static void
14096 handle_cache_entry (tree_map *&m)
14098 extern void gt_ggc_mx (tree_map *&);
14099 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14100 return;
14101 else if (ggc_marked_p (m->base.from))
14102 gt_ggc_mx (m);
14103 else
14104 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14108 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14110 static tree
14111 get_dllimport_decl (tree decl, bool beimport)
14113 struct tree_map *h, in;
14114 const char *name;
14115 const char *prefix;
14116 size_t namelen, prefixlen;
14117 char *imp_name;
14118 tree to;
14119 rtx rtl;
14121 if (!dllimport_map)
14122 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14124 in.hash = htab_hash_pointer (decl);
14125 in.base.from = decl;
14126 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14127 h = *loc;
14128 if (h)
14129 return h->to;
14131 *loc = h = ggc_alloc<tree_map> ();
14132 h->hash = in.hash;
14133 h->base.from = decl;
14134 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14135 VAR_DECL, NULL, ptr_type_node);
14136 DECL_ARTIFICIAL (to) = 1;
14137 DECL_IGNORED_P (to) = 1;
14138 DECL_EXTERNAL (to) = 1;
14139 TREE_READONLY (to) = 1;
14141 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14142 name = targetm.strip_name_encoding (name);
14143 if (beimport)
14144 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14145 ? "*__imp_" : "*__imp__";
14146 else
14147 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14148 namelen = strlen (name);
14149 prefixlen = strlen (prefix);
14150 imp_name = (char *) alloca (namelen + prefixlen + 1);
14151 memcpy (imp_name, prefix, prefixlen);
14152 memcpy (imp_name + prefixlen, name, namelen + 1);
14154 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14155 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14156 SET_SYMBOL_REF_DECL (rtl, to);
14157 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14158 if (!beimport)
14160 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14161 #ifdef SUB_TARGET_RECORD_STUB
14162 SUB_TARGET_RECORD_STUB (name);
14163 #endif
14166 rtl = gen_const_mem (Pmode, rtl);
14167 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14169 SET_DECL_RTL (to, rtl);
14170 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14172 return to;
14175 /* Expand SYMBOL into its corresponding far-address symbol.
14176 WANT_REG is true if we require the result to be a register. */
14178 static rtx
14179 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14181 tree imp_decl;
14182 rtx x;
14184 gcc_assert (SYMBOL_REF_DECL (symbol));
14185 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14187 x = DECL_RTL (imp_decl);
14188 if (want_reg)
14189 x = force_reg (Pmode, x);
14190 return x;
14193 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14194 true if we require the result to be a register. */
14196 static rtx
14197 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14199 tree imp_decl;
14200 rtx x;
14202 gcc_assert (SYMBOL_REF_DECL (symbol));
14203 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14205 x = DECL_RTL (imp_decl);
14206 if (want_reg)
14207 x = force_reg (Pmode, x);
14208 return x;
14211 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14212 is true if we require the result to be a register. */
14214 static rtx
14215 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14217 if (!TARGET_PECOFF)
14218 return NULL_RTX;
14220 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14222 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14223 return legitimize_dllimport_symbol (addr, inreg);
14224 if (GET_CODE (addr) == CONST
14225 && GET_CODE (XEXP (addr, 0)) == PLUS
14226 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14227 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14229 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14230 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14234 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14235 return NULL_RTX;
14236 if (GET_CODE (addr) == SYMBOL_REF
14237 && !is_imported_p (addr)
14238 && SYMBOL_REF_EXTERNAL_P (addr)
14239 && SYMBOL_REF_DECL (addr))
14240 return legitimize_pe_coff_extern_decl (addr, inreg);
14242 if (GET_CODE (addr) == CONST
14243 && GET_CODE (XEXP (addr, 0)) == PLUS
14244 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14245 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14246 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14247 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14249 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14250 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14252 return NULL_RTX;
14255 /* Try machine-dependent ways of modifying an illegitimate address
14256 to be legitimate. If we find one, return the new, valid address.
14257 This macro is used in only one place: `memory_address' in explow.c.
14259 OLDX is the address as it was before break_out_memory_refs was called.
14260 In some cases it is useful to look at this to decide what needs to be done.
14262 It is always safe for this macro to do nothing. It exists to recognize
14263 opportunities to optimize the output.
14265 For the 80386, we handle X+REG by loading X into a register R and
14266 using R+REG. R will go in a general reg and indexing will be used.
14267 However, if REG is a broken-out memory address or multiplication,
14268 nothing needs to be done because REG can certainly go in a general reg.
14270 When -fpic is used, special handling is needed for symbolic references.
14271 See comments by legitimize_pic_address in i386.c for details. */
14273 static rtx
14274 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14276 int changed = 0;
14277 unsigned log;
14279 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14280 if (log)
14281 return legitimize_tls_address (x, (enum tls_model) log, false);
14282 if (GET_CODE (x) == CONST
14283 && GET_CODE (XEXP (x, 0)) == PLUS
14284 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14285 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14287 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14288 (enum tls_model) log, false);
14289 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14292 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14294 rtx tmp = legitimize_pe_coff_symbol (x, true);
14295 if (tmp)
14296 return tmp;
14299 if (flag_pic && SYMBOLIC_CONST (x))
14300 return legitimize_pic_address (x, 0);
14302 #if TARGET_MACHO
14303 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14304 return machopic_indirect_data_reference (x, 0);
14305 #endif
14307 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14308 if (GET_CODE (x) == ASHIFT
14309 && CONST_INT_P (XEXP (x, 1))
14310 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14312 changed = 1;
14313 log = INTVAL (XEXP (x, 1));
14314 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14315 GEN_INT (1 << log));
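  /* E.g. (ashift (reg) (const_int 3)) has just been rewritten as
     (mult (reg) (const_int 8)), which matches the index*scale part of
     the x86 addressing mode and so can be recognized below.  */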
14318 if (GET_CODE (x) == PLUS)
14320 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14322 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14323 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14324 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14326 changed = 1;
14327 log = INTVAL (XEXP (XEXP (x, 0), 1));
14328 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14329 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14330 GEN_INT (1 << log));
14333 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14334 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14335 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14337 changed = 1;
14338 log = INTVAL (XEXP (XEXP (x, 1), 1));
14339 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14340 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14341 GEN_INT (1 << log));
14344 /* Put multiply first if it isn't already. */
14345 if (GET_CODE (XEXP (x, 1)) == MULT)
14347 rtx tmp = XEXP (x, 0);
14348 XEXP (x, 0) = XEXP (x, 1);
14349 XEXP (x, 1) = tmp;
14350 changed = 1;
14353 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14354 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14355 created by virtual register instantiation, register elimination, and
14356 similar optimizations. */
14357 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14359 changed = 1;
14360 x = gen_rtx_PLUS (Pmode,
14361 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14362 XEXP (XEXP (x, 1), 0)),
14363 XEXP (XEXP (x, 1), 1));
14366 /* Canonicalize
14367 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14368 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14369 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14370 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14371 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14372 && CONSTANT_P (XEXP (x, 1)))
14374 rtx constant;
14375 rtx other = NULL_RTX;
14377 if (CONST_INT_P (XEXP (x, 1)))
14379 constant = XEXP (x, 1);
14380 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14382 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14384 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14385 other = XEXP (x, 1);
14387 else
14388 constant = 0;
14390 if (constant)
14392 changed = 1;
14393 x = gen_rtx_PLUS (Pmode,
14394 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14395 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14396 plus_constant (Pmode, other,
14397 INTVAL (constant)));
14401 if (changed && ix86_legitimate_address_p (mode, x, false))
14402 return x;
14404 if (GET_CODE (XEXP (x, 0)) == MULT)
14406 changed = 1;
14407 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14410 if (GET_CODE (XEXP (x, 1)) == MULT)
14412 changed = 1;
14413 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14416 if (changed
14417 && REG_P (XEXP (x, 1))
14418 && REG_P (XEXP (x, 0)))
14419 return x;
14421 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14423 changed = 1;
14424 x = legitimize_pic_address (x, 0);
14427 if (changed && ix86_legitimate_address_p (mode, x, false))
14428 return x;
14430 if (REG_P (XEXP (x, 0)))
14432 rtx temp = gen_reg_rtx (Pmode);
14433 rtx val = force_operand (XEXP (x, 1), temp);
14434 if (val != temp)
14436 val = convert_to_mode (Pmode, val, 1);
14437 emit_move_insn (temp, val);
14440 XEXP (x, 1) = temp;
14441 return x;
14444 else if (REG_P (XEXP (x, 1)))
14446 rtx temp = gen_reg_rtx (Pmode);
14447 rtx val = force_operand (XEXP (x, 0), temp);
14448 if (val != temp)
14450 val = convert_to_mode (Pmode, val, 1);
14451 emit_move_insn (temp, val);
14454 XEXP (x, 0) = temp;
14455 return x;
14459 return x;
14462 /* Print an integer constant expression in assembler syntax. Addition
14463 and subtraction are the only arithmetic that may appear in these
14464 expressions. FILE is the stdio stream to write to, X is the rtx, and
14465 CODE is the operand print code from the output string. */
14467 static void
14468 output_pic_addr_const (FILE *file, rtx x, int code)
14470 char buf[256];
14472 switch (GET_CODE (x))
14474 case PC:
14475 gcc_assert (flag_pic);
14476 putc ('.', file);
14477 break;
14479 case SYMBOL_REF:
14480 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14481 output_addr_const (file, x);
14482 else
14484 const char *name = XSTR (x, 0);
14486 /* Mark the decl as referenced so that cgraph will
14487 output the function. */
14488 if (SYMBOL_REF_DECL (x))
14489 mark_decl_referenced (SYMBOL_REF_DECL (x));
14491 #if TARGET_MACHO
14492 if (MACHOPIC_INDIRECT
14493 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14494 name = machopic_indirection_name (x, /*stub_p=*/true);
14495 #endif
14496 assemble_name (file, name);
14498 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14499 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14500 fputs ("@PLT", file);
14501 break;
14503 case LABEL_REF:
14504 x = XEXP (x, 0);
14505 /* FALLTHRU */
14506 case CODE_LABEL:
14507 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14508 assemble_name (asm_out_file, buf);
14509 break;
14511 case CONST_INT:
14512 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14513 break;
14515 case CONST:
14516 /* This used to output parentheses around the expression,
14517 but that does not work on the 386 (either ATT or BSD assembler). */
14518 output_pic_addr_const (file, XEXP (x, 0), code);
14519 break;
14521 case CONST_DOUBLE:
14522 if (GET_MODE (x) == VOIDmode)
14524 /* We can use %d if the number is <32 bits and positive. */
14525 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14526 fprintf (file, "0x%lx%08lx",
14527 (unsigned long) CONST_DOUBLE_HIGH (x),
14528 (unsigned long) CONST_DOUBLE_LOW (x));
14529 else
14530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14532 else
14533 /* We can't handle floating point constants;
14534 TARGET_PRINT_OPERAND must handle them. */
14535 output_operand_lossage ("floating constant misused");
14536 break;
14538 case PLUS:
14539 /* Some assemblers need integer constants to appear first. */
14540 if (CONST_INT_P (XEXP (x, 0)))
14542 output_pic_addr_const (file, XEXP (x, 0), code);
14543 putc ('+', file);
14544 output_pic_addr_const (file, XEXP (x, 1), code);
14546 else
14548 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14549 output_pic_addr_const (file, XEXP (x, 1), code);
14550 putc ('+', file);
14551 output_pic_addr_const (file, XEXP (x, 0), code);
14553 break;
14555 case MINUS:
14556 if (!TARGET_MACHO)
14557 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14558 output_pic_addr_const (file, XEXP (x, 0), code);
14559 putc ('-', file);
14560 output_pic_addr_const (file, XEXP (x, 1), code);
14561 if (!TARGET_MACHO)
14562 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14563 break;
14565 case UNSPEC:
14566 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14568 bool f = i386_asm_output_addr_const_extra (file, x);
14569 gcc_assert (f);
14570 break;
14573 gcc_assert (XVECLEN (x, 0) == 1);
14574 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14575 switch (XINT (x, 1))
14577 case UNSPEC_GOT:
14578 fputs ("@GOT", file);
14579 break;
14580 case UNSPEC_GOTOFF:
14581 fputs ("@GOTOFF", file);
14582 break;
14583 case UNSPEC_PLTOFF:
14584 fputs ("@PLTOFF", file);
14585 break;
14586 case UNSPEC_PCREL:
14587 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14588 "(%rip)" : "[rip]", file);
14589 break;
14590 case UNSPEC_GOTPCREL:
14591 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14592 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14593 break;
14594 case UNSPEC_GOTTPOFF:
14595 /* FIXME: This might be @TPOFF in Sun ld too. */
14596 fputs ("@gottpoff", file);
14597 break;
14598 case UNSPEC_TPOFF:
14599 fputs ("@tpoff", file);
14600 break;
14601 case UNSPEC_NTPOFF:
14602 if (TARGET_64BIT)
14603 fputs ("@tpoff", file);
14604 else
14605 fputs ("@ntpoff", file);
14606 break;
14607 case UNSPEC_DTPOFF:
14608 fputs ("@dtpoff", file);
14609 break;
14610 case UNSPEC_GOTNTPOFF:
14611 if (TARGET_64BIT)
14612 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14613 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14614 else
14615 fputs ("@gotntpoff", file);
14616 break;
14617 case UNSPEC_INDNTPOFF:
14618 fputs ("@indntpoff", file);
14619 break;
14620 #if TARGET_MACHO
14621 case UNSPEC_MACHOPIC_OFFSET:
14622 putc ('-', file);
14623 machopic_output_function_base_name (file);
14624 break;
14625 #endif
14626 default:
14627 output_operand_lossage ("invalid UNSPEC as operand");
14628 break;
14630 break;
14632 default:
14633 output_operand_lossage ("invalid expression as operand");
14637 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14638 We need to emit DTP-relative relocations. */
14640 static void ATTRIBUTE_UNUSED
14641 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14643 fputs (ASM_LONG, file);
14644 output_addr_const (file, x);
14645 fputs ("@dtpoff", file);
14646 switch (size)
14648 case 4:
14649 break;
14650 case 8:
14651 fputs (", 0", file);
14652 break;
14653 default:
14654 gcc_unreachable ();
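/* The directive emitted is typically ".long foo@dtpoff" for SIZE 4 and
   ".long foo@dtpoff, 0" for SIZE 8, assuming ASM_LONG expands to a
   ".long" directive on the configuration in use; "foo" is illustrative.  */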
14658 /* Return true if X is a representation of the PIC register. This copes
14659 with calls from ix86_find_base_term, where the register might have
14660 been replaced by a cselib value. */
14662 static bool
14663 ix86_pic_register_p (rtx x)
14665 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14666 return (pic_offset_table_rtx
14667 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14668 else if (!REG_P (x))
14669 return false;
14670 else if (pic_offset_table_rtx)
14672 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14673 return true;
14674 if (HARD_REGISTER_P (x)
14675 && !HARD_REGISTER_P (pic_offset_table_rtx)
14676 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14677 return true;
14678 return false;
14680 else
14681 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14684 /* Helper function for ix86_delegitimize_address.
14685 Attempt to delegitimize TLS local-exec accesses. */
14687 static rtx
14688 ix86_delegitimize_tls_address (rtx orig_x)
14690 rtx x = orig_x, unspec;
14691 struct ix86_address addr;
14693 if (!TARGET_TLS_DIRECT_SEG_REFS)
14694 return orig_x;
14695 if (MEM_P (x))
14696 x = XEXP (x, 0);
14697 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14698 return orig_x;
14699 if (ix86_decompose_address (x, &addr) == 0
14700 || addr.seg != DEFAULT_TLS_SEG_REG
14701 || addr.disp == NULL_RTX
14702 || GET_CODE (addr.disp) != CONST)
14703 return orig_x;
14704 unspec = XEXP (addr.disp, 0);
14705 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14706 unspec = XEXP (unspec, 0);
14707 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14708 return orig_x;
14709 x = XVECEXP (unspec, 0, 0);
14710 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14711 if (unspec != XEXP (addr.disp, 0))
14712 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14713 if (addr.index)
14715 rtx idx = addr.index;
14716 if (addr.scale != 1)
14717 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14718 x = gen_rtx_PLUS (Pmode, idx, x);
14720 if (addr.base)
14721 x = gen_rtx_PLUS (Pmode, addr.base, x);
14722 if (MEM_P (orig_x))
14723 x = replace_equiv_address_nv (orig_x, x);
14724 return x;
14727 /* In the name of slightly smaller debug output, and to cater to
14728 general assembler lossage, recognize PIC+GOTOFF and turn it back
14729 into a direct symbol reference.
14731 On Darwin, this is necessary to avoid a crash, because Darwin
14732 has a different PIC label for each routine but the DWARF debugging
14733 information is not associated with any particular routine, so it's
14734 necessary to remove references to the PIC label from RTL stored by
14735 the DWARF output code. */
14737 static rtx
14738 ix86_delegitimize_address (rtx x)
14740 rtx orig_x = delegitimize_mem_from_attrs (x);
14741 /* addend is NULL or some rtx if x is something+GOTOFF where
14742 something doesn't include the PIC register. */
14743 rtx addend = NULL_RTX;
14744 /* reg_addend is NULL or a multiple of some register. */
14745 rtx reg_addend = NULL_RTX;
14746 /* const_addend is NULL or a const_int. */
14747 rtx const_addend = NULL_RTX;
14748 /* This is the result, or NULL. */
14749 rtx result = NULL_RTX;
14751 x = orig_x;
14753 if (MEM_P (x))
14754 x = XEXP (x, 0);
14756 if (TARGET_64BIT)
14758 if (GET_CODE (x) == CONST
14759 && GET_CODE (XEXP (x, 0)) == PLUS
14760 && GET_MODE (XEXP (x, 0)) == Pmode
14761 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14762 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14763 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14765 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14766 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14767 if (MEM_P (orig_x))
14768 x = replace_equiv_address_nv (orig_x, x);
14769 return x;
14772 if (GET_CODE (x) == CONST
14773 && GET_CODE (XEXP (x, 0)) == UNSPEC
14774 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14775 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14776 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14778 x = XVECEXP (XEXP (x, 0), 0, 0);
14779 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14781 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14782 GET_MODE (x), 0);
14783 if (x == NULL_RTX)
14784 return orig_x;
14786 return x;
14789 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14790 return ix86_delegitimize_tls_address (orig_x);
14792 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14793 and -mcmodel=medium -fpic. */
14796 if (GET_CODE (x) != PLUS
14797 || GET_CODE (XEXP (x, 1)) != CONST)
14798 return ix86_delegitimize_tls_address (orig_x);
14800 if (ix86_pic_register_p (XEXP (x, 0)))
14801 /* %ebx + GOT/GOTOFF */
14803 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14805 /* %ebx + %reg * scale + GOT/GOTOFF */
14806 reg_addend = XEXP (x, 0);
14807 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14808 reg_addend = XEXP (reg_addend, 1);
14809 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14810 reg_addend = XEXP (reg_addend, 0);
14811 else
14813 reg_addend = NULL_RTX;
14814 addend = XEXP (x, 0);
14817 else
14818 addend = XEXP (x, 0);
14820 x = XEXP (XEXP (x, 1), 0);
14821 if (GET_CODE (x) == PLUS
14822 && CONST_INT_P (XEXP (x, 1)))
14824 const_addend = XEXP (x, 1);
14825 x = XEXP (x, 0);
14828 if (GET_CODE (x) == UNSPEC
14829 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14830 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14831 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14832 && !MEM_P (orig_x) && !addend)))
14833 result = XVECEXP (x, 0, 0);
14835 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14836 && !MEM_P (orig_x))
14837 result = XVECEXP (x, 0, 0);
14839 if (! result)
14840 return ix86_delegitimize_tls_address (orig_x);
14842 if (const_addend)
14843 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14844 if (reg_addend)
14845 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14846 if (addend)
14848 /* If the rest of original X doesn't involve the PIC register, add
14849 addend and subtract pic_offset_table_rtx. This can happen e.g.
14850 for code like:
14851 leal (%ebx, %ecx, 4), %ecx
14853 movl foo@GOTOFF(%ecx), %edx
14854 in which case we return (%ecx - %ebx) + foo
14855 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14856 and reload has completed. */
14857 if (pic_offset_table_rtx
14858 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14859 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14860 pic_offset_table_rtx),
14861 result);
14862 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14864 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14865 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14866 result = gen_rtx_PLUS (Pmode, tmp, result);
14868 else
14869 return orig_x;
14871 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14873 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14874 if (result == NULL_RTX)
14875 return orig_x;
14877 return result;
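/* For example, a 32-bit GOTOFF address of the form
   (plus (reg ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into the bare (symbol_ref "foo") for debug output;
   "foo" is illustrative.  */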
14880 /* If X is a machine specific address (i.e. a symbol or label being
14881 referenced as a displacement from the GOT implemented using an
14882 UNSPEC), then return the base term. Otherwise return X. */
14885 ix86_find_base_term (rtx x)
14887 rtx term;
14889 if (TARGET_64BIT)
14891 if (GET_CODE (x) != CONST)
14892 return x;
14893 term = XEXP (x, 0);
14894 if (GET_CODE (term) == PLUS
14895 && (CONST_INT_P (XEXP (term, 1))
14896 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14897 term = XEXP (term, 0);
14898 if (GET_CODE (term) != UNSPEC
14899 || (XINT (term, 1) != UNSPEC_GOTPCREL
14900 && XINT (term, 1) != UNSPEC_PCREL))
14901 return x;
14903 return XVECEXP (term, 0, 0);
14906 return ix86_delegitimize_address (x);
14909 static void
14910 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14911 bool fp, FILE *file)
14913 const char *suffix;
14915 if (mode == CCFPmode || mode == CCFPUmode)
14917 code = ix86_fp_compare_code_to_integer (code);
14918 mode = CCmode;
14920 if (reverse)
14921 code = reverse_condition (code);
14923 switch (code)
14925 case EQ:
14926 switch (mode)
14928 case CCAmode:
14929 suffix = "a";
14930 break;
14932 case CCCmode:
14933 suffix = "c";
14934 break;
14936 case CCOmode:
14937 suffix = "o";
14938 break;
14940 case CCSmode:
14941 suffix = "s";
14942 break;
14944 default:
14945 suffix = "e";
14947 break;
14948 case NE:
14949 switch (mode)
14951 case CCAmode:
14952 suffix = "na";
14953 break;
14955 case CCCmode:
14956 suffix = "nc";
14957 break;
14959 case CCOmode:
14960 suffix = "no";
14961 break;
14963 case CCSmode:
14964 suffix = "ns";
14965 break;
14967 default:
14968 suffix = "ne";
14970 break;
14971 case GT:
14972 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14973 suffix = "g";
14974 break;
14975 case GTU:
14976 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14977 Those same assemblers have the same but opposite lossage on cmov. */
14978 if (mode == CCmode)
14979 suffix = fp ? "nbe" : "a";
14980 else
14981 gcc_unreachable ();
14982 break;
14983 case LT:
14984 switch (mode)
14986 case CCNOmode:
14987 case CCGOCmode:
14988 suffix = "s";
14989 break;
14991 case CCmode:
14992 case CCGCmode:
14993 suffix = "l";
14994 break;
14996 default:
14997 gcc_unreachable ();
14999 break;
15000 case LTU:
15001 if (mode == CCmode)
15002 suffix = "b";
15003 else if (mode == CCCmode)
15004 suffix = fp ? "b" : "c";
15005 else
15006 gcc_unreachable ();
15007 break;
15008 case GE:
15009 switch (mode)
15011 case CCNOmode:
15012 case CCGOCmode:
15013 suffix = "ns";
15014 break;
15016 case CCmode:
15017 case CCGCmode:
15018 suffix = "ge";
15019 break;
15021 default:
15022 gcc_unreachable ();
15024 break;
15025 case GEU:
15026 if (mode == CCmode)
15027 suffix = "nb";
15028 else if (mode == CCCmode)
15029 suffix = fp ? "nb" : "nc";
15030 else
15031 gcc_unreachable ();
15032 break;
15033 case LE:
15034 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15035 suffix = "le";
15036 break;
15037 case LEU:
15038 if (mode == CCmode)
15039 suffix = "be";
15040 else
15041 gcc_unreachable ();
15042 break;
15043 case UNORDERED:
15044 suffix = fp ? "u" : "p";
15045 break;
15046 case ORDERED:
15047 suffix = fp ? "nu" : "np";
15048 break;
15049 default:
15050 gcc_unreachable ();
15052 fputs (suffix, file);
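/* As an illustration: GTU in CCmode prints "a" (as in ja/seta/cmova),
   and with REVERSE set the reversed condition LEU prints "be"; the FP
   variant of GTU prints "nbe" instead, per the assembler lossage noted
   in the GTU case above.  */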
15055 /* Print the name of register X to FILE based on its machine mode and number.
15056 If CODE is 'w', pretend the mode is HImode.
15057 If CODE is 'b', pretend the mode is QImode.
15058 If CODE is 'k', pretend the mode is SImode.
15059 If CODE is 'q', pretend the mode is DImode.
15060 If CODE is 'x', pretend the mode is V4SFmode.
15061 If CODE is 't', pretend the mode is V8SFmode.
15062 If CODE is 'g', pretend the mode is V16SFmode.
15063 If CODE is 'h', pretend the reg is the 'high' byte register.
15064 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15065 If CODE is 'd', duplicate the operand for AVX instruction.
15068 void
15069 print_reg (rtx x, int code, FILE *file)
15071 const char *reg;
15072 unsigned int regno;
15073 bool duplicated = code == 'd' && TARGET_AVX;
15075 if (ASSEMBLER_DIALECT == ASM_ATT)
15076 putc ('%', file);
15078 if (x == pc_rtx)
15080 gcc_assert (TARGET_64BIT);
15081 fputs ("rip", file);
15082 return;
15085 regno = true_regnum (x);
15086 gcc_assert (regno != ARG_POINTER_REGNUM
15087 && regno != FRAME_POINTER_REGNUM
15088 && regno != FLAGS_REG
15089 && regno != FPSR_REG
15090 && regno != FPCR_REG);
15092 if (code == 'w' || MMX_REG_P (x))
15093 code = 2;
15094 else if (code == 'b')
15095 code = 1;
15096 else if (code == 'k')
15097 code = 4;
15098 else if (code == 'q')
15099 code = 8;
15100 else if (code == 'y')
15101 code = 3;
15102 else if (code == 'h')
15103 code = 0;
15104 else if (code == 'x')
15105 code = 16;
15106 else if (code == 't')
15107 code = 32;
15108 else if (code == 'g')
15109 code = 64;
15110 else
15111 code = GET_MODE_SIZE (GET_MODE (x));
15113 /* Irritatingly, AMD extended registers use a different naming convention
15114 from the normal registers: "r%d[bwd]". */
15115 if (REX_INT_REGNO_P (regno))
15117 gcc_assert (TARGET_64BIT);
15118 putc ('r', file);
15119 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15120 switch (code)
15122 case 0:
15123 error ("extended registers have no high halves");
15124 break;
15125 case 1:
15126 putc ('b', file);
15127 break;
15128 case 2:
15129 putc ('w', file);
15130 break;
15131 case 4:
15132 putc ('d', file);
15133 break;
15134 case 8:
15135 /* no suffix */
15136 break;
15137 default:
15138 error ("unsupported operand size for extended register");
15139 break;
15141 return;
15144 reg = NULL;
15145 switch (code)
15147 case 3:
15148 if (STACK_TOP_P (x))
15150 reg = "st(0)";
15151 break;
15153 /* FALLTHRU */
15154 case 8:
15155 case 4:
15156 case 12:
15157 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15158 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15159 /* FALLTHRU */
15160 case 16:
15161 case 2:
15162 normal:
15163 reg = hi_reg_name[regno];
15164 break;
15165 case 1:
15166 if (regno >= ARRAY_SIZE (qi_reg_name))
15167 goto normal;
15168 reg = qi_reg_name[regno];
15169 break;
15170 case 0:
15171 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15172 goto normal;
15173 reg = qi_high_reg_name[regno];
15174 break;
15175 case 32:
15176 if (SSE_REG_P (x))
15178 gcc_assert (!duplicated);
15179 putc ('y', file);
15180 fputs (hi_reg_name[regno] + 1, file);
15181 return;
15183 case 64:
15184 if (SSE_REG_P (x))
15186 gcc_assert (!duplicated);
15187 putc ('z', file);
15188 fputs (hi_reg_name[REGNO (x)] + 1, file);
15189 return;
15191 break;
15192 default:
15193 gcc_unreachable ();
15196 fputs (reg, file);
15197 if (duplicated)
15199 if (ASSEMBLER_DIALECT == ASM_ATT)
15200 fprintf (file, ", %%%s", reg);
15201 else
15202 fprintf (file, ", %s", reg);
15206 /* Meaning of CODE:
15207 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15208 C -- print opcode suffix for set/cmov insn.
15209 c -- like C, but print reversed condition
15210 F,f -- likewise, but for floating-point.
15211 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15212 otherwise nothing
15213 R -- print embedded rounding and sae.
15214 r -- print only sae.
15215 z -- print the opcode suffix for the size of the current operand.
15216 Z -- likewise, with special suffixes for x87 instructions.
15217 * -- print a star (in certain assembler syntax)
15218 A -- print an absolute memory reference.
15219 E -- print address with DImode register names if TARGET_64BIT.
15220 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15221 s -- print a shift double count, followed by the assembler's argument
15222 delimiter.
15223 b -- print the QImode name of the register for the indicated operand.
15224 %b0 would print %al if operands[0] is reg 0.
15225 w -- likewise, print the HImode name of the register.
15226 k -- likewise, print the SImode name of the register.
15227 q -- likewise, print the DImode name of the register.
15228 x -- likewise, print the V4SFmode name of the register.
15229 t -- likewise, print the V8SFmode name of the register.
15230 g -- likewise, print the V16SFmode name of the register.
15231 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15232 y -- print "st(0)" instead of "st" as a register.
15233 d -- print duplicated register operand for AVX instruction.
15234 D -- print condition for SSE cmp instruction.
15235 P -- if PIC, print an @PLT suffix.
15236 p -- print raw symbol name.
15237 X -- don't print any sort of PIC '@' suffix for a symbol.
15238 & -- print some in-use local-dynamic symbol name.
15239 H -- print a memory address offset by 8; used for sse high-parts
15240 Y -- print condition for XOP pcom* instruction.
15241 + -- print a branch hint as 'cs' or 'ds' prefix
15242 ; -- print a semicolon (after prefixes due to bug in older gas).
15243 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15244 @ -- print a segment register of thread base pointer load
15245 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15246 ! -- print MPX prefix for jxx/call/ret instructions if required.
15249 void
15250 ix86_print_operand (FILE *file, rtx x, int code)
15252 if (code)
15254 switch (code)
15256 case 'A':
15257 switch (ASSEMBLER_DIALECT)
15259 case ASM_ATT:
15260 putc ('*', file);
15261 break;
15263 case ASM_INTEL:
15264 /* Intel syntax. For absolute addresses, registers should not
15265 be surrounded by brackets. */
15266 if (!REG_P (x))
15268 putc ('[', file);
15269 ix86_print_operand (file, x, 0);
15270 putc (']', file);
15271 return;
15273 break;
15275 default:
15276 gcc_unreachable ();
15279 ix86_print_operand (file, x, 0);
15280 return;
15282 case 'E':
15283 /* Wrap address in an UNSPEC to declare special handling. */
15284 if (TARGET_64BIT)
15285 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15287 output_address (x);
15288 return;
15290 case 'L':
15291 if (ASSEMBLER_DIALECT == ASM_ATT)
15292 putc ('l', file);
15293 return;
15295 case 'W':
15296 if (ASSEMBLER_DIALECT == ASM_ATT)
15297 putc ('w', file);
15298 return;
15300 case 'B':
15301 if (ASSEMBLER_DIALECT == ASM_ATT)
15302 putc ('b', file);
15303 return;
15305 case 'Q':
15306 if (ASSEMBLER_DIALECT == ASM_ATT)
15307 putc ('l', file);
15308 return;
15310 case 'S':
15311 if (ASSEMBLER_DIALECT == ASM_ATT)
15312 putc ('s', file);
15313 return;
15315 case 'T':
15316 if (ASSEMBLER_DIALECT == ASM_ATT)
15317 putc ('t', file);
15318 return;
15320 case 'O':
15321 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15322 if (ASSEMBLER_DIALECT != ASM_ATT)
15323 return;
15325 switch (GET_MODE_SIZE (GET_MODE (x)))
15327 case 2:
15328 putc ('w', file);
15329 break;
15331 case 4:
15332 putc ('l', file);
15333 break;
15335 case 8:
15336 putc ('q', file);
15337 break;
15339 default:
15340 output_operand_lossage
15341 ("invalid operand size for operand code 'O'");
15342 return;
15345 putc ('.', file);
15346 #endif
15347 return;
15349 case 'z':
15350 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15352 /* Opcodes don't get size suffixes if using Intel opcodes. */
15353 if (ASSEMBLER_DIALECT == ASM_INTEL)
15354 return;
15356 switch (GET_MODE_SIZE (GET_MODE (x)))
15358 case 1:
15359 putc ('b', file);
15360 return;
15362 case 2:
15363 putc ('w', file);
15364 return;
15366 case 4:
15367 putc ('l', file);
15368 return;
15370 case 8:
15371 putc ('q', file);
15372 return;
15374 default:
15375 output_operand_lossage
15376 ("invalid operand size for operand code 'z'");
15377 return;
15381 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15382 warning
15383 (0, "non-integer operand used with operand code 'z'");
15384 /* FALLTHRU */
15386 case 'Z':
15387 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15388 if (ASSEMBLER_DIALECT == ASM_INTEL)
15389 return;
15391 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15393 switch (GET_MODE_SIZE (GET_MODE (x)))
15395 case 2:
15396 #ifdef HAVE_AS_IX86_FILDS
15397 putc ('s', file);
15398 #endif
15399 return;
15401 case 4:
15402 putc ('l', file);
15403 return;
15405 case 8:
15406 #ifdef HAVE_AS_IX86_FILDQ
15407 putc ('q', file);
15408 #else
15409 fputs ("ll", file);
15410 #endif
15411 return;
15413 default:
15414 break;
15417 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15419 /* 387 opcodes don't get size suffixes
15420 if the operands are registers. */
15421 if (STACK_REG_P (x))
15422 return;
15424 switch (GET_MODE_SIZE (GET_MODE (x)))
15426 case 4:
15427 putc ('s', file);
15428 return;
15430 case 8:
15431 putc ('l', file);
15432 return;
15434 case 12:
15435 case 16:
15436 putc ('t', file);
15437 return;
15439 default:
15440 break;
15443 else
15445 output_operand_lossage
15446 ("invalid operand type used with operand code 'Z'");
15447 return;
15450 output_operand_lossage
15451 ("invalid operand size for operand code 'Z'");
15452 return;
15454 case 'd':
15455 case 'b':
15456 case 'w':
15457 case 'k':
15458 case 'q':
15459 case 'h':
15460 case 't':
15461 case 'g':
15462 case 'y':
15463 case 'x':
15464 case 'X':
15465 case 'P':
15466 case 'p':
15467 break;
15469 case 's':
15470 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15472 ix86_print_operand (file, x, 0);
15473 fputs (", ", file);
15475 return;
15477 case 'Y':
15478 switch (GET_CODE (x))
15480 case NE:
15481 fputs ("neq", file);
15482 break;
15483 case EQ:
15484 fputs ("eq", file);
15485 break;
15486 case GE:
15487 case GEU:
15488 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15489 break;
15490 case GT:
15491 case GTU:
15492 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15493 break;
15494 case LE:
15495 case LEU:
15496 fputs ("le", file);
15497 break;
15498 case LT:
15499 case LTU:
15500 fputs ("lt", file);
15501 break;
15502 case UNORDERED:
15503 fputs ("unord", file);
15504 break;
15505 case ORDERED:
15506 fputs ("ord", file);
15507 break;
15508 case UNEQ:
15509 fputs ("ueq", file);
15510 break;
15511 case UNGE:
15512 fputs ("nlt", file);
15513 break;
15514 case UNGT:
15515 fputs ("nle", file);
15516 break;
15517 case UNLE:
15518 fputs ("ule", file);
15519 break;
15520 case UNLT:
15521 fputs ("ult", file);
15522 break;
15523 case LTGT:
15524 fputs ("une", file);
15525 break;
15526 default:
15527 output_operand_lossage ("operand is not a condition code, "
15528 "invalid operand code 'Y'");
15529 return;
15531 return;
15533 case 'D':
15534 /* Little bit of braindamage here. The SSE compare instructions
15535 use completely different names for the comparisons than the
15536 fp conditional moves do. */
15537 switch (GET_CODE (x))
15539 case UNEQ:
15540 if (TARGET_AVX)
15542 fputs ("eq_us", file);
15543 break;
15545 case EQ:
15546 fputs ("eq", file);
15547 break;
15548 case UNLT:
15549 if (TARGET_AVX)
15551 fputs ("nge", file);
15552 break;
15554 case LT:
15555 fputs ("lt", file);
15556 break;
15557 case UNLE:
15558 if (TARGET_AVX)
15560 fputs ("ngt", file);
15561 break;
15563 case LE:
15564 fputs ("le", file);
15565 break;
15566 case UNORDERED:
15567 fputs ("unord", file);
15568 break;
15569 case LTGT:
15570 if (TARGET_AVX)
15572 fputs ("neq_oq", file);
15573 break;
15575 case NE:
15576 fputs ("neq", file);
15577 break;
15578 case GE:
15579 if (TARGET_AVX)
15581 fputs ("ge", file);
15582 break;
15584 case UNGE:
15585 fputs ("nlt", file);
15586 break;
15587 case GT:
15588 if (TARGET_AVX)
15590 fputs ("gt", file);
15591 break;
15593 case UNGT:
15594 fputs ("nle", file);
15595 break;
15596 case ORDERED:
15597 fputs ("ord", file);
15598 break;
15599 default:
15600 output_operand_lossage ("operand is not a condition code, "
15601 "invalid operand code 'D'");
15602 return;
15604 return;
15606 case 'F':
15607 case 'f':
15608 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15609 if (ASSEMBLER_DIALECT == ASM_ATT)
15610 putc ('.', file);
15611 #endif
15613 case 'C':
15614 case 'c':
15615 if (!COMPARISON_P (x))
15617 output_operand_lossage ("operand is not a condition code, "
15618 "invalid operand code '%c'", code);
15619 return;
15621 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15622 code == 'c' || code == 'f',
15623 code == 'F' || code == 'f',
15624 file);
15625 return;
15627 case 'H':
15628 if (!offsettable_memref_p (x))
15630 output_operand_lossage ("operand is not an offsettable memory "
15631 "reference, invalid operand code 'H'");
15632 return;
15634 /* It doesn't actually matter what mode we use here, as we're
15635 only going to use this for printing. */
15636 x = adjust_address_nv (x, DImode, 8);
15637 /* Output 'qword ptr' for intel assembler dialect. */
15638 if (ASSEMBLER_DIALECT == ASM_INTEL)
15639 code = 'q';
15640 break;
15642 case 'K':
15643 gcc_assert (CONST_INT_P (x));
15645 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15646 #ifdef HAVE_AS_IX86_HLE
15647 fputs ("xacquire ", file);
15648 #else
15649 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15650 #endif
15651 else if (INTVAL (x) & IX86_HLE_RELEASE)
15652 #ifdef HAVE_AS_IX86_HLE
15653 fputs ("xrelease ", file);
15654 #else
15655 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15656 #endif
15657 /* We do not want to print the value of the operand. */
15658 return;
15660 case 'N':
15661 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15662 fputs ("{z}", file);
15663 return;
15665 case 'r':
15666 gcc_assert (CONST_INT_P (x));
15667 gcc_assert (INTVAL (x) == ROUND_SAE);
15669 if (ASSEMBLER_DIALECT == ASM_INTEL)
15670 fputs (", ", file);
15672 fputs ("{sae}", file);
15674 if (ASSEMBLER_DIALECT == ASM_ATT)
15675 fputs (", ", file);
15677 return;
15679 case 'R':
15680 gcc_assert (CONST_INT_P (x));
15682 if (ASSEMBLER_DIALECT == ASM_INTEL)
15683 fputs (", ", file);
15685 switch (INTVAL (x))
15687 case ROUND_NEAREST_INT | ROUND_SAE:
15688 fputs ("{rn-sae}", file);
15689 break;
15690 case ROUND_NEG_INF | ROUND_SAE:
15691 fputs ("{rd-sae}", file);
15692 break;
15693 case ROUND_POS_INF | ROUND_SAE:
15694 fputs ("{ru-sae}", file);
15695 break;
15696 case ROUND_ZERO | ROUND_SAE:
15697 fputs ("{rz-sae}", file);
15698 break;
15699 default:
15700 gcc_unreachable ();
15703 if (ASSEMBLER_DIALECT == ASM_ATT)
15704 fputs (", ", file);
15706 return;
15708 case '*':
15709 if (ASSEMBLER_DIALECT == ASM_ATT)
15710 putc ('*', file);
15711 return;
15713 case '&':
15715 const char *name = get_some_local_dynamic_name ();
15716 if (name == NULL)
15717 output_operand_lossage ("'%%&' used without any "
15718 "local dynamic TLS references");
15719 else
15720 assemble_name (file, name);
15721 return;
15724 case '+':
15726 rtx x;
15728 if (!optimize
15729 || optimize_function_for_size_p (cfun)
15730 || !TARGET_BRANCH_PREDICTION_HINTS)
15731 return;
15733 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15734 if (x)
15736 int pred_val = XINT (x, 0);
15738 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15739 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15741 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15742 bool cputaken
15743 = final_forward_branch_p (current_output_insn) == 0;
15745 /* Emit hints only where the default branch prediction
15746 heuristics would fail. */
15747 if (taken != cputaken)
15749 /* We use 3e (DS) prefix for taken branches and
15750 2e (CS) prefix for not taken branches. */
15751 if (taken)
15752 fputs ("ds ; ", file);
15753 else
15754 fputs ("cs ; ", file);
15758 return;
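	      /* A hinted branch then appears in the output as, e.g.,
		 "ds ; jne .L5" (predicted taken) or "cs ; je .L5"
		 (predicted not taken); the label name here is purely
		 illustrative.  */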
15761 case ';':
15762 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15763 putc (';', file);
15764 #endif
15765 return;
15767 case '@':
15768 if (ASSEMBLER_DIALECT == ASM_ATT)
15769 putc ('%', file);
15771 /* The kernel uses a different segment register for performance
15772 reasons; a system call would not have to trash the userspace
15773 segment register, which would be expensive. */
15774 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15775 fputs ("fs", file);
15776 else
15777 fputs ("gs", file);
15778 return;
15780 case '~':
15781 putc (TARGET_AVX2 ? 'i' : 'f', file);
15782 return;
15784 case '^':
15785 if (TARGET_64BIT && Pmode != word_mode)
15786 fputs ("addr32 ", file);
15787 return;
15789 case '!':
15790 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15791 fputs ("bnd ", file);
15792 return;
15794 default:
15795 output_operand_lossage ("invalid operand code '%c'", code);
15799 if (REG_P (x))
15800 print_reg (x, code, file);
15802 else if (MEM_P (x))
15804 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15805 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15806 && GET_MODE (x) != BLKmode)
15808 const char * size;
15809 switch (GET_MODE_SIZE (GET_MODE (x)))
15811 case 1: size = "BYTE"; break;
15812 case 2: size = "WORD"; break;
15813 case 4: size = "DWORD"; break;
15814 case 8: size = "QWORD"; break;
15815 case 12: size = "TBYTE"; break;
15816 case 16:
15817 if (GET_MODE (x) == XFmode)
15818 size = "TBYTE";
15819 else
15820 size = "XMMWORD";
15821 break;
15822 case 32: size = "YMMWORD"; break;
15823 case 64: size = "ZMMWORD"; break;
15824 default:
15825 gcc_unreachable ();
15828 /* Check for explicit size override (codes 'b', 'w', 'k',
15829 'q' and 'x') */
15830 if (code == 'b')
15831 size = "BYTE";
15832 else if (code == 'w')
15833 size = "WORD";
15834 else if (code == 'k')
15835 size = "DWORD";
15836 else if (code == 'q')
15837 size = "QWORD";
15838 else if (code == 'x')
15839 size = "XMMWORD";
15841 fputs (size, file);
15842 fputs (" PTR ", file);
15845 x = XEXP (x, 0);
15846 /* Avoid (%rip) for call operands. */
15847 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15848 && !CONST_INT_P (x))
15849 output_addr_const (file, x);
15850 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15851 output_operand_lossage ("invalid constraints for operand");
15852 else
15853 output_address (x);
15856 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15858 REAL_VALUE_TYPE r;
15859 long l;
15861 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15862 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15864 if (ASSEMBLER_DIALECT == ASM_ATT)
15865 putc ('$', file);
15866 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15867 if (code == 'q')
15868 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15869 (unsigned long long) (int) l);
15870 else
15871 fprintf (file, "0x%08x", (unsigned int) l);
15874 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15876 REAL_VALUE_TYPE r;
15877 long l[2];
15879 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15880 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15882 if (ASSEMBLER_DIALECT == ASM_ATT)
15883 putc ('$', file);
15884 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
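      /* For example, the DFmode constant 1.0 comes out as
	 $0x3ff0000000000000 in AT&T syntax (without the '$' in Intel
	 syntax).  */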
15887 /* These float cases don't actually occur as immediate operands. */
15888 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15890 char dstr[30];
15892 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15893 fputs (dstr, file);
15896 else
15898 /* We have patterns that allow zero sets of memory, for instance.
15899 In 64-bit mode, we should probably support all 8-byte vectors,
15900 since we can in fact encode that into an immediate. */
15901 if (GET_CODE (x) == CONST_VECTOR)
15903 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15904 x = const0_rtx;
15907 if (code != 'P' && code != 'p')
15909 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15911 if (ASSEMBLER_DIALECT == ASM_ATT)
15912 putc ('$', file);
15914 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15915 || GET_CODE (x) == LABEL_REF)
15917 if (ASSEMBLER_DIALECT == ASM_ATT)
15918 putc ('$', file);
15919 else
15920 fputs ("OFFSET FLAT:", file);
15923 if (CONST_INT_P (x))
15924 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15925 else if (flag_pic || MACHOPIC_INDIRECT)
15926 output_pic_addr_const (file, x, code);
15927 else
15928 output_addr_const (file, x);
15932 static bool
15933 ix86_print_operand_punct_valid_p (unsigned char code)
15935 return (code == '@' || code == '*' || code == '+' || code == '&'
15936 || code == ';' || code == '~' || code == '^' || code == '!');
15939 /* Print a memory operand whose address is ADDR. */
15941 static void
15942 ix86_print_operand_address (FILE *file, rtx addr)
15944 struct ix86_address parts;
15945 rtx base, index, disp;
15946 int scale;
15947 int ok;
15948 bool vsib = false;
15949 int code = 0;
15951 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15953 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15954 gcc_assert (parts.index == NULL_RTX);
15955 parts.index = XVECEXP (addr, 0, 1);
15956 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15957 addr = XVECEXP (addr, 0, 0);
15958 vsib = true;
15960 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15962 gcc_assert (TARGET_64BIT);
15963 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15964 code = 'q';
15966 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15968 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15969 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15970 if (parts.base != NULL_RTX)
15972 parts.index = parts.base;
15973 parts.scale = 1;
15975 parts.base = XVECEXP (addr, 0, 0);
15976 addr = XVECEXP (addr, 0, 0);
15978 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15980 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15981 gcc_assert (parts.index == NULL_RTX);
15982 parts.index = XVECEXP (addr, 0, 1);
15983 addr = XVECEXP (addr, 0, 0);
15985 else
15986 ok = ix86_decompose_address (addr, &parts);
15988 gcc_assert (ok);
15990 base = parts.base;
15991 index = parts.index;
15992 disp = parts.disp;
15993 scale = parts.scale;
15995 switch (parts.seg)
15997 case SEG_DEFAULT:
15998 break;
15999 case SEG_FS:
16000 case SEG_GS:
16001 if (ASSEMBLER_DIALECT == ASM_ATT)
16002 putc ('%', file);
16003 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16004 break;
16005 default:
16006 gcc_unreachable ();
16009 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16010 if (TARGET_64BIT && !base && !index)
16012 rtx symbol = disp;
16014 if (GET_CODE (disp) == CONST
16015 && GET_CODE (XEXP (disp, 0)) == PLUS
16016 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16017 symbol = XEXP (XEXP (disp, 0), 0);
16019 if (GET_CODE (symbol) == LABEL_REF
16020 || (GET_CODE (symbol) == SYMBOL_REF
16021 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16022 base = pc_rtx;
16024 if (!base && !index)
16026 /* A displacement-only address requires special attention. */
16028 if (CONST_INT_P (disp))
16030 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16031 fputs ("ds:", file);
16032 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16034 else if (flag_pic)
16035 output_pic_addr_const (file, disp, 0);
16036 else
16037 output_addr_const (file, disp);
16039 else
16041 /* Print SImode register names to force addr32 prefix. */
16042 if (SImode_address_operand (addr, VOIDmode))
16044 #ifdef ENABLE_CHECKING
16045 gcc_assert (TARGET_64BIT);
16046 switch (GET_CODE (addr))
16048 case SUBREG:
16049 gcc_assert (GET_MODE (addr) == SImode);
16050 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16051 break;
16052 case ZERO_EXTEND:
16053 case AND:
16054 gcc_assert (GET_MODE (addr) == DImode);
16055 break;
16056 default:
16057 gcc_unreachable ();
16059 #endif
16060 gcc_assert (!code);
16061 code = 'k';
16063 else if (code == 0
16064 && TARGET_X32
16065 && disp
16066 && CONST_INT_P (disp)
16067 && INTVAL (disp) < -16*1024*1024)
16069 /* X32 runs in 64-bit mode, where displacement, DISP, in
16070 address DISP(%r64), is encoded as 32-bit immediate sign-
16071 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16072 address is %r64 + 0xffffffffbffffd00. When %r64 <
16073 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16074 which is invalid for x32. The correct address is %r64
16075 - 0x40000300 == 0xf7ffdd64. To properly encode
16076 -0x40000300(%r64) for x32, we zero-extend negative
16077 displacement by forcing addr32 prefix which truncates
16078 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16079 zero-extend all negative displacements, including -1(%rsp).
16080 However, for small negative displacements, sign-extension
16081 won't cause overflow. We only zero-extend negative
16082 displacements if they are < -16*1024*1024, which is also used
16083 to check legitimate address displacements for PIC. */
16084 code = 'k';
16087 if (ASSEMBLER_DIALECT == ASM_ATT)
16089 if (disp)
16091 if (flag_pic)
16092 output_pic_addr_const (file, disp, 0);
16093 else if (GET_CODE (disp) == LABEL_REF)
16094 output_asm_label (disp);
16095 else
16096 output_addr_const (file, disp);
16099 putc ('(', file);
16100 if (base)
16101 print_reg (base, code, file);
16102 if (index)
16104 putc (',', file);
16105 print_reg (index, vsib ? 0 : code, file);
16106 if (scale != 1 || vsib)
16107 fprintf (file, ",%d", scale);
16109 putc (')', file);
16111 else
16113 rtx offset = NULL_RTX;
16115 if (disp)
16117 /* Pull out the offset of a symbol; print any symbol itself. */
16118 if (GET_CODE (disp) == CONST
16119 && GET_CODE (XEXP (disp, 0)) == PLUS
16120 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16122 offset = XEXP (XEXP (disp, 0), 1);
16123 disp = gen_rtx_CONST (VOIDmode,
16124 XEXP (XEXP (disp, 0), 0));
16127 if (flag_pic)
16128 output_pic_addr_const (file, disp, 0);
16129 else if (GET_CODE (disp) == LABEL_REF)
16130 output_asm_label (disp);
16131 else if (CONST_INT_P (disp))
16132 offset = disp;
16133 else
16134 output_addr_const (file, disp);
16137 putc ('[', file);
16138 if (base)
16140 print_reg (base, code, file);
16141 if (offset)
16143 if (INTVAL (offset) >= 0)
16144 putc ('+', file);
16145 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16148 else if (offset)
16149 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16150 else
16151 putc ('0', file);
16153 if (index)
16155 putc ('+', file);
16156 print_reg (index, vsib ? 0 : code, file);
16157 if (scale != 1 || vsib)
16158 fprintf (file, "*%d", scale);
16160 putc (']', file);
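  /* So an address with base %rbx, index %rcx and scale 4 is printed as
     "(%rbx,%rcx,4)" in AT&T syntax and as "[rbx+rcx*4]" in Intel syntax;
     the registers are illustrative.  */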
16165 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16167 static bool
16168 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16170 rtx op;
16172 if (GET_CODE (x) != UNSPEC)
16173 return false;
16175 op = XVECEXP (x, 0, 0);
16176 switch (XINT (x, 1))
16178 case UNSPEC_GOTTPOFF:
16179 output_addr_const (file, op);
16180 /* FIXME: This might be @TPOFF in Sun ld. */
16181 fputs ("@gottpoff", file);
16182 break;
16183 case UNSPEC_TPOFF:
16184 output_addr_const (file, op);
16185 fputs ("@tpoff", file);
16186 break;
16187 case UNSPEC_NTPOFF:
16188 output_addr_const (file, op);
16189 if (TARGET_64BIT)
16190 fputs ("@tpoff", file);
16191 else
16192 fputs ("@ntpoff", file);
16193 break;
16194 case UNSPEC_DTPOFF:
16195 output_addr_const (file, op);
16196 fputs ("@dtpoff", file);
16197 break;
16198 case UNSPEC_GOTNTPOFF:
16199 output_addr_const (file, op);
16200 if (TARGET_64BIT)
16201 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16202 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16203 else
16204 fputs ("@gotntpoff", file);
16205 break;
16206 case UNSPEC_INDNTPOFF:
16207 output_addr_const (file, op);
16208 fputs ("@indntpoff", file);
16209 break;
16210 #if TARGET_MACHO
16211 case UNSPEC_MACHOPIC_OFFSET:
16212 output_addr_const (file, op);
16213 putc ('-', file);
16214 machopic_output_function_base_name (file);
16215 break;
16216 #endif
16218 case UNSPEC_STACK_CHECK:
16220 int offset;
16222 gcc_assert (flag_split_stack);
16224 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16225 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16226 #else
16227 gcc_unreachable ();
16228 #endif
16230 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16232 break;
16234 default:
16235 return false;
16238 return true;
16241 /* Split one or more double-mode RTL references into pairs of half-mode
16242 references. The RTL can be REG, offsettable MEM, integer constant, or
16243 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16244 split and "num" is its length. lo_half and hi_half are output arrays
16245 that parallel "operands". */
16247 void
16248 split_double_mode (machine_mode mode, rtx operands[],
16249 int num, rtx lo_half[], rtx hi_half[])
16251 machine_mode half_mode;
16252 unsigned int byte;
16254 switch (mode)
16256 case TImode:
16257 half_mode = DImode;
16258 break;
16259 case DImode:
16260 half_mode = SImode;
16261 break;
16262 default:
16263 gcc_unreachable ();
16266 byte = GET_MODE_SIZE (half_mode);
16268 while (num--)
16270 rtx op = operands[num];
16272 /* simplify_subreg refuses to split volatile memory addresses,
16273 but we still have to handle them. */
16274 if (MEM_P (op))
16276 lo_half[num] = adjust_address (op, half_mode, 0);
16277 hi_half[num] = adjust_address (op, half_mode, byte);
16279 else
16281 lo_half[num] = simplify_gen_subreg (half_mode, op,
16282 GET_MODE (op) == VOIDmode
16283 ? mode : GET_MODE (op), 0);
16284 hi_half[num] = simplify_gen_subreg (half_mode, op,
16285 GET_MODE (op) == VOIDmode
16286 ? mode : GET_MODE (op), byte);
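  /* E.g. with -m32 a DImode operand is split into two SImode halves
     here, which callers such as the double-word move and arithmetic
     splitters can then handle one word at a time.  */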
16291 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16292 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16293 is the expression of the binary operation. The output may either be
16294 emitted here, or returned to the caller, like all output_* functions.
16296 There is no guarantee that the operands are the same mode, as they
16297 might be within FLOAT or FLOAT_EXTEND expressions. */
16299 #ifndef SYSV386_COMPAT
16300 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16301 wants to fix the assemblers because that causes incompatibility
16302 with gcc. No-one wants to fix gcc because that causes
16303 incompatibility with assemblers... You can use the option of
16304 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16305 #define SYSV386_COMPAT 1
16306 #endif
16308 const char *
16309 output_387_binary_op (rtx insn, rtx *operands)
16311 static char buf[40];
16312 const char *p;
16313 const char *ssep;
16314 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16316 #ifdef ENABLE_CHECKING
16317 /* Even if we do not want to check the inputs, this documents input
16318 constraints, which helps in understanding the following code. */
16319 if (STACK_REG_P (operands[0])
16320 && ((REG_P (operands[1])
16321 && REGNO (operands[0]) == REGNO (operands[1])
16322 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16323 || (REG_P (operands[2])
16324 && REGNO (operands[0]) == REGNO (operands[2])
16325 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16326 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16327 ; /* ok */
16328 else
16329 gcc_assert (is_sse);
16330 #endif
16332 switch (GET_CODE (operands[3]))
16334 case PLUS:
16335 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16336 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16337 p = "fiadd";
16338 else
16339 p = "fadd";
16340 ssep = "vadd";
16341 break;
16343 case MINUS:
16344 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16345 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16346 p = "fisub";
16347 else
16348 p = "fsub";
16349 ssep = "vsub";
16350 break;
16352 case MULT:
16353 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16354 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16355 p = "fimul";
16356 else
16357 p = "fmul";
16358 ssep = "vmul";
16359 break;
16361 case DIV:
16362 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16363 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16364 p = "fidiv";
16365 else
16366 p = "fdiv";
16367 ssep = "vdiv";
16368 break;
16370 default:
16371 gcc_unreachable ();
16374 if (is_sse)
16376 if (TARGET_AVX)
16378 strcpy (buf, ssep);
16379 if (GET_MODE (operands[0]) == SFmode)
16380 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16381 else
16382 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16384 else
16386 strcpy (buf, ssep + 1);
16387 if (GET_MODE (operands[0]) == SFmode)
16388 strcat (buf, "ss\t{%2, %0|%0, %2}");
16389 else
16390 strcat (buf, "sd\t{%2, %0|%0, %2}");
16392 return buf;
16394 strcpy (buf, p);
16396 switch (GET_CODE (operands[3]))
16398 case MULT:
16399 case PLUS:
16400 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16402 rtx temp = operands[2];
16403 operands[2] = operands[1];
16404 operands[1] = temp;
16407 /* We know operands[0] == operands[1]. */
16409 if (MEM_P (operands[2]))
16411 p = "%Z2\t%2";
16412 break;
16415 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16417 if (STACK_TOP_P (operands[0]))
16418 /* How is it that we are storing to a dead operand[2]?
16419 Well, presumably operands[1] is dead too. We can't
16420 store the result to st(0) as st(0) gets popped on this
16421 instruction. Instead store to operands[2] (which I
16422 think has to be st(1)). st(1) will be popped later.
16423 gcc <= 2.8.1 didn't have this check and generated
16424 assembly code that the Unixware assembler rejected. */
16425 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16426 else
16427 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16428 break;
16431 if (STACK_TOP_P (operands[0]))
16432 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16433 else
16434 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16435 break;
16437 case MINUS:
16438 case DIV:
16439 if (MEM_P (operands[1]))
16441 p = "r%Z1\t%1";
16442 break;
16445 if (MEM_P (operands[2]))
16447 p = "%Z2\t%2";
16448 break;
16451 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16453 #if SYSV386_COMPAT
16454 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16455 derived assemblers, confusingly reverse the direction of
16456 the operation for fsub{r} and fdiv{r} when the
16457 destination register is not st(0). The Intel assembler
16458 doesn't have this brain damage. Read !SYSV386_COMPAT to
16459 figure out what the hardware really does. */
16460 if (STACK_TOP_P (operands[0]))
16461 p = "{p\t%0, %2|rp\t%2, %0}";
16462 else
16463 p = "{rp\t%2, %0|p\t%0, %2}";
16464 #else
16465 if (STACK_TOP_P (operands[0]))
16466 /* As above for fmul/fadd, we can't store to st(0). */
16467 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16468 else
16469 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16470 #endif
16471 break;
16474 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16476 #if SYSV386_COMPAT
16477 if (STACK_TOP_P (operands[0]))
16478 p = "{rp\t%0, %1|p\t%1, %0}";
16479 else
16480 p = "{p\t%1, %0|rp\t%0, %1}";
16481 #else
16482 if (STACK_TOP_P (operands[0]))
16483 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16484 else
16485 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16486 #endif
16487 break;
16490 if (STACK_TOP_P (operands[0]))
16492 if (STACK_TOP_P (operands[1]))
16493 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16494 else
16495 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16496 break;
16498 else if (STACK_TOP_P (operands[1]))
16500 #if SYSV386_COMPAT
16501 p = "{\t%1, %0|r\t%0, %1}";
16502 #else
16503 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16504 #endif
16506 else
16508 #if SYSV386_COMPAT
16509 p = "{r\t%2, %0|\t%0, %2}";
16510 #else
16511 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16512 #endif
16514 break;
16516 default:
16517 gcc_unreachable ();
16520 strcat (buf, p);
16521 return buf;
16524 /* Check if a 256bit AVX register is referenced inside of EXP. */
16526 static bool
16527 ix86_check_avx256_register (const_rtx exp)
16529 if (GET_CODE (exp) == SUBREG)
16530 exp = SUBREG_REG (exp);
16532 return (REG_P (exp)
16533 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16536 /* Return needed mode for entity in optimize_mode_switching pass. */
16538 static int
16539 ix86_avx_u128_mode_needed (rtx_insn *insn)
16541 if (CALL_P (insn))
16543 rtx link;
16545 /* Needed mode is set to AVX_U128_CLEAN if there are
16546 no 256bit modes used in function arguments. */
16547 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16548 link;
16549 link = XEXP (link, 1))
16551 if (GET_CODE (XEXP (link, 0)) == USE)
16553 rtx arg = XEXP (XEXP (link, 0), 0);
16555 if (ix86_check_avx256_register (arg))
16556 return AVX_U128_DIRTY;
16560 return AVX_U128_CLEAN;
16563 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16564 changes state only when a 256bit register is written to, but we need
16565 to prevent the compiler from moving the optimal insertion point above
16566 a possible read from a 256bit register. */
16567 subrtx_iterator::array_type array;
16568 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16569 if (ix86_check_avx256_register (*iter))
16570 return AVX_U128_DIRTY;
16572 return AVX_U128_ANY;
16575 /* Return mode that i387 must be switched into
16576 prior to the execution of insn. */
16578 static int
16579 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16581 enum attr_i387_cw mode;
16583 /* The mode UNINITIALIZED is used to store the control word after a
16584 function call or ASM pattern. The mode ANY specifies that the function
16585 has no requirements on the control word and makes no changes to the
16586 bits we are interested in. */
16588 if (CALL_P (insn)
16589 || (NONJUMP_INSN_P (insn)
16590 && (asm_noperands (PATTERN (insn)) >= 0
16591 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16592 return I387_CW_UNINITIALIZED;
16594 if (recog_memoized (insn) < 0)
16595 return I387_CW_ANY;
16597 mode = get_attr_i387_cw (insn);
16599 switch (entity)
16601 case I387_TRUNC:
16602 if (mode == I387_CW_TRUNC)
16603 return mode;
16604 break;
16606 case I387_FLOOR:
16607 if (mode == I387_CW_FLOOR)
16608 return mode;
16609 break;
16611 case I387_CEIL:
16612 if (mode == I387_CW_CEIL)
16613 return mode;
16614 break;
16616 case I387_MASK_PM:
16617 if (mode == I387_CW_MASK_PM)
16618 return mode;
16619 break;
16621 default:
16622 gcc_unreachable ();
16625 return I387_CW_ANY;
16628 /* Return mode that entity must be switched into
16629 prior to the execution of insn. */
16631 static int
16632 ix86_mode_needed (int entity, rtx_insn *insn)
16634 switch (entity)
16636 case AVX_U128:
16637 return ix86_avx_u128_mode_needed (insn);
16638 case I387_TRUNC:
16639 case I387_FLOOR:
16640 case I387_CEIL:
16641 case I387_MASK_PM:
16642 return ix86_i387_mode_needed (entity, insn);
16643 default:
16644 gcc_unreachable ();
16646 return 0;
16649 /* Check if a 256bit AVX register is referenced in stores. */
16651 static void
16652 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16654 if (ix86_check_avx256_register (dest))
16656 bool *used = (bool *) data;
16657 *used = true;
16661 /* Calculate mode of upper 128bit AVX registers after the insn. */
16663 static int
16664 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16666 rtx pat = PATTERN (insn);
16668 if (vzeroupper_operation (pat, VOIDmode)
16669 || vzeroall_operation (pat, VOIDmode))
16670 return AVX_U128_CLEAN;
16672 /* We know that the state is clean after a CALL insn if no 256bit
16673 register is used for the function return value. */
16674 if (CALL_P (insn))
16676 bool avx_reg256_found = false;
16677 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16679 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16682 /* Otherwise, return current mode. Remember that if insn
16683 references AVX 256bit registers, the mode was already changed
16684 to DIRTY from MODE_NEEDED. */
16685 return mode;
16688 /* Return the mode that an insn results in. */
16691 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16693 switch (entity)
16695 case AVX_U128:
16696 return ix86_avx_u128_mode_after (mode, insn);
16697 case I387_TRUNC:
16698 case I387_FLOOR:
16699 case I387_CEIL:
16700 case I387_MASK_PM:
16701 return mode;
16702 default:
16703 gcc_unreachable ();
16707 static int
16708 ix86_avx_u128_mode_entry (void)
16710 tree arg;
16712 /* Entry mode is set to AVX_U128_DIRTY if there are
16713 256bit modes used in function arguments. */
16714 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16715 arg = TREE_CHAIN (arg))
16717 rtx incoming = DECL_INCOMING_RTL (arg);
16719 if (incoming && ix86_check_avx256_register (incoming))
16720 return AVX_U128_DIRTY;
16723 return AVX_U128_CLEAN;
16726 /* Return a mode that ENTITY is assumed to be
16727 switched to at function entry. */
16729 static int
16730 ix86_mode_entry (int entity)
16732 switch (entity)
16734 case AVX_U128:
16735 return ix86_avx_u128_mode_entry ();
16736 case I387_TRUNC:
16737 case I387_FLOOR:
16738 case I387_CEIL:
16739 case I387_MASK_PM:
16740 return I387_CW_ANY;
16741 default:
16742 gcc_unreachable ();
16746 static int
16747 ix86_avx_u128_mode_exit (void)
16749 rtx reg = crtl->return_rtx;
16751 /* Exit mode is set to AVX_U128_DIRTY if there are
16752 256bit modes used in the function return register. */
16753 if (reg && ix86_check_avx256_register (reg))
16754 return AVX_U128_DIRTY;
16756 return AVX_U128_CLEAN;
16759 /* Return a mode that ENTITY is assumed to be
16760 switched to at function exit. */
16762 static int
16763 ix86_mode_exit (int entity)
16765 switch (entity)
16767 case AVX_U128:
16768 return ix86_avx_u128_mode_exit ();
16769 case I387_TRUNC:
16770 case I387_FLOOR:
16771 case I387_CEIL:
16772 case I387_MASK_PM:
16773 return I387_CW_ANY;
16774 default:
16775 gcc_unreachable ();
16779 static int
16780 ix86_mode_priority (int, int n)
16782 return n;
16785 /* Output code to initialize control word copies used by trunc?f?i and
16786 rounding patterns. MODE selects the required control word; the current
16787 control word is saved and a modified copy is stored in MODE's stack slot. */
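/* For reference (standard x87 control word layout, not restated elsewhere
here): the rounding-control field is bits 11:10, so ORing in 0x0400 selects
round-down, 0x0800 round-up and 0x0c00 truncation, which is what the masks
below do; 0x0020 is the precision-exception mask bit used for nearbyint(). */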
16789 static void
16790 emit_i387_cw_initialization (int mode)
16792 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16793 rtx new_mode;
16795 enum ix86_stack_slot slot;
16797 rtx reg = gen_reg_rtx (HImode);
16799 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16800 emit_move_insn (reg, copy_rtx (stored_mode));
16802 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16803 || optimize_insn_for_size_p ())
16805 switch (mode)
16807 case I387_CW_TRUNC:
16808 /* round toward zero (truncate) */
16809 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16810 slot = SLOT_CW_TRUNC;
16811 break;
16813 case I387_CW_FLOOR:
16814 /* round down toward -oo */
16815 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16816 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16817 slot = SLOT_CW_FLOOR;
16818 break;
16820 case I387_CW_CEIL:
16821 /* round up toward +oo */
16822 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16823 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16824 slot = SLOT_CW_CEIL;
16825 break;
16827 case I387_CW_MASK_PM:
16828 /* mask precision exception for nearbyint() */
16829 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16830 slot = SLOT_CW_MASK_PM;
16831 break;
16833 default:
16834 gcc_unreachable ();
16837 else
16839 switch (mode)
16841 case I387_CW_TRUNC:
16842 /* round toward zero (truncate) */
16843 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16844 slot = SLOT_CW_TRUNC;
16845 break;
16847 case I387_CW_FLOOR:
16848 /* round down toward -oo */
16849 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16850 slot = SLOT_CW_FLOOR;
16851 break;
16853 case I387_CW_CEIL:
16854 /* round up toward +oo */
16855 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16856 slot = SLOT_CW_CEIL;
16857 break;
16859 case I387_CW_MASK_PM:
16860 /* mask precision exception for nearbyint() */
16861 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16862 slot = SLOT_CW_MASK_PM;
16863 break;
16865 default:
16866 gcc_unreachable ();
16870 gcc_assert (slot < MAX_386_STACK_LOCALS);
16872 new_mode = assign_386_stack_local (HImode, slot);
16873 emit_move_insn (new_mode, reg);
16876 /* Emit vzeroupper. */
16878 void
16879 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16881 int i;
16883 /* Cancel automatic vzeroupper insertion if there are
16884 live call-saved SSE registers at the insertion point. */
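/* Rationale: vzeroupper clears bits 255:128 of every ymm register, so
emitting it while a call-saved SSE register still holds a live value would
corrupt the upper half of that value. */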
16886 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16887 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16888 return;
16890 if (TARGET_64BIT)
16891 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16892 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16893 return;
16895 emit_insn (gen_avx_vzeroupper ());
16900 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16901 is the set of hard registers live at the point where the insn(s)
16902 are to be inserted. */
16904 static void
16905 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16906 HARD_REG_SET regs_live)
16908 switch (entity)
16910 case AVX_U128:
16911 if (mode == AVX_U128_CLEAN)
16912 ix86_avx_emit_vzeroupper (regs_live);
16913 break;
16914 case I387_TRUNC:
16915 case I387_FLOOR:
16916 case I387_CEIL:
16917 case I387_MASK_PM:
16918 if (mode != I387_CW_ANY
16919 && mode != I387_CW_UNINITIALIZED)
16920 emit_i387_cw_initialization (mode);
16921 break;
16922 default:
16923 gcc_unreachable ();
16927 /* Output code for INSN to convert a float to a signed int. OPERANDS
16928 are the insn operands. The output may be [HSD]Imode and the input
16929 operand may be [SDX]Fmode. */
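/* For illustration, a typical emitted sequence when fisttp is unavailable
and the input value stays live is roughly:
fld %st(0) ; duplicate the input
fldcw %3 ; load the truncating control word
fistp<size> %0 ; store the integer and pop
fldcw %2 ; restore the original control word */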
16931 const char *
16932 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16934 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16935 int dimode_p = GET_MODE (operands[0]) == DImode;
16936 int round_mode = get_attr_i387_cw (insn);
16938 /* Jump through a hoop or two for DImode, since the hardware has no
16939 non-popping instruction. We used to do this a different way, but
16940 that was somewhat fragile and broke with post-reload splitters. */
16941 if ((dimode_p || fisttp) && !stack_top_dies)
16942 output_asm_insn ("fld\t%y1", operands);
16944 gcc_assert (STACK_TOP_P (operands[1]));
16945 gcc_assert (MEM_P (operands[0]));
16946 gcc_assert (GET_MODE (operands[1]) != TFmode);
16948 if (fisttp)
16949 output_asm_insn ("fisttp%Z0\t%0", operands);
16950 else
16952 if (round_mode != I387_CW_ANY)
16953 output_asm_insn ("fldcw\t%3", operands);
16954 if (stack_top_dies || dimode_p)
16955 output_asm_insn ("fistp%Z0\t%0", operands);
16956 else
16957 output_asm_insn ("fist%Z0\t%0", operands);
16958 if (round_mode != I387_CW_ANY)
16959 output_asm_insn ("fldcw\t%2", operands);
16962 return "";
16965 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16966 have the values zero or one, indicates the ffreep insn's operand
16967 from the OPERANDS array. */
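/* Implementation note: ffreep %st(N) is encoded as the two bytes 0xdf,
0xc0+N; the fallback branch below emits exactly those bytes via ASM_SHORT
(hence the little-endian "0xcNdf" word) for assemblers that do not accept
the mnemonic. */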
16969 static const char *
16970 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16972 if (TARGET_USE_FFREEP)
16973 #ifdef HAVE_AS_IX86_FFREEP
16974 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16975 #else
16977 static char retval[32];
16978 int regno = REGNO (operands[opno]);
16980 gcc_assert (STACK_REGNO_P (regno));
16982 regno -= FIRST_STACK_REG;
16984 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16985 return retval;
16987 #endif
16989 return opno ? "fstp\t%y1" : "fstp\t%y0";
16993 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16994 should be used. UNORDERED_P is true when fucom should be used. */
16996 const char *
16997 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16999 int stack_top_dies;
17000 rtx cmp_op0, cmp_op1;
17001 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17003 if (eflags_p)
17005 cmp_op0 = operands[0];
17006 cmp_op1 = operands[1];
17008 else
17010 cmp_op0 = operands[1];
17011 cmp_op1 = operands[2];
17014 if (is_sse)
17016 if (GET_MODE (operands[0]) == SFmode)
17017 if (unordered_p)
17018 return "%vucomiss\t{%1, %0|%0, %1}";
17019 else
17020 return "%vcomiss\t{%1, %0|%0, %1}";
17021 else
17022 if (unordered_p)
17023 return "%vucomisd\t{%1, %0|%0, %1}";
17024 else
17025 return "%vcomisd\t{%1, %0|%0, %1}";
17028 gcc_assert (STACK_TOP_P (cmp_op0));
17030 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17032 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17034 if (stack_top_dies)
17036 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17037 return output_387_ffreep (operands, 1);
17039 else
17040 return "ftst\n\tfnstsw\t%0";
17043 if (STACK_REG_P (cmp_op1)
17044 && stack_top_dies
17045 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17046 && REGNO (cmp_op1) != FIRST_STACK_REG)
17048 /* If both the top of the 387 stack and the other operand (also a
17049 stack register) die, then this must be a
17050 `fcompp' float compare. */
17052 if (eflags_p)
17054 /* There is no double popping fcomi variant. Fortunately,
17055 eflags is immune from the fstp's cc clobbering. */
17056 if (unordered_p)
17057 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17058 else
17059 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17060 return output_387_ffreep (operands, 0);
17062 else
17064 if (unordered_p)
17065 return "fucompp\n\tfnstsw\t%0";
17066 else
17067 return "fcompp\n\tfnstsw\t%0";
17070 else
17072 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
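/* For example, mask 8 (eflags compare, fp operand, ordered, top not dying)
selects "fcomi", while mask 3 (fp stack compare, unordered, popping) selects
"fucomp"; the NULL entries are combinations that are never generated. */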
17074 static const char * const alt[16] =
17076 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17077 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17078 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17079 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17081 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17082 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17083 NULL,
17084 NULL,
17086 "fcomi\t{%y1, %0|%0, %y1}",
17087 "fcomip\t{%y1, %0|%0, %y1}",
17088 "fucomi\t{%y1, %0|%0, %y1}",
17089 "fucomip\t{%y1, %0|%0, %y1}",
17091 NULL,
17092 NULL,
17093 NULL,
17094 NULL
17097 int mask;
17098 const char *ret;
17100 mask = eflags_p << 3;
17101 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17102 mask |= unordered_p << 1;
17103 mask |= stack_top_dies;
17105 gcc_assert (mask < 16);
17106 ret = alt[mask];
17107 gcc_assert (ret);
17109 return ret;
17113 void
17114 ix86_output_addr_vec_elt (FILE *file, int value)
17116 const char *directive = ASM_LONG;
17118 #ifdef ASM_QUAD
17119 if (TARGET_LP64)
17120 directive = ASM_QUAD;
17121 #else
17122 gcc_assert (!TARGET_64BIT);
17123 #endif
17125 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17128 void
17129 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17131 const char *directive = ASM_LONG;
17133 #ifdef ASM_QUAD
17134 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17135 directive = ASM_QUAD;
17136 #else
17137 gcc_assert (!TARGET_64BIT);
17138 #endif
17139 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17140 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17141 fprintf (file, "%s%s%d-%s%d\n",
17142 directive, LPREFIX, value, LPREFIX, rel);
17143 else if (HAVE_AS_GOTOFF_IN_DATA)
17144 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17145 #if TARGET_MACHO
17146 else if (TARGET_MACHO)
17148 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17149 machopic_output_function_base_name (file);
17150 putc ('\n', file);
17152 #endif
17153 else
17154 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17155 GOT_SYMBOL_NAME, LPREFIX, value);
17158 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17159 for the target. */
17161 void
17162 ix86_expand_clear (rtx dest)
17164 rtx tmp;
17166 /* We play register width games, which are only valid after reload. */
17167 gcc_assert (reload_completed);
17169 /* Avoid HImode and its attendant prefix byte. */
17170 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17171 dest = gen_rtx_REG (SImode, REGNO (dest));
17172 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17174 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17176 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17177 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17180 emit_insn (tmp);
17183 /* X is an unchanging MEM. If it is a constant pool reference, return
17184 the constant pool rtx, else NULL. */
17187 maybe_get_pool_constant (rtx x)
17189 x = ix86_delegitimize_address (XEXP (x, 0));
17191 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17192 return get_pool_constant (x);
17194 return NULL_RTX;
17197 void
17198 ix86_expand_move (machine_mode mode, rtx operands[])
17200 rtx op0, op1;
17201 enum tls_model model;
17203 op0 = operands[0];
17204 op1 = operands[1];
17206 if (GET_CODE (op1) == SYMBOL_REF)
17208 rtx tmp;
17210 model = SYMBOL_REF_TLS_MODEL (op1);
17211 if (model)
17213 op1 = legitimize_tls_address (op1, model, true);
17214 op1 = force_operand (op1, op0);
17215 if (op1 == op0)
17216 return;
17217 op1 = convert_to_mode (mode, op1, 1);
17219 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17220 op1 = tmp;
17222 else if (GET_CODE (op1) == CONST
17223 && GET_CODE (XEXP (op1, 0)) == PLUS
17224 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17226 rtx addend = XEXP (XEXP (op1, 0), 1);
17227 rtx symbol = XEXP (XEXP (op1, 0), 0);
17228 rtx tmp;
17230 model = SYMBOL_REF_TLS_MODEL (symbol);
17231 if (model)
17232 tmp = legitimize_tls_address (symbol, model, true);
17233 else
17234 tmp = legitimize_pe_coff_symbol (symbol, true);
17236 if (tmp)
17238 tmp = force_operand (tmp, NULL);
17239 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17240 op0, 1, OPTAB_DIRECT);
17241 if (tmp == op0)
17242 return;
17243 op1 = convert_to_mode (mode, tmp, 1);
17247 if ((flag_pic || MACHOPIC_INDIRECT)
17248 && symbolic_operand (op1, mode))
17250 if (TARGET_MACHO && !TARGET_64BIT)
17252 #if TARGET_MACHO
17253 /* dynamic-no-pic */
17254 if (MACHOPIC_INDIRECT)
17256 rtx temp = ((reload_in_progress
17257 || ((op0 && REG_P (op0))
17258 && mode == Pmode))
17259 ? op0 : gen_reg_rtx (Pmode));
17260 op1 = machopic_indirect_data_reference (op1, temp);
17261 if (MACHOPIC_PURE)
17262 op1 = machopic_legitimize_pic_address (op1, mode,
17263 temp == op1 ? 0 : temp);
17265 if (op0 != op1 && GET_CODE (op0) != MEM)
17267 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17268 emit_insn (insn);
17269 return;
17271 if (GET_CODE (op0) == MEM)
17272 op1 = force_reg (Pmode, op1);
17273 else
17275 rtx temp = op0;
17276 if (GET_CODE (temp) != REG)
17277 temp = gen_reg_rtx (Pmode);
17278 temp = legitimize_pic_address (op1, temp);
17279 if (temp == op0)
17280 return;
17281 op1 = temp;
17283 /* dynamic-no-pic */
17284 #endif
17286 else
17288 if (MEM_P (op0))
17289 op1 = force_reg (mode, op1);
17290 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17292 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17293 op1 = legitimize_pic_address (op1, reg);
17294 if (op0 == op1)
17295 return;
17296 op1 = convert_to_mode (mode, op1, 1);
17300 else
17302 if (MEM_P (op0)
17303 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17304 || !push_operand (op0, mode))
17305 && MEM_P (op1))
17306 op1 = force_reg (mode, op1);
17308 if (push_operand (op0, mode)
17309 && ! general_no_elim_operand (op1, mode))
17310 op1 = copy_to_mode_reg (mode, op1);
17312 /* Force large constants in 64bit compilation into register
17313 to get them CSEed. */
17314 if (can_create_pseudo_p ()
17315 && (mode == DImode) && TARGET_64BIT
17316 && immediate_operand (op1, mode)
17317 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17318 && !register_operand (op0, mode)
17319 && optimize)
17320 op1 = copy_to_mode_reg (mode, op1);
17322 if (can_create_pseudo_p ()
17323 && FLOAT_MODE_P (mode)
17324 && GET_CODE (op1) == CONST_DOUBLE)
17326 /* If we are loading a floating point constant to a register,
17327 force the value to memory now, since we'll get better code
17328 out of the back end. */
17330 op1 = validize_mem (force_const_mem (mode, op1));
17331 if (!register_operand (op0, mode))
17333 rtx temp = gen_reg_rtx (mode);
17334 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17335 emit_move_insn (op0, temp);
17336 return;
17341 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17344 void
17345 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17347 rtx op0 = operands[0], op1 = operands[1];
17348 unsigned int align = GET_MODE_ALIGNMENT (mode);
17350 if (push_operand (op0, VOIDmode))
17351 op0 = emit_move_resolve_push (mode, op0);
17353 /* Force constants other than zero into memory. We do not know how
17354 the instructions used to build constants modify the upper 64 bits
17355 of the register; once we have that information we may be able
17356 to handle some of them more efficiently. */
17357 if (can_create_pseudo_p ()
17358 && register_operand (op0, mode)
17359 && (CONSTANT_P (op1)
17360 || (GET_CODE (op1) == SUBREG
17361 && CONSTANT_P (SUBREG_REG (op1))))
17362 && !standard_sse_constant_p (op1))
17363 op1 = validize_mem (force_const_mem (mode, op1));
17365 /* We need to check memory alignment for SSE mode since an attribute
17366 can make operands unaligned. */
17367 if (can_create_pseudo_p ()
17368 && SSE_REG_MODE_P (mode)
17369 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17370 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17372 rtx tmp[2];
17374 /* ix86_expand_vector_move_misalign() does not like constants ... */
17375 if (CONSTANT_P (op1)
17376 || (GET_CODE (op1) == SUBREG
17377 && CONSTANT_P (SUBREG_REG (op1))))
17378 op1 = validize_mem (force_const_mem (mode, op1));
17380 /* ... nor both arguments in memory. */
17381 if (!register_operand (op0, mode)
17382 && !register_operand (op1, mode))
17383 op1 = force_reg (mode, op1);
17385 tmp[0] = op0; tmp[1] = op1;
17386 ix86_expand_vector_move_misalign (mode, tmp);
17387 return;
17390 /* Make operand1 a register if it isn't already. */
17391 if (can_create_pseudo_p ()
17392 && !register_operand (op0, mode)
17393 && !register_operand (op1, mode))
17395 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17396 return;
17399 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17402 /* Split 32-byte AVX unaligned load and store if needed. */
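/* A sketch of the two strategies used below, with the usual AVX mnemonics
for illustration: when splitting is enabled, the 32-byte access is done in
two 16-byte halves (a load becomes a 16-byte load plus an insertion of the
high half via VEC_CONCAT, a store becomes two vextractf128 stores);
otherwise a single unaligned vmovu* load or store is emitted. */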
17404 static void
17405 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17407 rtx m;
17408 rtx (*extract) (rtx, rtx, rtx);
17409 rtx (*load_unaligned) (rtx, rtx);
17410 rtx (*store_unaligned) (rtx, rtx);
17411 machine_mode mode;
17413 switch (GET_MODE (op0))
17415 default:
17416 gcc_unreachable ();
17417 case V32QImode:
17418 extract = gen_avx_vextractf128v32qi;
17419 load_unaligned = gen_avx_loaddquv32qi;
17420 store_unaligned = gen_avx_storedquv32qi;
17421 mode = V16QImode;
17422 break;
17423 case V8SFmode:
17424 extract = gen_avx_vextractf128v8sf;
17425 load_unaligned = gen_avx_loadups256;
17426 store_unaligned = gen_avx_storeups256;
17427 mode = V4SFmode;
17428 break;
17429 case V4DFmode:
17430 extract = gen_avx_vextractf128v4df;
17431 load_unaligned = gen_avx_loadupd256;
17432 store_unaligned = gen_avx_storeupd256;
17433 mode = V2DFmode;
17434 break;
17437 if (MEM_P (op1))
17439 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17441 rtx r = gen_reg_rtx (mode);
17442 m = adjust_address (op1, mode, 0);
17443 emit_move_insn (r, m);
17444 m = adjust_address (op1, mode, 16);
17445 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17446 emit_move_insn (op0, r);
17448 /* Normal *mov<mode>_internal pattern will handle
17449 unaligned loads just fine if misaligned_operand
17450 is true, and without the UNSPEC it can be combined
17451 with arithmetic instructions. */
17452 else if (misaligned_operand (op1, GET_MODE (op1)))
17453 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17454 else
17455 emit_insn (load_unaligned (op0, op1));
17457 else if (MEM_P (op0))
17459 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17461 m = adjust_address (op0, mode, 0);
17462 emit_insn (extract (m, op1, const0_rtx));
17463 m = adjust_address (op0, mode, 16);
17464 emit_insn (extract (m, op1, const1_rtx));
17466 else
17467 emit_insn (store_unaligned (op0, op1));
17469 else
17470 gcc_unreachable ();
17473 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17474 straight to ix86_expand_vector_move. */
17475 /* Code generation for scalar reg-reg moves of single and double precision data:
17476 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17477 movaps reg, reg
17478 else
17479 movss reg, reg
17480 if (x86_sse_partial_reg_dependency == true)
17481 movapd reg, reg
17482 else
17483 movsd reg, reg
17485 Code generation for scalar loads of double precision data:
17486 if (x86_sse_split_regs == true)
17487 movlpd mem, reg (gas syntax)
17488 else
17489 movsd mem, reg
17491 Code generation for unaligned packed loads of single precision data
17492 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17493 if (x86_sse_unaligned_move_optimal)
17494 movups mem, reg
17496 if (x86_sse_partial_reg_dependency == true)
17498 xorps reg, reg
17499 movlps mem, reg
17500 movhps mem+8, reg
17502 else
17504 movlps mem, reg
17505 movhps mem+8, reg
17508 Code generation for unaligned packed loads of double precision data
17509 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17510 if (x86_sse_unaligned_move_optimal)
17511 movupd mem, reg
17513 if (x86_sse_split_regs == true)
17515 movlpd mem, reg
17516 movhpd mem+8, reg
17518 else
17520 movsd mem, reg
17521 movhpd mem+8, reg
17525 void
17526 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17528 rtx op0, op1, orig_op0 = NULL_RTX, m;
17529 rtx (*load_unaligned) (rtx, rtx);
17530 rtx (*store_unaligned) (rtx, rtx);
17532 op0 = operands[0];
17533 op1 = operands[1];
17535 if (GET_MODE_SIZE (mode) == 64)
17537 switch (GET_MODE_CLASS (mode))
17539 case MODE_VECTOR_INT:
17540 case MODE_INT:
17541 if (GET_MODE (op0) != V16SImode)
17543 if (!MEM_P (op0))
17545 orig_op0 = op0;
17546 op0 = gen_reg_rtx (V16SImode);
17548 else
17549 op0 = gen_lowpart (V16SImode, op0);
17551 op1 = gen_lowpart (V16SImode, op1);
17552 /* FALLTHRU */
17554 case MODE_VECTOR_FLOAT:
17555 switch (GET_MODE (op0))
17557 default:
17558 gcc_unreachable ();
17559 case V16SImode:
17560 load_unaligned = gen_avx512f_loaddquv16si;
17561 store_unaligned = gen_avx512f_storedquv16si;
17562 break;
17563 case V16SFmode:
17564 load_unaligned = gen_avx512f_loadups512;
17565 store_unaligned = gen_avx512f_storeups512;
17566 break;
17567 case V8DFmode:
17568 load_unaligned = gen_avx512f_loadupd512;
17569 store_unaligned = gen_avx512f_storeupd512;
17570 break;
17573 if (MEM_P (op1))
17574 emit_insn (load_unaligned (op0, op1));
17575 else if (MEM_P (op0))
17576 emit_insn (store_unaligned (op0, op1));
17577 else
17578 gcc_unreachable ();
17579 if (orig_op0)
17580 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17581 break;
17583 default:
17584 gcc_unreachable ();
17587 return;
17590 if (TARGET_AVX
17591 && GET_MODE_SIZE (mode) == 32)
17593 switch (GET_MODE_CLASS (mode))
17595 case MODE_VECTOR_INT:
17596 case MODE_INT:
17597 if (GET_MODE (op0) != V32QImode)
17599 if (!MEM_P (op0))
17601 orig_op0 = op0;
17602 op0 = gen_reg_rtx (V32QImode);
17604 else
17605 op0 = gen_lowpart (V32QImode, op0);
17607 op1 = gen_lowpart (V32QImode, op1);
17608 /* FALLTHRU */
17610 case MODE_VECTOR_FLOAT:
17611 ix86_avx256_split_vector_move_misalign (op0, op1);
17612 if (orig_op0)
17613 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17614 break;
17616 default:
17617 gcc_unreachable ();
17620 return;
17623 if (MEM_P (op1))
17625 /* Normal *mov<mode>_internal pattern will handle
17626 unaligned loads just fine if misaligned_operand
17627 is true, and without the UNSPEC it can be combined
17628 with arithmetic instructions. */
17629 if (TARGET_AVX
17630 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17631 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17632 && misaligned_operand (op1, GET_MODE (op1)))
17633 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17634 /* ??? If we have typed data, then it would appear that using
17635 movdqu is the only way to get unaligned data loaded with
17636 integer type. */
17637 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17639 if (GET_MODE (op0) != V16QImode)
17641 orig_op0 = op0;
17642 op0 = gen_reg_rtx (V16QImode);
17644 op1 = gen_lowpart (V16QImode, op1);
17645 /* We will eventually emit movups based on insn attributes. */
17646 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17647 if (orig_op0)
17648 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17650 else if (TARGET_SSE2 && mode == V2DFmode)
17652 rtx zero;
17654 if (TARGET_AVX
17655 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17656 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17657 || optimize_insn_for_size_p ())
17659 /* We will eventually emit movups based on insn attributes. */
17660 emit_insn (gen_sse2_loadupd (op0, op1));
17661 return;
17664 /* When SSE registers are split into halves, we can avoid
17665 writing to the top half twice. */
17666 if (TARGET_SSE_SPLIT_REGS)
17668 emit_clobber (op0);
17669 zero = op0;
17671 else
17673 /* ??? Not sure about the best option for the Intel chips.
17674 The following would seem to satisfy; the register is
17675 entirely cleared, breaking the dependency chain. We
17676 then store to the upper half, with a dependency depth
17677 of one. A rumor has it that Intel recommends two movsd
17678 followed by an unpacklpd, but this is unconfirmed. And
17679 given that the dependency depth of the unpacklpd would
17680 still be one, I'm not sure why this would be better. */
17681 zero = CONST0_RTX (V2DFmode);
17684 m = adjust_address (op1, DFmode, 0);
17685 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17686 m = adjust_address (op1, DFmode, 8);
17687 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17689 else
17691 rtx t;
17693 if (TARGET_AVX
17694 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17695 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17696 || optimize_insn_for_size_p ())
17698 if (GET_MODE (op0) != V4SFmode)
17700 orig_op0 = op0;
17701 op0 = gen_reg_rtx (V4SFmode);
17703 op1 = gen_lowpart (V4SFmode, op1);
17704 emit_insn (gen_sse_loadups (op0, op1));
17705 if (orig_op0)
17706 emit_move_insn (orig_op0,
17707 gen_lowpart (GET_MODE (orig_op0), op0));
17708 return;
17711 if (mode != V4SFmode)
17712 t = gen_reg_rtx (V4SFmode);
17713 else
17714 t = op0;
17716 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17717 emit_move_insn (t, CONST0_RTX (V4SFmode));
17718 else
17719 emit_clobber (t);
17721 m = adjust_address (op1, V2SFmode, 0);
17722 emit_insn (gen_sse_loadlps (t, t, m));
17723 m = adjust_address (op1, V2SFmode, 8);
17724 emit_insn (gen_sse_loadhps (t, t, m));
17725 if (mode != V4SFmode)
17726 emit_move_insn (op0, gen_lowpart (mode, t));
17729 else if (MEM_P (op0))
17731 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17733 op0 = gen_lowpart (V16QImode, op0);
17734 op1 = gen_lowpart (V16QImode, op1);
17735 /* We will eventually emit movups based on insn attributes. */
17736 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17738 else if (TARGET_SSE2 && mode == V2DFmode)
17740 if (TARGET_AVX
17741 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17742 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17743 || optimize_insn_for_size_p ())
17744 /* We will eventually emit movups based on insn attributes. */
17745 emit_insn (gen_sse2_storeupd (op0, op1));
17746 else
17748 m = adjust_address (op0, DFmode, 0);
17749 emit_insn (gen_sse2_storelpd (m, op1));
17750 m = adjust_address (op0, DFmode, 8);
17751 emit_insn (gen_sse2_storehpd (m, op1));
17754 else
17756 if (mode != V4SFmode)
17757 op1 = gen_lowpart (V4SFmode, op1);
17759 if (TARGET_AVX
17760 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17761 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17762 || optimize_insn_for_size_p ())
17764 op0 = gen_lowpart (V4SFmode, op0);
17765 emit_insn (gen_sse_storeups (op0, op1));
17767 else
17769 m = adjust_address (op0, V2SFmode, 0);
17770 emit_insn (gen_sse_storelps (m, op1));
17771 m = adjust_address (op0, V2SFmode, 8);
17772 emit_insn (gen_sse_storehps (m, op1));
17776 else
17777 gcc_unreachable ();
17780 /* Helper function of ix86_fixup_binary_operands to canonicalize
17781 operand order. Returns true if the operands should be swapped. */
17783 static bool
17784 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17785 rtx operands[])
17787 rtx dst = operands[0];
17788 rtx src1 = operands[1];
17789 rtx src2 = operands[2];
17791 /* If the operation is not commutative, we can't do anything. */
17792 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17793 return false;
17795 /* Highest priority is that src1 should match dst. */
17796 if (rtx_equal_p (dst, src1))
17797 return false;
17798 if (rtx_equal_p (dst, src2))
17799 return true;
17801 /* Next highest priority is that immediate constants come second. */
17802 if (immediate_operand (src2, mode))
17803 return false;
17804 if (immediate_operand (src1, mode))
17805 return true;
17807 /* Lowest priority is that memory references should come second. */
17808 if (MEM_P (src2))
17809 return false;
17810 if (MEM_P (src1))
17811 return true;
17813 return false;
17817 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17818 destination to use for the operation. If different from the true
17819 destination in operands[0], a copy operation will be required. */
17822 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17823 rtx operands[])
17825 rtx dst = operands[0];
17826 rtx src1 = operands[1];
17827 rtx src2 = operands[2];
17829 /* Canonicalize operand order. */
17830 if (ix86_swap_binary_operands_p (code, mode, operands))
17832 /* It is invalid to swap operands of different modes. */
17833 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17835 std::swap (src1, src2);
17838 /* Both source operands cannot be in memory. */
17839 if (MEM_P (src1) && MEM_P (src2))
17841 /* Optimization: Only read from memory once. */
17842 if (rtx_equal_p (src1, src2))
17844 src2 = force_reg (mode, src2);
17845 src1 = src2;
17847 else if (rtx_equal_p (dst, src1))
17848 src2 = force_reg (mode, src2);
17849 else
17850 src1 = force_reg (mode, src1);
17853 /* If the destination is memory, and we do not have matching source
17854 operands, do things in registers. */
17855 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17856 dst = gen_reg_rtx (mode);
17858 /* Source 1 cannot be a constant. */
17859 if (CONSTANT_P (src1))
17860 src1 = force_reg (mode, src1);
17862 /* Source 1 cannot be a non-matching memory. */
17863 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17864 src1 = force_reg (mode, src1);
17866 /* Improve address combine. */
17867 if (code == PLUS
17868 && GET_MODE_CLASS (mode) == MODE_INT
17869 && MEM_P (src2))
17870 src2 = force_reg (mode, src2);
17872 operands[1] = src1;
17873 operands[2] = src2;
17874 return dst;
17877 /* Similarly, but assume that the destination has already been
17878 set up properly. */
17880 void
17881 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17882 machine_mode mode, rtx operands[])
17884 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17885 gcc_assert (dst == operands[0]);
17888 /* Attempt to expand a binary operator. Make the expansion closer to the
17889 actual machine than just general_operand, which would allow 3 separate
17890 memory references (one output, two input) in a single insn. */
17892 void
17893 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17894 rtx operands[])
17896 rtx src1, src2, dst, op, clob;
17898 dst = ix86_fixup_binary_operands (code, mode, operands);
17899 src1 = operands[1];
17900 src2 = operands[2];
17902 /* Emit the instruction. */
17904 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17905 if (reload_in_progress)
17907 /* Reload doesn't know about the flags register, and doesn't know that
17908 it doesn't want to clobber it. We can only do this with PLUS. */
17909 gcc_assert (code == PLUS);
17910 emit_insn (op);
17912 else if (reload_completed
17913 && code == PLUS
17914 && !rtx_equal_p (dst, src1))
17916 /* This is going to be an LEA; avoid splitting it later. */
17917 emit_insn (op);
17919 else
17921 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17922 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17925 /* Fix up the destination if needed. */
17926 if (dst != operands[0])
17927 emit_move_insn (operands[0], dst);
17930 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17931 the given OPERANDS. */
17933 void
17934 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17935 rtx operands[])
17937 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17938 if (GET_CODE (operands[1]) == SUBREG)
17940 op1 = operands[1];
17941 op2 = operands[2];
17943 else if (GET_CODE (operands[2]) == SUBREG)
17945 op1 = operands[2];
17946 op2 = operands[1];
17948 /* Optimize (__m128i) d | (__m128i) e and similar code
17949 when d and e are float vectors into a float vector logical
17950 insn. In C/C++ without using intrinsics there is no other way
17951 to express a vector logical operation on float vectors than
17952 to cast them temporarily to integer vectors. */
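/* For example, with a and b of type __m128, (__m128i) a | (__m128i) b can
then be emitted as an "orps"-style float-domain instruction instead of
"por", which on many chips avoids a bypass delay between the integer and
floating-point SIMD domains (mnemonics given for illustration). */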
17953 if (op1
17954 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17955 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17956 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17957 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17958 && SUBREG_BYTE (op1) == 0
17959 && (GET_CODE (op2) == CONST_VECTOR
17960 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17961 && SUBREG_BYTE (op2) == 0))
17962 && can_create_pseudo_p ())
17964 rtx dst;
17965 switch (GET_MODE (SUBREG_REG (op1)))
17967 case V4SFmode:
17968 case V8SFmode:
17969 case V16SFmode:
17970 case V2DFmode:
17971 case V4DFmode:
17972 case V8DFmode:
17973 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17974 if (GET_CODE (op2) == CONST_VECTOR)
17976 op2 = gen_lowpart (GET_MODE (dst), op2);
17977 op2 = force_reg (GET_MODE (dst), op2);
17979 else
17981 op1 = operands[1];
17982 op2 = SUBREG_REG (operands[2]);
17983 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17984 op2 = force_reg (GET_MODE (dst), op2);
17986 op1 = SUBREG_REG (op1);
17987 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17988 op1 = force_reg (GET_MODE (dst), op1);
17989 emit_insn (gen_rtx_SET (VOIDmode, dst,
17990 gen_rtx_fmt_ee (code, GET_MODE (dst),
17991 op1, op2)));
17992 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17993 return;
17994 default:
17995 break;
17998 if (!nonimmediate_operand (operands[1], mode))
17999 operands[1] = force_reg (mode, operands[1]);
18000 if (!nonimmediate_operand (operands[2], mode))
18001 operands[2] = force_reg (mode, operands[2]);
18002 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18003 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18004 gen_rtx_fmt_ee (code, mode, operands[1],
18005 operands[2])));
18008 /* Return TRUE or FALSE depending on whether the binary operator meets the
18009 appropriate constraints. */
18011 bool
18012 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18013 rtx operands[3])
18015 rtx dst = operands[0];
18016 rtx src1 = operands[1];
18017 rtx src2 = operands[2];
18019 /* Both source operands cannot be in memory. */
18020 if (MEM_P (src1) && MEM_P (src2))
18021 return false;
18023 /* Canonicalize operand order for commutative operators. */
18024 if (ix86_swap_binary_operands_p (code, mode, operands))
18025 std::swap (src1, src2);
18027 /* If the destination is memory, we must have a matching source operand. */
18028 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18029 return false;
18031 /* Source 1 cannot be a constant. */
18032 if (CONSTANT_P (src1))
18033 return false;
18035 /* Source 1 cannot be a non-matching memory. */
18036 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18037 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18038 return (code == AND
18039 && (mode == HImode
18040 || mode == SImode
18041 || (TARGET_64BIT && mode == DImode))
18042 && satisfies_constraint_L (src2));
18044 return true;
18047 /* Attempt to expand a unary operator. Make the expansion closer to the
18048 actual machine than just general_operand, which would allow 2 separate
18049 memory references (one output, one input) in a single insn. */
18051 void
18052 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18053 rtx operands[])
18055 int matching_memory;
18056 rtx src, dst, op, clob;
18058 dst = operands[0];
18059 src = operands[1];
18061 /* If the destination is memory, and we do not have matching source
18062 operands, do things in registers. */
18063 matching_memory = 0;
18064 if (MEM_P (dst))
18066 if (rtx_equal_p (dst, src))
18067 matching_memory = 1;
18068 else
18069 dst = gen_reg_rtx (mode);
18072 /* When source operand is memory, destination must match. */
18073 if (MEM_P (src) && !matching_memory)
18074 src = force_reg (mode, src);
18076 /* Emit the instruction. */
18078 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18079 if (reload_in_progress || code == NOT)
18081 /* Reload doesn't know about the flags register, and doesn't know that
18082 it doesn't want to clobber it. */
18083 gcc_assert (code == NOT);
18084 emit_insn (op);
18086 else
18088 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18089 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18092 /* Fix up the destination if needed. */
18093 if (dst != operands[0])
18094 emit_move_insn (operands[0], dst);
18097 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18098 divisor are within the range [0-255]. */
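/* A rough sketch of the emitted control flow, using the conventional DIV
register names for illustration: OR the dividend and divisor into a scratch
and test it against ~0xff; if no bit above bit 7 is set, branch to a QImode
block that performs one 8bit unsigned divide (quotient in AL, remainder in
AH) and zero-extends the results, otherwise fall through to the full-width
signed/unsigned divmod. */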
18100 void
18101 ix86_split_idivmod (machine_mode mode, rtx operands[],
18102 bool signed_p)
18104 rtx_code_label *end_label, *qimode_label;
18105 rtx insn, div, mod;
18106 rtx scratch, tmp0, tmp1, tmp2;
18107 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18108 rtx (*gen_zero_extend) (rtx, rtx);
18109 rtx (*gen_test_ccno_1) (rtx, rtx);
18111 switch (mode)
18113 case SImode:
18114 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18115 gen_test_ccno_1 = gen_testsi_ccno_1;
18116 gen_zero_extend = gen_zero_extendqisi2;
18117 break;
18118 case DImode:
18119 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18120 gen_test_ccno_1 = gen_testdi_ccno_1;
18121 gen_zero_extend = gen_zero_extendqidi2;
18122 break;
18123 default:
18124 gcc_unreachable ();
18127 end_label = gen_label_rtx ();
18128 qimode_label = gen_label_rtx ();
18130 scratch = gen_reg_rtx (mode);
18132 /* Use 8bit unsigned divmod if dividend and divisor are within
18133 the range [0-255]. */
18134 emit_move_insn (scratch, operands[2]);
18135 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18136 scratch, 1, OPTAB_DIRECT);
18137 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18138 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18139 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18140 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18141 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18142 pc_rtx);
18143 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18144 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18145 JUMP_LABEL (insn) = qimode_label;
18147 /* Generate the original signed/unsigned divmod. */
18148 div = gen_divmod4_1 (operands[0], operands[1],
18149 operands[2], operands[3]);
18150 emit_insn (div);
18152 /* Branch to the end. */
18153 emit_jump_insn (gen_jump (end_label));
18154 emit_barrier ();
18156 /* Generate 8bit unsigned divide. */
18157 emit_label (qimode_label);
18158 /* Don't use operands[0] for result of 8bit divide since not all
18159 registers support QImode ZERO_EXTRACT. */
18160 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18161 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18162 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18163 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18165 if (signed_p)
18167 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18168 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18170 else
18172 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18173 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18176 /* Extract remainder from AH. */
18177 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18178 if (REG_P (operands[1]))
18179 insn = emit_move_insn (operands[1], tmp1);
18180 else
18182 /* Need a new scratch register since the old one holds the result
18183 of the 8bit divide. */
18184 scratch = gen_reg_rtx (mode);
18185 emit_move_insn (scratch, tmp1);
18186 insn = emit_move_insn (operands[1], scratch);
18188 set_unique_reg_note (insn, REG_EQUAL, mod);
18190 /* Zero extend quotient from AL. */
18191 tmp1 = gen_lowpart (QImode, tmp0);
18192 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18193 set_unique_reg_note (insn, REG_EQUAL, div);
18195 emit_label (end_label);
18198 #define LEA_MAX_STALL (3)
18199 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18201 /* Increase the given DISTANCE in half-cycles according to
18202 dependencies between the PREV and NEXT instructions.
18203 Add 1 half-cycle if there is no dependency and
18204 go to the next cycle if there is some dependency. */
18206 static unsigned int
18207 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18209 df_ref def, use;
18211 if (!prev || !next)
18212 return distance + (distance & 1) + 2;
18214 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18215 return distance + 1;
18217 FOR_EACH_INSN_USE (use, next)
18218 FOR_EACH_INSN_DEF (def, prev)
18219 if (!DF_REF_IS_ARTIFICIAL (def)
18220 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18221 return distance + (distance & 1) + 2;
18223 return distance + 1;
18226 /* Function checks if instruction INSN defines register number
18227 REGNO1 or REGNO2. */
18229 static bool
18230 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18231 rtx insn)
18233 df_ref def;
18235 FOR_EACH_INSN_DEF (def, insn)
18236 if (DF_REF_REG_DEF_P (def)
18237 && !DF_REF_IS_ARTIFICIAL (def)
18238 && (regno1 == DF_REF_REGNO (def)
18239 || regno2 == DF_REF_REGNO (def)))
18240 return true;
18242 return false;
18245 /* Function checks if instruction INSN uses register number
18246 REGNO as a part of address expression. */
18248 static bool
18249 insn_uses_reg_mem (unsigned int regno, rtx insn)
18251 df_ref use;
18253 FOR_EACH_INSN_USE (use, insn)
18254 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18255 return true;
18257 return false;
18260 /* Search backward for a non-agu definition of register number REGNO1
18261 or register number REGNO2 in the basic block, starting from instruction
18262 START up to the head of the basic block or instruction INSN.
18264 Set *FOUND to true if a definition was found and to false
18265 otherwise.
18267 The distance in half-cycles between START and the found instruction
18268 or the head of the BB is added to DISTANCE and returned. */
18270 static int
18271 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18272 rtx_insn *insn, int distance,
18273 rtx_insn *start, bool *found)
18275 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18276 rtx_insn *prev = start;
18277 rtx_insn *next = NULL;
18279 *found = false;
18281 while (prev
18282 && prev != insn
18283 && distance < LEA_SEARCH_THRESHOLD)
18285 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18287 distance = increase_distance (prev, next, distance);
18288 if (insn_defines_reg (regno1, regno2, prev))
18290 if (recog_memoized (prev) < 0
18291 || get_attr_type (prev) != TYPE_LEA)
18293 *found = true;
18294 return distance;
18298 next = prev;
18300 if (prev == BB_HEAD (bb))
18301 break;
18303 prev = PREV_INSN (prev);
18306 return distance;
18309 /* Search backward for a non-agu definition of register number REGNO1
18310 or register number REGNO2 in INSN's basic block until we
18311 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18312 2. Reach the neighbouring BB boundary, or
18313 3. Reach an agu definition.
18314 Return the distance between the non-agu definition point and INSN.
18315 If there is no definition point, return -1. */
18317 static int
18318 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18319 rtx_insn *insn)
18321 basic_block bb = BLOCK_FOR_INSN (insn);
18322 int distance = 0;
18323 bool found = false;
18325 if (insn != BB_HEAD (bb))
18326 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18327 distance, PREV_INSN (insn),
18328 &found);
18330 if (!found && distance < LEA_SEARCH_THRESHOLD)
18332 edge e;
18333 edge_iterator ei;
18334 bool simple_loop = false;
18336 FOR_EACH_EDGE (e, ei, bb->preds)
18337 if (e->src == bb)
18339 simple_loop = true;
18340 break;
18343 if (simple_loop)
18344 distance = distance_non_agu_define_in_bb (regno1, regno2,
18345 insn, distance,
18346 BB_END (bb), &found);
18347 else
18349 int shortest_dist = -1;
18350 bool found_in_bb = false;
18352 FOR_EACH_EDGE (e, ei, bb->preds)
18354 int bb_dist
18355 = distance_non_agu_define_in_bb (regno1, regno2,
18356 insn, distance,
18357 BB_END (e->src),
18358 &found_in_bb);
18359 if (found_in_bb)
18361 if (shortest_dist < 0)
18362 shortest_dist = bb_dist;
18363 else if (bb_dist > 0)
18364 shortest_dist = MIN (bb_dist, shortest_dist);
18366 found = true;
18370 distance = shortest_dist;
18374 /* get_attr_type may modify recog data. We want to make sure
18375 that recog data is valid for instruction INSN, on which
18376 distance_non_agu_define is called. INSN is unchanged here. */
18377 extract_insn_cached (insn);
18379 if (!found)
18380 return -1;
18382 return distance >> 1;
18385 /* Return the distance in half-cycles between INSN and the next
18386 insn that uses register number REGNO in a memory address, added
18387 to DISTANCE. Return -1 if REGNO is set.
18389 Set *FOUND to true if a register use was found and to false
18390 otherwise.
18391 Set *REDEFINED to true if a register redefinition was found and
18392 to false otherwise. */
18394 static int
18395 distance_agu_use_in_bb (unsigned int regno,
18396 rtx_insn *insn, int distance, rtx_insn *start,
18397 bool *found, bool *redefined)
18399 basic_block bb = NULL;
18400 rtx_insn *next = start;
18401 rtx_insn *prev = NULL;
18403 *found = false;
18404 *redefined = false;
18406 if (start != NULL_RTX)
18408 bb = BLOCK_FOR_INSN (start);
18409 if (start != BB_HEAD (bb))
18410 /* If insn and start belong to the same bb, set prev to insn,
18411 so the call to increase_distance will increase the distance
18412 between insns by 1. */
18413 prev = insn;
18416 while (next
18417 && next != insn
18418 && distance < LEA_SEARCH_THRESHOLD)
18420 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18422 distance = increase_distance(prev, next, distance);
18423 if (insn_uses_reg_mem (regno, next))
18425 /* Return DISTANCE if OP0 is used in memory
18426 address in NEXT. */
18427 *found = true;
18428 return distance;
18431 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18433 /* Return -1 if OP0 is set in NEXT. */
18434 *redefined = true;
18435 return -1;
18438 prev = next;
18441 if (next == BB_END (bb))
18442 break;
18444 next = NEXT_INSN (next);
18447 return distance;
18450 /* Return the distance between INSN and the next insn that uses
18451 register number REGNO0 in a memory address. Return -1 if no such
18452 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18454 static int
18455 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18457 basic_block bb = BLOCK_FOR_INSN (insn);
18458 int distance = 0;
18459 bool found = false;
18460 bool redefined = false;
18462 if (insn != BB_END (bb))
18463 distance = distance_agu_use_in_bb (regno0, insn, distance,
18464 NEXT_INSN (insn),
18465 &found, &redefined);
18467 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18469 edge e;
18470 edge_iterator ei;
18471 bool simple_loop = false;
18473 FOR_EACH_EDGE (e, ei, bb->succs)
18474 if (e->dest == bb)
18476 simple_loop = true;
18477 break;
18480 if (simple_loop)
18481 distance = distance_agu_use_in_bb (regno0, insn,
18482 distance, BB_HEAD (bb),
18483 &found, &redefined);
18484 else
18486 int shortest_dist = -1;
18487 bool found_in_bb = false;
18488 bool redefined_in_bb = false;
18490 FOR_EACH_EDGE (e, ei, bb->succs)
18492 int bb_dist
18493 = distance_agu_use_in_bb (regno0, insn,
18494 distance, BB_HEAD (e->dest),
18495 &found_in_bb, &redefined_in_bb);
18496 if (found_in_bb)
18498 if (shortest_dist < 0)
18499 shortest_dist = bb_dist;
18500 else if (bb_dist > 0)
18501 shortest_dist = MIN (bb_dist, shortest_dist);
18503 found = true;
18507 distance = shortest_dist;
18511 if (!found || redefined)
18512 return -1;
18514 return distance >> 1;
18517 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18518 there is a choice between LEA and ADD.
18519 Negative value: ADD is preferred over LEA
18520 Zero: Neutral
18521 Positive value: LEA is preferred over ADD */
18522 #define IX86_LEA_PRIORITY 0
18524 /* Return true if using the lea INSN has a performance advantage
18525 over a sequence of instructions. The instruction sequence has
18526 SPLIT_COST cycles higher latency than the lea latency. */
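/* For illustration: a three-operand add r0 = r1 + r2 can be emitted either
as a single "lea (%r1,%r2), %r0" (handled by the AGU on the affected chips)
or split into "mov %r1, %r0; add %r2, %r0" (handled by the ALU); the
distances computed below estimate whether the lea form would stall waiting
on non-AGU producers or feeding AGU consumers. */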
18528 static bool
18529 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18530 unsigned int regno2, int split_cost, bool has_scale)
18532 int dist_define, dist_use;
18534 /* For Silvermont, if a 2-source or 3-source LEA is used for its
18535 non-destructive destination, or because a scale factor is
18536 needed, the use of LEA is justified. */
18537 if (TARGET_SILVERMONT || TARGET_INTEL)
18539 if (has_scale)
18540 return true;
18541 if (split_cost < 1)
18542 return false;
18543 if (regno0 == regno1 || regno0 == regno2)
18544 return false;
18545 return true;
18548 dist_define = distance_non_agu_define (regno1, regno2, insn);
18549 dist_use = distance_agu_use (regno0, insn);
18551 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18553 /* If there is no non-AGU operand definition, no AGU
18554 operand use and the split cost is 0, then both the lea
18555 and non-lea variants have the same priority. Currently
18556 we prefer lea for 64 bit code and non-lea for 32 bit
18557 code. */
18558 if (dist_use < 0 && split_cost == 0)
18559 return TARGET_64BIT || IX86_LEA_PRIORITY;
18560 else
18561 return true;
18564 /* With a longer definition distance, lea is preferable.
18565 Here we adjust it to take the splitting cost and
18566 lea priority into account. */
18567 dist_define += split_cost + IX86_LEA_PRIORITY;
18569 /* If there is no use in a memory address then we just check
18570 that the split cost exceeds the AGU stall. */
18571 if (dist_use < 0)
18572 return dist_define > LEA_MAX_STALL;
18574 /* If this insn has both a backward non-agu dependence and a forward
18575 agu dependence, the one with the shorter distance takes effect. */
18576 return dist_define >= dist_use;
18579 /* Return true if it is legal to clobber flags by INSN and
18580 false otherwise. */
18582 static bool
18583 ix86_ok_to_clobber_flags (rtx_insn *insn)
18585 basic_block bb = BLOCK_FOR_INSN (insn);
18586 df_ref use;
18587 bitmap live;
18589 while (insn)
18591 if (NONDEBUG_INSN_P (insn))
18593 FOR_EACH_INSN_USE (use, insn)
18594 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18595 return false;
18597 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18598 return true;
18601 if (insn == BB_END (bb))
18602 break;
18604 insn = NEXT_INSN (insn);
18607 live = df_get_live_out (bb);
18608 return !REGNO_REG_SET_P (live, FLAGS_REG);
18611 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18612 move and add to avoid AGU stalls. */
18614 bool
18615 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18617 unsigned int regno0, regno1, regno2;
18619 /* Check if we need to optimize. */
18620 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18621 return false;
18623 /* Check that it is correct to split here. */
18624 if (!ix86_ok_to_clobber_flags (insn))
18625 return false;
18627 regno0 = true_regnum (operands[0]);
18628 regno1 = true_regnum (operands[1]);
18629 regno2 = true_regnum (operands[2]);
18631 /* We only need to split adds with a non-destructive
18632 destination operand. */
18633 if (regno0 == regno1 || regno0 == regno2)
18634 return false;
18635 else
18636 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18639 /* Return true if we should emit lea instruction instead of mov
18640 instruction. */
18642 bool
18643 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18645 unsigned int regno0, regno1;
18647 /* Check if we need to optimize. */
18648 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18649 return false;
18651 /* Use lea for reg to reg moves only. */
18652 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18653 return false;
18655 regno0 = true_regnum (operands[0]);
18656 regno1 = true_regnum (operands[1]);
18658 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18661 /* Return true if we need to split lea into a sequence of
18662 instructions to avoid AGU stalls. */
18664 bool
18665 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18667 unsigned int regno0, regno1, regno2;
18668 int split_cost;
18669 struct ix86_address parts;
18670 int ok;
18672 /* Check if we need to optimize. */
18673 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18674 return false;
18676 /* The "at least two components" test below might not catch simple
18677 move or zero extension insns if parts.base is non-NULL and parts.disp
18678 is const0_rtx as the only components in the address, e.g. if the
18679 register is %rbp or %r13. As this test is much cheaper and moves or
18680 zero extensions are the common case, do this check first. */
18681 if (REG_P (operands[1])
18682 || (SImode_address_operand (operands[1], VOIDmode)
18683 && REG_P (XEXP (operands[1], 0))))
18684 return false;
18686 /* Check if it is OK to split here. */
18687 if (!ix86_ok_to_clobber_flags (insn))
18688 return false;
18690 ok = ix86_decompose_address (operands[1], &parts);
18691 gcc_assert (ok);
18693 /* There should be at least two components in the address. */
18694 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18695 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18696 return false;
18698 /* We should not split into add if a non-legitimate PIC
18699 operand is used as the displacement. */
18700 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18701 return false;
18703 regno0 = true_regnum (operands[0]);
18704 regno1 = INVALID_REGNUM;
18705 regno2 = INVALID_REGNUM;
18707 if (parts.base)
18708 regno1 = true_regnum (parts.base);
18709 if (parts.index)
18710 regno2 = true_regnum (parts.index);
18712 split_cost = 0;
18714 /* Compute how many cycles we will add to the execution time
18715 if we split the lea into a sequence of instructions. */
18716 if (parts.base || parts.index)
18718 /* Have to use a mov instruction if the non-destructive
18719 destination form is used. */
18720 if (regno1 != regno0 && regno2 != regno0)
18721 split_cost += 1;
18723 /* Have to add index to base if both exist. */
18724 if (parts.base && parts.index)
18725 split_cost += 1;
18727 /* Have to use shift and adds if scale is 2 or greater. */
18728 if (parts.scale > 1)
18730 if (regno0 != regno1)
18731 split_cost += 1;
18732 else if (regno2 == regno0)
18733 split_cost += 4;
18734 else
18735 split_cost += parts.scale;
18738 /* Have to use an add instruction with an immediate if
18739 disp is nonzero. */
18740 if (parts.disp && parts.disp != const0_rtx)
18741 split_cost += 1;
18743 /* Subtract the price of lea. */
18744 split_cost -= 1;
18747 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18748 parts.scale > 1);
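/* A worked example of the cost accounting above (register choices are
   illustrative): for lea 0x4(%rbx,%rcx,2), %rax with %rax distinct from
   %rbx and %rcx, split_cost = 1 (mov for the non-destructive destination)
   + 1 (add of base and index) + 1 (shift for the scale) + 1 (add of the
   displacement) - 1 (the lea itself) = 3, and the lea is replaced only
   if ix86_lea_outperforms rejects it with that penalty.  */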
18751 /* Emit the x86 binary operation CODE in mode MODE, where the first operand
18752 matches the destination.  The emitted RTX includes a clobber of FLAGS_REG. */
18754 static void
18755 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18756 rtx dst, rtx src)
18758 rtx op, clob;
18760 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18761 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18763 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
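/* For instance (a sketch, not a literal RTL dump), ix86_emit_binop (PLUS,
   SImode, dst, src) emits a pattern of the shape
     (parallel [(set dst (plus:SI dst src))
                (clobber (reg:CC FLAGS_REG))])
   so every operation emitted through it clobbers the flags register.  */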
18766 /* Return true if the regno1 def is nearer to the insn than the regno2 def. */
18768 static bool
18769 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18771 rtx_insn *prev = insn;
18772 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18774 if (insn == start)
18775 return false;
18776 while (prev && prev != start)
18778 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18780 prev = PREV_INSN (prev);
18781 continue;
18783 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18784 return true;
18785 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18786 return false;
18787 prev = PREV_INSN (prev);
18790 /* None of the regs is defined in the bb. */
18791 return false;
18794 /* Split lea instructions into a sequence of instructions
18795 which are executed on the ALU to avoid AGU stalls.
18796 It is assumed that it is allowed to clobber the flags register
18797 at the lea position. */
18799 void
18800 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18802 unsigned int regno0, regno1, regno2;
18803 struct ix86_address parts;
18804 rtx target, tmp;
18805 int ok, adds;
18807 ok = ix86_decompose_address (operands[1], &parts);
18808 gcc_assert (ok);
18810 target = gen_lowpart (mode, operands[0]);
18812 regno0 = true_regnum (target);
18813 regno1 = INVALID_REGNUM;
18814 regno2 = INVALID_REGNUM;
18816 if (parts.base)
18818 parts.base = gen_lowpart (mode, parts.base);
18819 regno1 = true_regnum (parts.base);
18822 if (parts.index)
18824 parts.index = gen_lowpart (mode, parts.index);
18825 regno2 = true_regnum (parts.index);
18828 if (parts.disp)
18829 parts.disp = gen_lowpart (mode, parts.disp);
18831 if (parts.scale > 1)
18833 /* Case r1 = r1 + ... */
18834 if (regno1 == regno0)
18836 /* If we have the case r1 = r1 + C * r2 then we
18837 would have to use multiplication, which is very
18838 expensive.  Assume the cost model is wrong if we
18839 get such a case here. */
18840 gcc_assert (regno2 != regno0);
18842 for (adds = parts.scale; adds > 0; adds--)
18843 ix86_emit_binop (PLUS, mode, target, parts.index);
18845 else
18847 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18848 if (regno0 != regno2)
18849 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18851 /* Use shift for scaling. */
18852 ix86_emit_binop (ASHIFT, mode, target,
18853 GEN_INT (exact_log2 (parts.scale)));
18855 if (parts.base)
18856 ix86_emit_binop (PLUS, mode, target, parts.base);
18858 if (parts.disp && parts.disp != const0_rtx)
18859 ix86_emit_binop (PLUS, mode, target, parts.disp);
18862 else if (!parts.base && !parts.index)
18864 gcc_assert (parts.disp);
18865 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18867 else
18869 if (!parts.base)
18871 if (regno0 != regno2)
18872 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18874 else if (!parts.index)
18876 if (regno0 != regno1)
18877 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18879 else
18881 if (regno0 == regno1)
18882 tmp = parts.index;
18883 else if (regno0 == regno2)
18884 tmp = parts.base;
18885 else
18887 rtx tmp1;
18889 /* Find better operand for SET instruction, depending
18890 on which definition is farther from the insn. */
18891 if (find_nearest_reg_def (insn, regno1, regno2))
18892 tmp = parts.index, tmp1 = parts.base;
18893 else
18894 tmp = parts.base, tmp1 = parts.index;
18896 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18898 if (parts.disp && parts.disp != const0_rtx)
18899 ix86_emit_binop (PLUS, mode, target, parts.disp);
18901 ix86_emit_binop (PLUS, mode, target, tmp1);
18902 return;
18905 ix86_emit_binop (PLUS, mode, target, tmp);
18908 if (parts.disp && parts.disp != const0_rtx)
18909 ix86_emit_binop (PLUS, mode, target, parts.disp);
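/* An illustrative sketch of the splitting above (hypothetical registers):
   lea 0x4(%rbx,%rcx,2), %rax, with %rax distinct from both inputs,
   becomes roughly
     mov %rcx, %rax   (copy the index)
     shl $1, %rax     (apply the scale)
     add %rbx, %rax   (add the base)
     add $4, %rax     (add the displacement)
   all of which execute on the ALU rather than the AGU.  */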
18913 /* Return true if it is ok to optimize an ADD operation to an LEA
18914 operation to avoid flag register consumption.  For most processors,
18915 ADD is faster than LEA.  For processors like BONNELL, if the
18916 destination register of the LEA holds an actual address which will be
18917 used soon, LEA is better and otherwise ADD is better. */
18919 bool
18920 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18922 unsigned int regno0 = true_regnum (operands[0]);
18923 unsigned int regno1 = true_regnum (operands[1]);
18924 unsigned int regno2 = true_regnum (operands[2]);
18926 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18927 if (regno0 != regno1 && regno0 != regno2)
18928 return true;
18930 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18931 return false;
18933 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18936 /* Return true if destination reg of SET_BODY is shift count of
18937 USE_BODY. */
18939 static bool
18940 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18942 rtx set_dest;
18943 rtx shift_rtx;
18944 int i;
18946 /* Retrieve destination of SET_BODY. */
18947 switch (GET_CODE (set_body))
18949 case SET:
18950 set_dest = SET_DEST (set_body);
18951 if (!set_dest || !REG_P (set_dest))
18952 return false;
18953 break;
18954 case PARALLEL:
18955 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18956 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18957 use_body))
18958 return true;
18959 default:
18960 return false;
18961 break;
18964 /* Retrieve shift count of USE_BODY. */
18965 switch (GET_CODE (use_body))
18967 case SET:
18968 shift_rtx = XEXP (use_body, 1);
18969 break;
18970 case PARALLEL:
18971 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18972 if (ix86_dep_by_shift_count_body (set_body,
18973 XVECEXP (use_body, 0, i)))
18974 return true;
18975 default:
18976 return false;
18977 break;
18980 if (shift_rtx
18981 && (GET_CODE (shift_rtx) == ASHIFT
18982 || GET_CODE (shift_rtx) == LSHIFTRT
18983 || GET_CODE (shift_rtx) == ASHIFTRT
18984 || GET_CODE (shift_rtx) == ROTATE
18985 || GET_CODE (shift_rtx) == ROTATERT))
18987 rtx shift_count = XEXP (shift_rtx, 1);
18989 /* Return true if shift count is dest of SET_BODY. */
18990 if (REG_P (shift_count))
18992 /* Add this check since it can be invoked before register
18993 allocation in the pre-reload scheduler. */
18994 if (reload_completed
18995 && true_regnum (set_dest) == true_regnum (shift_count))
18996 return true;
18997 else if (REGNO (set_dest) == REGNO (shift_count))
18998 return true;
19002 return false;
19005 /* Return true if destination reg of SET_INSN is shift count of
19006 USE_INSN. */
19008 bool
19009 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19011 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19012 PATTERN (use_insn));
19015 /* Return TRUE or FALSE depending on whether the unary operator meets the
19016 appropriate constraints. */
19018 bool
19019 ix86_unary_operator_ok (enum rtx_code,
19020 machine_mode,
19021 rtx operands[2])
19023 /* If one of operands is memory, source and destination must match. */
19024 if ((MEM_P (operands[0])
19025 || MEM_P (operands[1]))
19026 && ! rtx_equal_p (operands[0], operands[1]))
19027 return false;
19028 return true;
19031 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19032 are ok, keeping in mind the possible movddup alternative. */
19034 bool
19035 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19037 if (MEM_P (operands[0]))
19038 return rtx_equal_p (operands[0], operands[1 + high]);
19039 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19040 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19041 return true;
19044 /* Post-reload splitter for converting an SF or DFmode value in an
19045 SSE register into an unsigned SImode. */
19047 void
19048 ix86_split_convert_uns_si_sse (rtx operands[])
19050 machine_mode vecmode;
19051 rtx value, large, zero_or_two31, input, two31, x;
19053 large = operands[1];
19054 zero_or_two31 = operands[2];
19055 input = operands[3];
19056 two31 = operands[4];
19057 vecmode = GET_MODE (large);
19058 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19060 /* Load up the value into the low element. We must ensure that the other
19061 elements are valid floats -- zero is the easiest such value. */
19062 if (MEM_P (input))
19064 if (vecmode == V4SFmode)
19065 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19066 else
19067 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19069 else
19071 input = gen_rtx_REG (vecmode, REGNO (input));
19072 emit_move_insn (value, CONST0_RTX (vecmode));
19073 if (vecmode == V4SFmode)
19074 emit_insn (gen_sse_movss (value, value, input));
19075 else
19076 emit_insn (gen_sse2_movsd (value, value, input));
19079 emit_move_insn (large, two31);
19080 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19082 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19083 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19085 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19086 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19088 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19089 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19091 large = gen_rtx_REG (V4SImode, REGNO (large));
19092 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19094 x = gen_rtx_REG (V4SImode, REGNO (value));
19095 if (vecmode == V4SFmode)
19096 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19097 else
19098 emit_insn (gen_sse2_cvttpd2dq (x, value));
19099 value = x;
19101 emit_insn (gen_xorv4si3 (value, value, large));
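/* A numeric sketch of the sequence above: for an input of 3000000000.0
   (>= 0x1p31) the comparison mask selects 0x1p31, the subtraction leaves
   852516352.0, the truncating conversion gives 852516352, and xoring in
   the shifted mask bit 0x80000000 restores 3000000000.  Inputs below
   0x1p31 subtract zero and xor with zero, i.e. take the plain signed
   conversion path.  */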
19104 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19105 Expects the 64-bit DImode to be supplied in a pair of integral
19106 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19107 -mfpmath=sse, !optimize_size only. */
19109 void
19110 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19112 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19113 rtx int_xmm, fp_xmm;
19114 rtx biases, exponents;
19115 rtx x;
19117 int_xmm = gen_reg_rtx (V4SImode);
19118 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19119 emit_insn (gen_movdi_to_sse (int_xmm, input));
19120 else if (TARGET_SSE_SPLIT_REGS)
19122 emit_clobber (int_xmm);
19123 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19125 else
19127 x = gen_reg_rtx (V2DImode);
19128 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19129 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19132 x = gen_rtx_CONST_VECTOR (V4SImode,
19133 gen_rtvec (4, GEN_INT (0x43300000UL),
19134 GEN_INT (0x45300000UL),
19135 const0_rtx, const0_rtx));
19136 exponents = validize_mem (force_const_mem (V4SImode, x));
19138 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19139 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19141 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19142 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19143 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19144 (0x1.0p84 + double(fp_value_hi_xmm)).
19145 Note these exponents differ by 32. */
19147 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19149 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19150 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19151 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19152 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19153 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19154 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19155 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19156 biases = validize_mem (force_const_mem (V2DFmode, biases));
19157 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19159 /* Add the upper and lower DFmode values together. */
19160 if (TARGET_SSE3)
19161 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19162 else
19164 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19165 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19166 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19169 ix86_expand_vector_extract (false, target, fp_xmm, 0);
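/* A small worked example of the bias trick described above: for the
   64-bit input 0x100000003 the low word 3 packs into the double
   0x1.0p52 + 3 and the high word 1 packs into 0x1.0p84 + 0x1.0p32;
   subtracting the biases leaves 3.0 and 4294967296.0, and the final
   add yields 4294967299.0, the exact value of the input.  */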
19172 /* Not used, but eases macroization of patterns. */
19173 void
19174 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19176 gcc_unreachable ();
19179 /* Convert an unsigned SImode value into a DFmode. Only currently used
19180 for SSE, but applicable anywhere. */
19182 void
19183 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19185 REAL_VALUE_TYPE TWO31r;
19186 rtx x, fp;
19188 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19189 NULL, 1, OPTAB_DIRECT);
19191 fp = gen_reg_rtx (DFmode);
19192 emit_insn (gen_floatsidf2 (fp, x));
19194 real_ldexp (&TWO31r, &dconst1, 31);
19195 x = const_double_from_real_value (TWO31r, DFmode);
19197 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19198 if (x != target)
19199 emit_move_insn (target, x);
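/* Sketch of the arithmetic above: the unsigned input is rebiased by
   adding INT_MIN (equivalently, 0x1p31 is subtracted modulo 2^32), the
   result is converted with the ordinary signed conversion, and 0x1p31
   is added back as a double.  E.g. 3000000000 rebiases to 852516352,
   converts to 852516352.0 and ends up as 3000000000.0.  */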
19202 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19203 32-bit mode; otherwise we have a direct convert instruction. */
19205 void
19206 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19208 REAL_VALUE_TYPE TWO32r;
19209 rtx fp_lo, fp_hi, x;
19211 fp_lo = gen_reg_rtx (DFmode);
19212 fp_hi = gen_reg_rtx (DFmode);
19214 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19216 real_ldexp (&TWO32r, &dconst1, 32);
19217 x = const_double_from_real_value (TWO32r, DFmode);
19218 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19220 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19222 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19223 0, OPTAB_DIRECT);
19224 if (x != target)
19225 emit_move_insn (target, x);
19228 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19229 For x86_32, -mfpmath=sse, !optimize_size only. */
19230 void
19231 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19233 REAL_VALUE_TYPE ONE16r;
19234 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19236 real_ldexp (&ONE16r, &dconst1, 16);
19237 x = const_double_from_real_value (ONE16r, SFmode);
19238 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19239 NULL, 0, OPTAB_DIRECT);
19240 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19241 NULL, 0, OPTAB_DIRECT);
19242 fp_hi = gen_reg_rtx (SFmode);
19243 fp_lo = gen_reg_rtx (SFmode);
19244 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19245 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19246 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19247 0, OPTAB_DIRECT);
19248 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19249 0, OPTAB_DIRECT);
19250 if (!rtx_equal_p (target, fp_hi))
19251 emit_move_insn (target, fp_hi);
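/* Worked example for the 16-bit split above (numbers illustrative): for
   input 0x00345678, int_hi = 0x34 = 52 and int_lo = 0x5678 = 22136, so
   the result is 52.0f * 0x1p16 + 22136.0f = 3430008.0f.  Each half fits
   in 16 bits, so the signed conversions are exact even when the full
   unsigned input would be misinterpreted as negative by a plain signed
   cvtsi2ss.  */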
19254 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19255 a vector of unsigned ints VAL to vector of floats TARGET. */
19257 void
19258 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19260 rtx tmp[8];
19261 REAL_VALUE_TYPE TWO16r;
19262 machine_mode intmode = GET_MODE (val);
19263 machine_mode fltmode = GET_MODE (target);
19264 rtx (*cvt) (rtx, rtx);
19266 if (intmode == V4SImode)
19267 cvt = gen_floatv4siv4sf2;
19268 else
19269 cvt = gen_floatv8siv8sf2;
19270 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19271 tmp[0] = force_reg (intmode, tmp[0]);
19272 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19273 OPTAB_DIRECT);
19274 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19275 NULL_RTX, 1, OPTAB_DIRECT);
19276 tmp[3] = gen_reg_rtx (fltmode);
19277 emit_insn (cvt (tmp[3], tmp[1]));
19278 tmp[4] = gen_reg_rtx (fltmode);
19279 emit_insn (cvt (tmp[4], tmp[2]));
19280 real_ldexp (&TWO16r, &dconst1, 16);
19281 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19282 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19283 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19284 OPTAB_DIRECT);
19285 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19286 OPTAB_DIRECT);
19287 if (tmp[7] != target)
19288 emit_move_insn (target, tmp[7]);
19291 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19292 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19293 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19294 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19297 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19299 REAL_VALUE_TYPE TWO31r;
19300 rtx two31r, tmp[4];
19301 machine_mode mode = GET_MODE (val);
19302 machine_mode scalarmode = GET_MODE_INNER (mode);
19303 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19304 rtx (*cmp) (rtx, rtx, rtx, rtx);
19305 int i;
19307 for (i = 0; i < 3; i++)
19308 tmp[i] = gen_reg_rtx (mode);
19309 real_ldexp (&TWO31r, &dconst1, 31);
19310 two31r = const_double_from_real_value (TWO31r, scalarmode);
19311 two31r = ix86_build_const_vector (mode, 1, two31r);
19312 two31r = force_reg (mode, two31r);
19313 switch (mode)
19315 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19316 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19317 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19318 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19319 default: gcc_unreachable ();
19321 tmp[3] = gen_rtx_LE (mode, two31r, val);
19322 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19323 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19324 0, OPTAB_DIRECT);
19325 if (intmode == V4SImode || TARGET_AVX2)
19326 *xorp = expand_simple_binop (intmode, ASHIFT,
19327 gen_lowpart (intmode, tmp[0]),
19328 GEN_INT (31), NULL_RTX, 0,
19329 OPTAB_DIRECT);
19330 else
19332 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19333 two31 = ix86_build_const_vector (intmode, 1, two31);
19334 *xorp = expand_simple_binop (intmode, AND,
19335 gen_lowpart (intmode, tmp[0]),
19336 two31, NULL_RTX, 0,
19337 OPTAB_DIRECT);
19339 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19340 0, OPTAB_DIRECT);
19343 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19344 then replicate the value for all elements of the vector
19345 register. */
19348 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19350 int i, n_elt;
19351 rtvec v;
19352 machine_mode scalar_mode;
19354 switch (mode)
19356 case V64QImode:
19357 case V32QImode:
19358 case V16QImode:
19359 case V32HImode:
19360 case V16HImode:
19361 case V8HImode:
19362 case V16SImode:
19363 case V8SImode:
19364 case V4SImode:
19365 case V8DImode:
19366 case V4DImode:
19367 case V2DImode:
19368 gcc_assert (vect);
19369 case V16SFmode:
19370 case V8SFmode:
19371 case V4SFmode:
19372 case V8DFmode:
19373 case V4DFmode:
19374 case V2DFmode:
19375 n_elt = GET_MODE_NUNITS (mode);
19376 v = rtvec_alloc (n_elt);
19377 scalar_mode = GET_MODE_INNER (mode);
19379 RTVEC_ELT (v, 0) = value;
19381 for (i = 1; i < n_elt; ++i)
19382 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19384 return gen_rtx_CONST_VECTOR (mode, v);
19386 default:
19387 gcc_unreachable ();
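/* For example (a sketch of how this helper is typically used), calling
   ix86_build_const_vector (V4SImode, true, GEN_INT (0xffff)) yields
   (const_vector:V4SI [0xffff 0xffff 0xffff 0xffff]), while vect = false
   with a float vector mode puts VALUE in element 0 and zeros elsewhere.  */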
19391 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19392 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19393 for an SSE register. If VECT is true, then replicate the mask for
19394 all elements of the vector register. If INVERT is true, then create
19395 a mask excluding the sign bit. */
19398 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19400 machine_mode vec_mode, imode;
19401 HOST_WIDE_INT hi, lo;
19402 int shift = 63;
19403 rtx v;
19404 rtx mask;
19406 /* Find the sign bit, sign extended to 2*HWI. */
19407 switch (mode)
19409 case V16SImode:
19410 case V16SFmode:
19411 case V8SImode:
19412 case V4SImode:
19413 case V8SFmode:
19414 case V4SFmode:
19415 vec_mode = mode;
19416 mode = GET_MODE_INNER (mode);
19417 imode = SImode;
19418 lo = 0x80000000, hi = lo < 0;
19419 break;
19421 case V8DImode:
19422 case V4DImode:
19423 case V2DImode:
19424 case V8DFmode:
19425 case V4DFmode:
19426 case V2DFmode:
19427 vec_mode = mode;
19428 mode = GET_MODE_INNER (mode);
19429 imode = DImode;
19430 if (HOST_BITS_PER_WIDE_INT >= 64)
19431 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19432 else
19433 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19434 break;
19436 case TImode:
19437 case TFmode:
19438 vec_mode = VOIDmode;
19439 if (HOST_BITS_PER_WIDE_INT >= 64)
19441 imode = TImode;
19442 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19444 else
19446 rtvec vec;
19448 imode = DImode;
19449 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19451 if (invert)
19453 lo = ~lo, hi = ~hi;
19454 v = constm1_rtx;
19456 else
19457 v = const0_rtx;
19459 mask = immed_double_const (lo, hi, imode);
19461 vec = gen_rtvec (2, v, mask);
19462 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19463 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19465 return v;
19467 break;
19469 default:
19470 gcc_unreachable ();
19473 if (invert)
19474 lo = ~lo, hi = ~hi;
19476 /* Force this value into the low part of a fp vector constant. */
19477 mask = immed_double_const (lo, hi, imode);
19478 mask = gen_lowpart (mode, mask);
19480 if (vec_mode == VOIDmode)
19481 return force_reg (mode, mask);
19483 v = ix86_build_const_vector (vec_mode, vect, mask);
19484 return force_reg (vec_mode, v);
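/* Concretely (an illustrative note, not an exhaustive case list): for
   V4SFmode with VECT set and INVERT clear this yields a vector whose
   elements all carry the bit pattern 0x80000000, i.e. -0.0f; with
   INVERT set they carry 0x7fffffff, selecting everything except the
   sign bit.  */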
19487 /* Generate code for floating point ABS or NEG. */
19489 void
19490 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19491 rtx operands[])
19493 rtx mask, set, dst, src;
19494 bool use_sse = false;
19495 bool vector_mode = VECTOR_MODE_P (mode);
19496 machine_mode vmode = mode;
19498 if (vector_mode)
19499 use_sse = true;
19500 else if (mode == TFmode)
19501 use_sse = true;
19502 else if (TARGET_SSE_MATH)
19504 use_sse = SSE_FLOAT_MODE_P (mode);
19505 if (mode == SFmode)
19506 vmode = V4SFmode;
19507 else if (mode == DFmode)
19508 vmode = V2DFmode;
19511 /* NEG and ABS performed with SSE use bitwise mask operations.
19512 Create the appropriate mask now. */
19513 if (use_sse)
19514 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19515 else
19516 mask = NULL_RTX;
19518 dst = operands[0];
19519 src = operands[1];
19521 set = gen_rtx_fmt_e (code, mode, src);
19522 set = gen_rtx_SET (VOIDmode, dst, set);
19524 if (mask)
19526 rtx use, clob;
19527 rtvec par;
19529 use = gen_rtx_USE (VOIDmode, mask);
19530 if (vector_mode)
19531 par = gen_rtvec (2, set, use);
19532 else
19534 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19535 par = gen_rtvec (3, set, use, clob);
19537 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19539 else
19540 emit_insn (set);
19543 /* Expand a copysign operation. Special case operand 0 being a constant. */
19545 void
19546 ix86_expand_copysign (rtx operands[])
19548 machine_mode mode, vmode;
19549 rtx dest, op0, op1, mask, nmask;
19551 dest = operands[0];
19552 op0 = operands[1];
19553 op1 = operands[2];
19555 mode = GET_MODE (dest);
19557 if (mode == SFmode)
19558 vmode = V4SFmode;
19559 else if (mode == DFmode)
19560 vmode = V2DFmode;
19561 else
19562 vmode = mode;
19564 if (GET_CODE (op0) == CONST_DOUBLE)
19566 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19568 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19569 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19571 if (mode == SFmode || mode == DFmode)
19573 if (op0 == CONST0_RTX (mode))
19574 op0 = CONST0_RTX (vmode);
19575 else
19577 rtx v = ix86_build_const_vector (vmode, false, op0);
19579 op0 = force_reg (vmode, v);
19582 else if (op0 != CONST0_RTX (mode))
19583 op0 = force_reg (mode, op0);
19585 mask = ix86_build_signbit_mask (vmode, 0, 0);
19587 if (mode == SFmode)
19588 copysign_insn = gen_copysignsf3_const;
19589 else if (mode == DFmode)
19590 copysign_insn = gen_copysigndf3_const;
19591 else
19592 copysign_insn = gen_copysigntf3_const;
19594 emit_insn (copysign_insn (dest, op0, op1, mask));
19596 else
19598 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19600 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19601 mask = ix86_build_signbit_mask (vmode, 0, 0);
19603 if (mode == SFmode)
19604 copysign_insn = gen_copysignsf3_var;
19605 else if (mode == DFmode)
19606 copysign_insn = gen_copysigndf3_var;
19607 else
19608 copysign_insn = gen_copysigntf3_var;
19610 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19614 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19615 be a constant, and so has already been expanded into a vector constant. */
19617 void
19618 ix86_split_copysign_const (rtx operands[])
19620 machine_mode mode, vmode;
19621 rtx dest, op0, mask, x;
19623 dest = operands[0];
19624 op0 = operands[1];
19625 mask = operands[3];
19627 mode = GET_MODE (dest);
19628 vmode = GET_MODE (mask);
19630 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19631 x = gen_rtx_AND (vmode, dest, mask);
19632 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19634 if (op0 != CONST0_RTX (vmode))
19636 x = gen_rtx_IOR (vmode, dest, op0);
19637 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19641 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19642 so we have to do two masks. */
19644 void
19645 ix86_split_copysign_var (rtx operands[])
19647 machine_mode mode, vmode;
19648 rtx dest, scratch, op0, op1, mask, nmask, x;
19650 dest = operands[0];
19651 scratch = operands[1];
19652 op0 = operands[2];
19653 op1 = operands[3];
19654 nmask = operands[4];
19655 mask = operands[5];
19657 mode = GET_MODE (dest);
19658 vmode = GET_MODE (mask);
19660 if (rtx_equal_p (op0, op1))
19662 /* Shouldn't happen often (it's useless, obviously), but when it does
19663 we'd generate incorrect code if we continue below. */
19664 emit_move_insn (dest, op0);
19665 return;
19668 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19670 gcc_assert (REGNO (op1) == REGNO (scratch));
19672 x = gen_rtx_AND (vmode, scratch, mask);
19673 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19675 dest = mask;
19676 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19677 x = gen_rtx_NOT (vmode, dest);
19678 x = gen_rtx_AND (vmode, x, op0);
19679 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19681 else
19683 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19685 x = gen_rtx_AND (vmode, scratch, mask);
19687 else /* alternative 2,4 */
19689 gcc_assert (REGNO (mask) == REGNO (scratch));
19690 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19691 x = gen_rtx_AND (vmode, scratch, op1);
19693 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19695 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19697 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19698 x = gen_rtx_AND (vmode, dest, nmask);
19700 else /* alternative 3,4 */
19702 gcc_assert (REGNO (nmask) == REGNO (dest));
19703 dest = nmask;
19704 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19705 x = gen_rtx_AND (vmode, dest, op0);
19707 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19710 x = gen_rtx_IOR (vmode, dest, scratch);
19711 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
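/* In bitwise terms the variable-sign split above computes, roughly,
   dest = (op0 & nmask) | (op1 & mask), where MASK has only the sign
   bits set and NMASK is its complement: the magnitude of op0 combined
   with the sign of op1.  The alternatives differ only in which operand
   already occupies which hard register.  */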
19714 /* Return TRUE or FALSE depending on whether the first SET in INSN
19715 has source and destination with matching CC modes, and whether the
19716 CC mode is at least as constrained as REQ_MODE. */
19718 bool
19719 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19721 rtx set;
19722 machine_mode set_mode;
19724 set = PATTERN (insn);
19725 if (GET_CODE (set) == PARALLEL)
19726 set = XVECEXP (set, 0, 0);
19727 gcc_assert (GET_CODE (set) == SET);
19728 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19730 set_mode = GET_MODE (SET_DEST (set));
19731 switch (set_mode)
19733 case CCNOmode:
19734 if (req_mode != CCNOmode
19735 && (req_mode != CCmode
19736 || XEXP (SET_SRC (set), 1) != const0_rtx))
19737 return false;
19738 break;
19739 case CCmode:
19740 if (req_mode == CCGCmode)
19741 return false;
19742 /* FALLTHRU */
19743 case CCGCmode:
19744 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19745 return false;
19746 /* FALLTHRU */
19747 case CCGOCmode:
19748 if (req_mode == CCZmode)
19749 return false;
19750 /* FALLTHRU */
19751 case CCZmode:
19752 break;
19754 case CCAmode:
19755 case CCCmode:
19756 case CCOmode:
19757 case CCSmode:
19758 if (set_mode != req_mode)
19759 return false;
19760 break;
19762 default:
19763 gcc_unreachable ();
19766 return GET_MODE (SET_SRC (set)) == set_mode;
19769 /* Generate insn patterns to do an integer compare of OPERANDS. */
19771 static rtx
19772 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19774 machine_mode cmpmode;
19775 rtx tmp, flags;
19777 cmpmode = SELECT_CC_MODE (code, op0, op1);
19778 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19780 /* This is very simple, but making the interface the same as in the
19781 FP case makes the rest of the code easier. */
19782 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19783 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19785 /* Return the test that should be put into the flags user, i.e.
19786 the bcc, scc, or cmov instruction. */
19787 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19790 /* Figure out whether to use ordered or unordered fp comparisons.
19791 Return the appropriate mode to use. */
19793 machine_mode
19794 ix86_fp_compare_mode (enum rtx_code)
19796 /* ??? In order to make all comparisons reversible, we do all comparisons
19797 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19798 all forms of trapping and nontrapping comparisons, we can make inequality
19799 comparisons trapping again, since it results in better code when using
19800 FCOM based compares. */
19801 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19804 machine_mode
19805 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19807 machine_mode mode = GET_MODE (op0);
19809 if (SCALAR_FLOAT_MODE_P (mode))
19811 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19812 return ix86_fp_compare_mode (code);
19815 switch (code)
19817 /* Only zero flag is needed. */
19818 case EQ: /* ZF=0 */
19819 case NE: /* ZF!=0 */
19820 return CCZmode;
19821 /* Codes needing carry flag. */
19822 case GEU: /* CF=0 */
19823 case LTU: /* CF=1 */
19824 /* Detect overflow checks. They need just the carry flag. */
19825 if (GET_CODE (op0) == PLUS
19826 && rtx_equal_p (op1, XEXP (op0, 0)))
19827 return CCCmode;
19828 else
19829 return CCmode;
19830 case GTU: /* CF=0 & ZF=0 */
19831 case LEU: /* CF=1 | ZF=1 */
19832 return CCmode;
19833 /* Codes possibly doable only with sign flag when
19834 comparing against zero. */
19835 case GE: /* SF=OF or SF=0 */
19836 case LT: /* SF<>OF or SF=1 */
19837 if (op1 == const0_rtx)
19838 return CCGOCmode;
19839 else
19840 /* For other cases the carry flag is not required. */
19841 return CCGCmode;
19842 /* Codes doable only with the sign flag when comparing
19843 against zero, but we lack a jump instruction for it,
19844 so we need to use relational tests against overflow,
19845 which thus needs to be zero. */
19846 case GT: /* ZF=0 & SF=OF */
19847 case LE: /* ZF=1 | SF<>OF */
19848 if (op1 == const0_rtx)
19849 return CCNOmode;
19850 else
19851 return CCGCmode;
19852 /* The strcmp pattern does (use flags) and combine may ask us for the
19853 proper mode. */
19854 case USE:
19855 return CCmode;
19856 default:
19857 gcc_unreachable ();
19861 /* Return the fixed registers used for condition codes. */
19863 static bool
19864 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19866 *p1 = FLAGS_REG;
19867 *p2 = FPSR_REG;
19868 return true;
19871 /* If two condition code modes are compatible, return a condition code
19872 mode which is compatible with both. Otherwise, return
19873 VOIDmode. */
19875 static machine_mode
19876 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19878 if (m1 == m2)
19879 return m1;
19881 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19882 return VOIDmode;
19884 if ((m1 == CCGCmode && m2 == CCGOCmode)
19885 || (m1 == CCGOCmode && m2 == CCGCmode))
19886 return CCGCmode;
19888 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19889 return m2;
19890 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19891 return m1;
19893 switch (m1)
19895 default:
19896 gcc_unreachable ();
19898 case CCmode:
19899 case CCGCmode:
19900 case CCGOCmode:
19901 case CCNOmode:
19902 case CCAmode:
19903 case CCCmode:
19904 case CCOmode:
19905 case CCSmode:
19906 case CCZmode:
19907 switch (m2)
19909 default:
19910 return VOIDmode;
19912 case CCmode:
19913 case CCGCmode:
19914 case CCGOCmode:
19915 case CCNOmode:
19916 case CCAmode:
19917 case CCCmode:
19918 case CCOmode:
19919 case CCSmode:
19920 case CCZmode:
19921 return CCmode;
19924 case CCFPmode:
19925 case CCFPUmode:
19926 /* These are only compatible with themselves, which we already
19927 checked above. */
19928 return VOIDmode;
19933 /* Return a comparison we can do that is equivalent to
19934 swap_condition (code), apart possibly from orderedness.
19935 But never change orderedness if TARGET_IEEE_FP, returning
19936 UNKNOWN in that case if necessary. */
19938 static enum rtx_code
19939 ix86_fp_swap_condition (enum rtx_code code)
19941 switch (code)
19943 case GT: /* GTU - CF=0 & ZF=0 */
19944 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19945 case GE: /* GEU - CF=0 */
19946 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19947 case UNLT: /* LTU - CF=1 */
19948 return TARGET_IEEE_FP ? UNKNOWN : GT;
19949 case UNLE: /* LEU - CF=1 | ZF=1 */
19950 return TARGET_IEEE_FP ? UNKNOWN : GE;
19951 default:
19952 return swap_condition (code);
19956 /* Return cost of comparison CODE using the best strategy for performance.
19957 All the following functions use the number of instructions as a cost metric.
19958 In the future this should be tweaked to compute bytes for optimize_size and
19959 to take into account the performance of various instructions on various CPUs. */
19961 static int
19962 ix86_fp_comparison_cost (enum rtx_code code)
19964 int arith_cost;
19966 /* The cost of code using bit-twiddling on %ah. */
19967 switch (code)
19969 case UNLE:
19970 case UNLT:
19971 case LTGT:
19972 case GT:
19973 case GE:
19974 case UNORDERED:
19975 case ORDERED:
19976 case UNEQ:
19977 arith_cost = 4;
19978 break;
19979 case LT:
19980 case NE:
19981 case EQ:
19982 case UNGE:
19983 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19984 break;
19985 case LE:
19986 case UNGT:
19987 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19988 break;
19989 default:
19990 gcc_unreachable ();
19993 switch (ix86_fp_comparison_strategy (code))
19995 case IX86_FPCMP_COMI:
19996 return arith_cost > 4 ? 3 : 2;
19997 case IX86_FPCMP_SAHF:
19998 return arith_cost > 4 ? 4 : 3;
19999 default:
20000 return arith_cost;
20004 /* Return the strategy to use for a floating-point comparison.  We assume
20005 that fcomi is always preferable where available, since that is also true
20006 when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20008 enum ix86_fpcmp_strategy
20009 ix86_fp_comparison_strategy (enum rtx_code)
20011 /* Do fcomi/sahf based test when profitable. */
20013 if (TARGET_CMOVE)
20014 return IX86_FPCMP_COMI;
20016 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20017 return IX86_FPCMP_SAHF;
20019 return IX86_FPCMP_ARITH;
20022 /* Swap, force into registers, or otherwise massage the two operands
20023 to a fp comparison. The operands are updated in place; the new
20024 comparison code is returned. */
20026 static enum rtx_code
20027 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20029 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20030 rtx op0 = *pop0, op1 = *pop1;
20031 machine_mode op_mode = GET_MODE (op0);
20032 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20034 /* All of the unordered compare instructions only work on registers.
20035 The same is true of the fcomi compare instructions. The XFmode
20036 compare instructions require registers except when comparing
20037 against zero or when converting operand 1 from fixed point to
20038 floating point. */
20040 if (!is_sse
20041 && (fpcmp_mode == CCFPUmode
20042 || (op_mode == XFmode
20043 && ! (standard_80387_constant_p (op0) == 1
20044 || standard_80387_constant_p (op1) == 1)
20045 && GET_CODE (op1) != FLOAT)
20046 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20048 op0 = force_reg (op_mode, op0);
20049 op1 = force_reg (op_mode, op1);
20051 else
20053 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20054 things around if they appear profitable, otherwise force op0
20055 into a register. */
20057 if (standard_80387_constant_p (op0) == 0
20058 || (MEM_P (op0)
20059 && ! (standard_80387_constant_p (op1) == 0
20060 || MEM_P (op1))))
20062 enum rtx_code new_code = ix86_fp_swap_condition (code);
20063 if (new_code != UNKNOWN)
20065 std::swap (op0, op1);
20066 code = new_code;
20070 if (!REG_P (op0))
20071 op0 = force_reg (op_mode, op0);
20073 if (CONSTANT_P (op1))
20075 int tmp = standard_80387_constant_p (op1);
20076 if (tmp == 0)
20077 op1 = validize_mem (force_const_mem (op_mode, op1));
20078 else if (tmp == 1)
20080 if (TARGET_CMOVE)
20081 op1 = force_reg (op_mode, op1);
20083 else
20084 op1 = force_reg (op_mode, op1);
20088 /* Try to rearrange the comparison to make it cheaper. */
20089 if (ix86_fp_comparison_cost (code)
20090 > ix86_fp_comparison_cost (swap_condition (code))
20091 && (REG_P (op1) || can_create_pseudo_p ()))
20093 std::swap (op0, op1);
20094 code = swap_condition (code);
20095 if (!REG_P (op0))
20096 op0 = force_reg (op_mode, op0);
20099 *pop0 = op0;
20100 *pop1 = op1;
20101 return code;
20104 /* Convert the comparison codes we use to represent an FP comparison to the
20105 integer code that will result in a proper branch.  Return UNKNOWN if no
20106 such code is available. */
20108 enum rtx_code
20109 ix86_fp_compare_code_to_integer (enum rtx_code code)
20111 switch (code)
20113 case GT:
20114 return GTU;
20115 case GE:
20116 return GEU;
20117 case ORDERED:
20118 case UNORDERED:
20119 return code;
20120 break;
20121 case UNEQ:
20122 return EQ;
20123 break;
20124 case UNLT:
20125 return LTU;
20126 break;
20127 case UNLE:
20128 return LEU;
20129 break;
20130 case LTGT:
20131 return NE;
20132 break;
20133 default:
20134 return UNKNOWN;
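/* Background note on the mapping above: fcomi/comiss/comisd set ZF, PF
   and CF the way an unsigned integer compare would, so "above" encodes
   GT, "above or equal" encodes GE, and the UN* codes map onto the
   carry-based LTU/LEU tests; the table itself is authoritative.  */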
20138 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20140 static rtx
20141 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20143 machine_mode fpcmp_mode, intcmp_mode;
20144 rtx tmp, tmp2;
20146 fpcmp_mode = ix86_fp_compare_mode (code);
20147 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20149 /* Do fcomi/sahf based test when profitable. */
20150 switch (ix86_fp_comparison_strategy (code))
20152 case IX86_FPCMP_COMI:
20153 intcmp_mode = fpcmp_mode;
20154 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20155 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20156 tmp);
20157 emit_insn (tmp);
20158 break;
20160 case IX86_FPCMP_SAHF:
20161 intcmp_mode = fpcmp_mode;
20162 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20163 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20164 tmp);
20166 if (!scratch)
20167 scratch = gen_reg_rtx (HImode);
20168 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20169 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20170 break;
20172 case IX86_FPCMP_ARITH:
20173 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20174 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20175 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20176 if (!scratch)
20177 scratch = gen_reg_rtx (HImode);
20178 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20180 /* In the unordered case, we have to check C2 for NaNs, which
20181 doesn't happen to work out to anything nice combination-wise.
20182 So do some bit twiddling on the value we've got in AH to come
20183 up with an appropriate set of condition codes. */
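/* Reminder on the constants used below: after fnstsw, AH holds the high
   byte of the FPU status word, with C0 in bit 0 (0x01), C2 in bit 2
   (0x04) and C3 in bit 6 (0x40); thus 0x45 tests C3|C2|C0, and 0x44,
   0x40, 0x05, 0x04 and 0x01 test the corresponding subsets.  */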
20185 intcmp_mode = CCNOmode;
20186 switch (code)
20188 case GT:
20189 case UNGT:
20190 if (code == GT || !TARGET_IEEE_FP)
20192 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20193 code = EQ;
20195 else
20197 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20198 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20199 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20200 intcmp_mode = CCmode;
20201 code = GEU;
20203 break;
20204 case LT:
20205 case UNLT:
20206 if (code == LT && TARGET_IEEE_FP)
20208 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20209 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20210 intcmp_mode = CCmode;
20211 code = EQ;
20213 else
20215 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20216 code = NE;
20218 break;
20219 case GE:
20220 case UNGE:
20221 if (code == GE || !TARGET_IEEE_FP)
20223 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20224 code = EQ;
20226 else
20228 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20229 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20230 code = NE;
20232 break;
20233 case LE:
20234 case UNLE:
20235 if (code == LE && TARGET_IEEE_FP)
20237 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20238 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20239 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20240 intcmp_mode = CCmode;
20241 code = LTU;
20243 else
20245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20246 code = NE;
20248 break;
20249 case EQ:
20250 case UNEQ:
20251 if (code == EQ && TARGET_IEEE_FP)
20253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20254 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20255 intcmp_mode = CCmode;
20256 code = EQ;
20258 else
20260 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20261 code = NE;
20263 break;
20264 case NE:
20265 case LTGT:
20266 if (code == NE && TARGET_IEEE_FP)
20268 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20269 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20270 GEN_INT (0x40)));
20271 code = NE;
20273 else
20275 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20276 code = EQ;
20278 break;
20280 case UNORDERED:
20281 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20282 code = NE;
20283 break;
20284 case ORDERED:
20285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20286 code = EQ;
20287 break;
20289 default:
20290 gcc_unreachable ();
20292 break;
20294 default:
20295 gcc_unreachable ();
20298 /* Return the test that should be put into the flags user, i.e.
20299 the bcc, scc, or cmov instruction. */
20300 return gen_rtx_fmt_ee (code, VOIDmode,
20301 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20302 const0_rtx);
20305 static rtx
20306 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20308 rtx ret;
20310 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20311 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20313 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20315 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20316 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20318 else
20319 ret = ix86_expand_int_compare (code, op0, op1);
20321 return ret;
20324 void
20325 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20327 machine_mode mode = GET_MODE (op0);
20328 rtx tmp;
20330 switch (mode)
20332 case SFmode:
20333 case DFmode:
20334 case XFmode:
20335 case QImode:
20336 case HImode:
20337 case SImode:
20338 simple:
20339 tmp = ix86_expand_compare (code, op0, op1);
20340 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20341 gen_rtx_LABEL_REF (VOIDmode, label),
20342 pc_rtx);
20343 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20344 return;
20346 case DImode:
20347 if (TARGET_64BIT)
20348 goto simple;
20349 case TImode:
20350 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
20352 rtx lo[2], hi[2];
20353 rtx_code_label *label2;
20354 enum rtx_code code1, code2, code3;
20355 machine_mode submode;
20357 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20359 std::swap (op0, op1);
20360 code = swap_condition (code);
20363 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20364 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20366 submode = mode == DImode ? SImode : DImode;
20368 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20369 avoid two branches. This costs one extra insn, so disable when
20370 optimizing for size. */
20372 if ((code == EQ || code == NE)
20373 && (!optimize_insn_for_size_p ()
20374 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20376 rtx xor0, xor1;
20378 xor1 = hi[0];
20379 if (hi[1] != const0_rtx)
20380 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20381 NULL_RTX, 0, OPTAB_WIDEN);
20383 xor0 = lo[0];
20384 if (lo[1] != const0_rtx)
20385 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20386 NULL_RTX, 0, OPTAB_WIDEN);
20388 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20389 NULL_RTX, 0, OPTAB_WIDEN);
20391 ix86_expand_branch (code, tmp, const0_rtx, label);
20392 return;
20395 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20396 op1 is a constant and the low word is zero, then we can just
20397 examine the high word. Similarly for low word -1 and
20398 less-or-equal-than or greater-than. */
20400 if (CONST_INT_P (hi[1]))
20401 switch (code)
20403 case LT: case LTU: case GE: case GEU:
20404 if (lo[1] == const0_rtx)
20406 ix86_expand_branch (code, hi[0], hi[1], label);
20407 return;
20409 break;
20410 case LE: case LEU: case GT: case GTU:
20411 if (lo[1] == constm1_rtx)
20413 ix86_expand_branch (code, hi[0], hi[1], label);
20414 return;
20416 break;
20417 default:
20418 break;
20421 /* Otherwise, we need two or three jumps. */
20423 label2 = gen_label_rtx ();
20425 code1 = code;
20426 code2 = swap_condition (code);
20427 code3 = unsigned_condition (code);
20429 switch (code)
20431 case LT: case GT: case LTU: case GTU:
20432 break;
20434 case LE: code1 = LT; code2 = GT; break;
20435 case GE: code1 = GT; code2 = LT; break;
20436 case LEU: code1 = LTU; code2 = GTU; break;
20437 case GEU: code1 = GTU; code2 = LTU; break;
20439 case EQ: code1 = UNKNOWN; code2 = NE; break;
20440 case NE: code2 = UNKNOWN; break;
20442 default:
20443 gcc_unreachable ();
20447 * a < b =>
20448 * if (hi(a) < hi(b)) goto true;
20449 * if (hi(a) > hi(b)) goto false;
20450 * if (lo(a) < lo(b)) goto true;
20451 * false:
20454 if (code1 != UNKNOWN)
20455 ix86_expand_branch (code1, hi[0], hi[1], label);
20456 if (code2 != UNKNOWN)
20457 ix86_expand_branch (code2, hi[0], hi[1], label2);
20459 ix86_expand_branch (code3, lo[0], lo[1], label);
20461 if (code2 != UNKNOWN)
20462 emit_label (label2);
20463 return;
20466 default:
20467 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20468 goto simple;
20472 /* Split branch based on floating point condition. */
20473 void
20474 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20475 rtx target1, rtx target2, rtx tmp)
20477 rtx condition;
20478 rtx i;
20480 if (target2 != pc_rtx)
20482 rtx tmp = target2;
20483 code = reverse_condition_maybe_unordered (code);
20484 target2 = target1;
20485 target1 = tmp;
20488 condition = ix86_expand_fp_compare (code, op1, op2,
20489 tmp);
20491 i = emit_jump_insn (gen_rtx_SET
20492 (VOIDmode, pc_rtx,
20493 gen_rtx_IF_THEN_ELSE (VOIDmode,
20494 condition, target1, target2)));
20495 if (split_branch_probability >= 0)
20496 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20499 void
20500 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20502 rtx ret;
20504 gcc_assert (GET_MODE (dest) == QImode);
20506 ret = ix86_expand_compare (code, op0, op1);
20507 PUT_MODE (ret, QImode);
20508 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20511 /* Expand a comparison setting or clearing the carry flag.  Return true when
20512 successful and set *POP to the comparison for the operation. */
20513 static bool
20514 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20516 machine_mode mode =
20517 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20519 /* Do not handle double-mode compares that go through a special path. */
20520 if (mode == (TARGET_64BIT ? TImode : DImode))
20521 return false;
20523 if (SCALAR_FLOAT_MODE_P (mode))
20525 rtx compare_op;
20526 rtx_insn *compare_seq;
20528 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20530 /* Shortcut: the following common codes never translate
20531 into carry flag compares. */
20532 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20533 || code == ORDERED || code == UNORDERED)
20534 return false;
20536 /* These comparisons require the zero flag; swap the operands so they won't. */
20537 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20538 && !TARGET_IEEE_FP)
20540 std::swap (op0, op1);
20541 code = swap_condition (code);
20544 /* Try to expand the comparison and verify that we end up with
20545 a carry-flag-based comparison.  This fails to be true only when
20546 we decide to expand the comparison using arithmetic, which is not
20547 a very common scenario. */
20548 start_sequence ();
20549 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20550 compare_seq = get_insns ();
20551 end_sequence ();
20553 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20554 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20555 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20556 else
20557 code = GET_CODE (compare_op);
20559 if (code != LTU && code != GEU)
20560 return false;
20562 emit_insn (compare_seq);
20563 *pop = compare_op;
20564 return true;
20567 if (!INTEGRAL_MODE_P (mode))
20568 return false;
20570 switch (code)
20572 case LTU:
20573 case GEU:
20574 break;
20576 /* Convert a==0 into (unsigned)a<1. */
20577 case EQ:
20578 case NE:
20579 if (op1 != const0_rtx)
20580 return false;
20581 op1 = const1_rtx;
20582 code = (code == EQ ? LTU : GEU);
20583 break;
20585 /* Convert a>b into b<a or a>=b+1. */
20586 case GTU:
20587 case LEU:
20588 if (CONST_INT_P (op1))
20590 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20591 /* Bail out on overflow.  We could still swap the operands, but that
20592 would force the constant to be loaded into a register. */
20593 if (op1 == const0_rtx
20594 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20595 return false;
20596 code = (code == GTU ? GEU : LTU);
20598 else
20600 std::swap (op1, op0);
20601 code = (code == GTU ? LTU : GEU);
20603 break;
20605 /* Convert a>=0 into (unsigned)a<0x80000000. */
20606 case LT:
20607 case GE:
20608 if (mode == DImode || op1 != const0_rtx)
20609 return false;
20610 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20611 code = (code == LT ? GEU : LTU);
20612 break;
20613 case LE:
20614 case GT:
20615 if (mode == DImode || op1 != constm1_rtx)
20616 return false;
20617 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20618 code = (code == LE ? GEU : LTU);
20619 break;
20621 default:
20622 return false;
20624 /* Swapping operands may cause a constant to appear as the first operand. */
20625 if (!nonimmediate_operand (op0, VOIDmode))
20627 if (!can_create_pseudo_p ())
20628 return false;
20629 op0 = force_reg (mode, op0);
20631 *pop = ix86_expand_compare (code, op0, op1);
20632 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20633 return true;
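/* Illustrative note: the EQ/NE transform above turns a == 0 into
   (unsigned) a < 1, so a single cmp $1, a sets the carry flag exactly
   when a is zero; ix86_expand_int_movcc below exploits this with
   sbb-based sequences instead of a branch.  */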
20636 bool
20637 ix86_expand_int_movcc (rtx operands[])
20639 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20640 rtx_insn *compare_seq;
20641 rtx compare_op;
20642 machine_mode mode = GET_MODE (operands[0]);
20643 bool sign_bit_compare_p = false;
20644 rtx op0 = XEXP (operands[1], 0);
20645 rtx op1 = XEXP (operands[1], 1);
20647 if (GET_MODE (op0) == TImode
20648 || (GET_MODE (op0) == DImode
20649 && !TARGET_64BIT))
20650 return false;
20652 start_sequence ();
20653 compare_op = ix86_expand_compare (code, op0, op1);
20654 compare_seq = get_insns ();
20655 end_sequence ();
20657 compare_code = GET_CODE (compare_op);
20659 if ((op1 == const0_rtx && (code == GE || code == LT))
20660 || (op1 == constm1_rtx && (code == GT || code == LE)))
20661 sign_bit_compare_p = true;
20663 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20664 HImode insns, we'd be swallowed in word prefix ops. */
20666 if ((mode != HImode || TARGET_FAST_PREFIX)
20667 && (mode != (TARGET_64BIT ? TImode : DImode))
20668 && CONST_INT_P (operands[2])
20669 && CONST_INT_P (operands[3]))
20671 rtx out = operands[0];
20672 HOST_WIDE_INT ct = INTVAL (operands[2]);
20673 HOST_WIDE_INT cf = INTVAL (operands[3]);
20674 HOST_WIDE_INT diff;
20676 diff = ct - cf;
20677 /* Sign bit compares are better done using shifts than by using
20678 sbb. */
20679 if (sign_bit_compare_p
20680 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20682 /* Detect overlap between destination and compare sources. */
20683 rtx tmp = out;
20685 if (!sign_bit_compare_p)
20687 rtx flags;
20688 bool fpcmp = false;
20690 compare_code = GET_CODE (compare_op);
20692 flags = XEXP (compare_op, 0);
20694 if (GET_MODE (flags) == CCFPmode
20695 || GET_MODE (flags) == CCFPUmode)
20697 fpcmp = true;
20698 compare_code
20699 = ix86_fp_compare_code_to_integer (compare_code);
20702 /* To simplify the rest of the code, restrict to the GEU case. */
20703 if (compare_code == LTU)
20705 HOST_WIDE_INT tmp = ct;
20706 ct = cf;
20707 cf = tmp;
20708 compare_code = reverse_condition (compare_code);
20709 code = reverse_condition (code);
20711 else
20713 if (fpcmp)
20714 PUT_CODE (compare_op,
20715 reverse_condition_maybe_unordered
20716 (GET_CODE (compare_op)));
20717 else
20718 PUT_CODE (compare_op,
20719 reverse_condition (GET_CODE (compare_op)));
20721 diff = ct - cf;
20723 if (reg_overlap_mentioned_p (out, op0)
20724 || reg_overlap_mentioned_p (out, op1))
20725 tmp = gen_reg_rtx (mode);
20727 if (mode == DImode)
20728 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20729 else
20730 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20731 flags, compare_op));
20733 else
20735 if (code == GT || code == GE)
20736 code = reverse_condition (code);
20737 else
20739 HOST_WIDE_INT tmp = ct;
20740 ct = cf;
20741 cf = tmp;
20742 diff = ct - cf;
20744 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20747 if (diff == 1)
20750 * cmpl op0,op1
20751 * sbbl dest,dest
20752 * [addl dest, ct]
20754 * Size 5 - 8.
20756 if (ct)
20757 tmp = expand_simple_binop (mode, PLUS,
20758 tmp, GEN_INT (ct),
20759 copy_rtx (tmp), 1, OPTAB_DIRECT);
20761 else if (cf == -1)
20764 * cmpl op0,op1
20765 * sbbl dest,dest
20766 * orl $ct, dest
20768 * Size 8.
20770 tmp = expand_simple_binop (mode, IOR,
20771 tmp, GEN_INT (ct),
20772 copy_rtx (tmp), 1, OPTAB_DIRECT);
20774 else if (diff == -1 && ct)
20777 * cmpl op0,op1
20778 * sbbl dest,dest
20779 * notl dest
20780 * [addl dest, cf]
20782 * Size 8 - 11.
20784 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20785 if (cf)
20786 tmp = expand_simple_binop (mode, PLUS,
20787 copy_rtx (tmp), GEN_INT (cf),
20788 copy_rtx (tmp), 1, OPTAB_DIRECT);
20790 else
20793 * cmpl op0,op1
20794 * sbbl dest,dest
20795 * [notl dest]
20796 * andl cf - ct, dest
20797 * [addl dest, ct]
20799 * Size 8 - 11.
20802 if (cf == 0)
20804 cf = ct;
20805 ct = 0;
20806 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20809 tmp = expand_simple_binop (mode, AND,
20810 copy_rtx (tmp),
20811 gen_int_mode (cf - ct, mode),
20812 copy_rtx (tmp), 1, OPTAB_DIRECT);
20813 if (ct)
20814 tmp = expand_simple_binop (mode, PLUS,
20815 copy_rtx (tmp), GEN_INT (ct),
20816 copy_rtx (tmp), 1, OPTAB_DIRECT);
20819 if (!rtx_equal_p (tmp, out))
20820 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20822 return true;
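/* Worked example for the path above (constants assumed): with ct = 5
   and cf = 4 we have diff == 1, so the emitted sequence degenerates to

     cmpl op0,op1
     sbbl dest,dest     ; dest becomes -1 or 0 from the carry
     addl $5,dest       ; yields 4 or 5 depending on the comparison

   i.e. the whole conditional move costs three insns and no branch.  */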
20825 if (diff < 0)
20827 machine_mode cmp_mode = GET_MODE (op0);
20829 std::swap (ct, cf);
20830 diff = -diff;
20832 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20834 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20836 /* We may be reversing an unordered compare into a normal compare,
20837 which is not valid in general (we may convert a non-trapping
20838 condition into a trapping one); however, on i386 we currently
20839 emit all comparisons unordered. */
20840 compare_code = reverse_condition_maybe_unordered (compare_code);
20841 code = reverse_condition_maybe_unordered (code);
20843 else
20845 compare_code = reverse_condition (compare_code);
20846 code = reverse_condition (code);
20850 compare_code = UNKNOWN;
20851 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20852 && CONST_INT_P (op1))
20854 if (op1 == const0_rtx
20855 && (code == LT || code == GE))
20856 compare_code = code;
20857 else if (op1 == constm1_rtx)
20859 if (code == LE)
20860 compare_code = LT;
20861 else if (code == GT)
20862 compare_code = GE;
20866 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20867 if (compare_code != UNKNOWN
20868 && GET_MODE (op0) == GET_MODE (out)
20869 && (cf == -1 || ct == -1))
20871 /* If lea code below could be used, only optimize
20872 if it results in a 2 insn sequence. */
20874 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20875 || diff == 3 || diff == 5 || diff == 9)
20876 || (compare_code == LT && ct == -1)
20877 || (compare_code == GE && cf == -1))
20880 * notl op1 (if necessary)
20881 * sarl $31, op1
20882 * orl cf, op1
20884 if (ct != -1)
20886 cf = ct;
20887 ct = -1;
20888 code = reverse_condition (code);
20891 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20893 out = expand_simple_binop (mode, IOR,
20894 out, GEN_INT (cf),
20895 out, 1, OPTAB_DIRECT);
20896 if (out != operands[0])
20897 emit_move_insn (operands[0], out);
20899 return true;
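/* Illustrative sketch (constants assumed): for

     dest = (x < 0) ? -1 : 42;

   the branch above emits roughly

     sarl $31,dest      ; replicate the sign bit into every bit
     orl  $42,dest      ; 0|42 == 42, -1|42 == -1

   with an extra notl only when the constants are the other way round.  */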
20904 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20905 || diff == 3 || diff == 5 || diff == 9)
20906 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20907 && (mode != DImode
20908 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20911 * xorl dest,dest
20912 * cmpl op1,op2
20913 * setcc dest
20914 * lea cf(dest*(ct-cf)),dest
20916 * Size 14.
20918 * This also catches the degenerate setcc-only case.
20921 rtx tmp;
20922 int nops;
20924 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20926 nops = 0;
20927 /* On x86_64 the lea instruction operates on Pmode, so we need
20928 to do the arithmetic in the proper mode to match. */
20929 if (diff == 1)
20930 tmp = copy_rtx (out);
20931 else
20933 rtx out1;
20934 out1 = copy_rtx (out);
20935 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20936 nops++;
20937 if (diff & 1)
20939 tmp = gen_rtx_PLUS (mode, tmp, out1);
20940 nops++;
20943 if (cf != 0)
20945 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20946 nops++;
20948 if (!rtx_equal_p (tmp, out))
20950 if (nops == 1)
20951 out = force_operand (tmp, copy_rtx (out));
20952 else
20953 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20955 if (!rtx_equal_p (out, operands[0]))
20956 emit_move_insn (operands[0], copy_rtx (out));
20958 return true;
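/* Illustrative sketch (constants assumed): with ct = 7 and cf = 3 we
   get diff == 4, so after

     xorl dest,dest
     cmpl op1,op2
     setcc dest

   the result is formed by a single lea, e.g. leal 3(,dest,4),dest
   (dest*4 + 3 is 3 or 7), which is why only diffs expressible through
   lea scaling (1, 2, 3, 4, 5, 8, 9) are accepted here.  */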
20962 * General case: Jumpful:
20963 * xorl dest,dest cmpl op1, op2
20964 * cmpl op1, op2 movl ct, dest
20965 * setcc dest jcc 1f
20966 * decl dest movl cf, dest
20967 * andl (cf-ct),dest 1:
20968 * addl ct,dest
20970 * Size 20. Size 14.
20972 * This is reasonably steep, but branch mispredict costs are
20973 * high on modern cpus, so consider failing only if optimizing
20974 * for space.
20977 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20978 && BRANCH_COST (optimize_insn_for_speed_p (),
20979 false) >= 2)
20981 if (cf == 0)
20983 machine_mode cmp_mode = GET_MODE (op0);
20985 cf = ct;
20986 ct = 0;
20988 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20990 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20992 /* We may be reversing an unordered compare into a normal
20993 compare, which is not valid in general (we may convert a
20994 non-trapping condition into a trapping one); however, on i386
20995 we currently emit all comparisons unordered. */
20996 code = reverse_condition_maybe_unordered (code);
20998 else
21000 code = reverse_condition (code);
21001 if (compare_code != UNKNOWN)
21002 compare_code = reverse_condition (compare_code);
21006 if (compare_code != UNKNOWN)
21008 /* notl op1 (if needed)
21009 sarl $31, op1
21010 andl (cf-ct), op1
21011 addl ct, op1
21013 For x < 0 (resp. x <= -1) there will be no notl,
21014 so if possible swap the constants to get rid of the
21015 complement.
21016 True/false will be -1/0 while code below (store flag
21017 followed by decrement) is 0/-1, so the constants need
21018 to be exchanged once more. */
21020 if (compare_code == GE || !cf)
21022 code = reverse_condition (code);
21023 compare_code = LT;
21025 else
21026 std::swap (cf, ct);
21028 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21030 else
21032 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21034 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21035 constm1_rtx,
21036 copy_rtx (out), 1, OPTAB_DIRECT);
21039 out = expand_simple_binop (mode, AND, copy_rtx (out),
21040 gen_int_mode (cf - ct, mode),
21041 copy_rtx (out), 1, OPTAB_DIRECT);
21042 if (ct)
21043 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21044 copy_rtx (out), 1, OPTAB_DIRECT);
21045 if (!rtx_equal_p (out, operands[0]))
21046 emit_move_insn (operands[0], copy_rtx (out));
21048 return true;
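/* Illustration of the branchless fallback above (constants assumed):
   for dest = cond ? 12 : 2 it emits roughly setcc; decl; andl $-10;
   addl $12 -- the 0/-1 mask produced by the decrement selects between
   adding 0 and adding cf-ct on top of ct.  */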
21052 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21054 /* Try a few more things with specific constants and a variable. */
21056 optab op;
21057 rtx var, orig_out, out, tmp;
21059 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21060 return false;
21062 /* If one of the two operands is an interesting constant, load a
21063 constant with the above and mask it in with a logical operation. */
21065 if (CONST_INT_P (operands[2]))
21067 var = operands[3];
21068 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21069 operands[3] = constm1_rtx, op = and_optab;
21070 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21071 operands[3] = const0_rtx, op = ior_optab;
21072 else
21073 return false;
21075 else if (CONST_INT_P (operands[3]))
21077 var = operands[2];
21078 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21079 operands[2] = constm1_rtx, op = and_optab;
21080 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21081 operands[2] = const0_rtx, op = ior_optab;
21082 else
21083 return false;
21085 else
21086 return false;
21088 orig_out = operands[0];
21089 tmp = gen_reg_rtx (mode);
21090 operands[0] = tmp;
21092 /* Recurse to get the constant loaded. */
21093 if (ix86_expand_int_movcc (operands) == 0)
21094 return false;
21096 /* Mask in the interesting variable. */
21097 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21098 OPTAB_WIDEN);
21099 if (!rtx_equal_p (out, orig_out))
21100 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21102 return true;
21106 * For comparison with above,
21108 * movl cf,dest
21109 * movl ct,tmp
21110 * cmpl op1,op2
21111 * cmovcc tmp,dest
21113 * Size 15.
21116 if (! nonimmediate_operand (operands[2], mode))
21117 operands[2] = force_reg (mode, operands[2]);
21118 if (! nonimmediate_operand (operands[3], mode))
21119 operands[3] = force_reg (mode, operands[3]);
21121 if (! register_operand (operands[2], VOIDmode)
21122 && (mode == QImode
21123 || ! register_operand (operands[3], VOIDmode)))
21124 operands[2] = force_reg (mode, operands[2]);
21126 if (mode == QImode
21127 && ! register_operand (operands[3], VOIDmode))
21128 operands[3] = force_reg (mode, operands[3]);
21130 emit_insn (compare_seq);
21131 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21132 gen_rtx_IF_THEN_ELSE (mode,
21133 compare_op, operands[2],
21134 operands[3])));
21135 return true;
21138 /* Swap, force into registers, or otherwise massage the two operands
21139 to an sse comparison with a mask result. Thus we differ a bit from
21140 ix86_prepare_fp_compare_args which expects to produce a flags result.
21142 The DEST operand exists to help determine whether to commute commutative
21143 operators. The POP0/POP1 operands are updated in place. The new
21144 comparison code is returned, or UNKNOWN if not implementable. */
21146 static enum rtx_code
21147 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21148 rtx *pop0, rtx *pop1)
21150 switch (code)
21152 case LTGT:
21153 case UNEQ:
21154 /* AVX supports all the needed comparisons. */
21155 if (TARGET_AVX)
21156 break;
21157 /* We have no LTGT as an operator. We could implement it with
21158 NE & ORDERED, but this requires an extra temporary. It's
21159 not clear that it's worth it. */
21160 return UNKNOWN;
21162 case LT:
21163 case LE:
21164 case UNGT:
21165 case UNGE:
21166 /* These are supported directly. */
21167 break;
21169 case EQ:
21170 case NE:
21171 case UNORDERED:
21172 case ORDERED:
21173 /* AVX has 3 operand comparisons, no need to swap anything. */
21174 if (TARGET_AVX)
21175 break;
21176 /* For commutative operators, try to canonicalize the destination
21177 operand to be first in the comparison - this helps reload to
21178 avoid extra moves. */
21179 if (!dest || !rtx_equal_p (dest, *pop1))
21180 break;
21181 /* FALLTHRU */
21183 case GE:
21184 case GT:
21185 case UNLE:
21186 case UNLT:
21187 /* These are not supported directly before AVX, and furthermore
21188 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21189 comparison operands to transform into something that is
21190 supported. */
21191 std::swap (*pop0, *pop1);
21192 code = swap_condition (code);
21193 break;
21195 default:
21196 gcc_unreachable ();
21199 return code;
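/* Example of the canonicalization above (hypothetical operands): a GT
   comparison a > b, which pre-AVX SSE cannot encode directly, is turned
   into b < a by swapping *pop0 and *pop1 and returning LT, which then
   maps onto cmpltps/cmpltpd in the compare expansion.  */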
21202 /* Detect conditional moves that exactly match min/max operational
21203 semantics. Note that this is IEEE safe, as long as we don't
21204 interchange the operands.
21206 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21207 and TRUE if the operation is successful and instructions are emitted. */
21209 static bool
21210 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21211 rtx cmp_op1, rtx if_true, rtx if_false)
21213 machine_mode mode;
21214 bool is_min;
21215 rtx tmp;
21217 if (code == LT)
21219 else if (code == UNGE)
21220 std::swap (if_true, if_false);
21221 else
21222 return false;
21224 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21225 is_min = true;
21226 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21227 is_min = false;
21228 else
21229 return false;
21231 mode = GET_MODE (dest);
21233 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21234 but MODE may be a vector mode and thus not appropriate. */
21235 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21237 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21238 rtvec v;
21240 if_true = force_reg (mode, if_true);
21241 v = gen_rtvec (2, if_true, if_false);
21242 tmp = gen_rtx_UNSPEC (mode, v, u);
21244 else
21246 code = is_min ? SMIN : SMAX;
21247 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21250 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21251 return true;
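/* Illustration (assumed source): dest = (a < b) ? a : b hits the is_min
   case.  When both flag_finite_math_only and flag_unsafe_math_optimizations
   are set (e.g. under -ffast-math) it is emitted directly as SMIN
   (minss/minsd); otherwise the UNSPEC_IEEE_MIN pattern is used so that
   operand order, and with it NaN and signed-zero behaviour, is kept.  */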
21254 /* Expand an sse vector comparison. Return the register with the result. */
21256 static rtx
21257 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21258 rtx op_true, rtx op_false)
21260 machine_mode mode = GET_MODE (dest);
21261 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21263 /* In the general case the result of the comparison can differ from the operands' type. */
21264 machine_mode cmp_mode;
21266 /* In AVX512F the result of comparison is an integer mask. */
21267 bool maskcmp = false;
21268 rtx x;
21270 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21272 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21273 gcc_assert (cmp_mode != BLKmode);
21275 maskcmp = true;
21277 else
21278 cmp_mode = cmp_ops_mode;
21281 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21282 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21283 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21285 if (optimize
21286 || reg_overlap_mentioned_p (dest, op_true)
21287 || reg_overlap_mentioned_p (dest, op_false))
21288 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21290 /* Compare patterns for int modes are unspec in AVX512F only. */
21291 if (maskcmp && (code == GT || code == EQ))
21293 rtx (*gen)(rtx, rtx, rtx);
21295 switch (cmp_ops_mode)
21297 case V16SImode:
21298 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21299 break;
21300 case V8DImode:
21301 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21302 break;
21303 default:
21304 gen = NULL;
21307 if (gen)
21309 emit_insn (gen (dest, cmp_op0, cmp_op1));
21310 return dest;
21313 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21315 if (cmp_mode != mode && !maskcmp)
21317 x = force_reg (cmp_ops_mode, x);
21318 convert_move (dest, x, false);
21320 else
21321 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21323 return dest;
21326 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21327 operations. This is used for both scalar and vector conditional moves. */
21329 static void
21330 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21332 machine_mode mode = GET_MODE (dest);
21333 machine_mode cmpmode = GET_MODE (cmp);
21335 /* In AVX512F the result of comparison is an integer mask. */
21336 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21338 rtx t2, t3, x;
21340 if (vector_all_ones_operand (op_true, mode)
21341 && rtx_equal_p (op_false, CONST0_RTX (mode))
21342 && !maskcmp)
21344 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21346 else if (op_false == CONST0_RTX (mode)
21347 && !maskcmp)
21349 op_true = force_reg (mode, op_true);
21350 x = gen_rtx_AND (mode, cmp, op_true);
21351 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21353 else if (op_true == CONST0_RTX (mode)
21354 && !maskcmp)
21356 op_false = force_reg (mode, op_false);
21357 x = gen_rtx_NOT (mode, cmp);
21358 x = gen_rtx_AND (mode, x, op_false);
21359 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21361 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21362 && !maskcmp)
21364 op_false = force_reg (mode, op_false);
21365 x = gen_rtx_IOR (mode, cmp, op_false);
21366 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21368 else if (TARGET_XOP
21369 && !maskcmp)
21371 op_true = force_reg (mode, op_true);
21373 if (!nonimmediate_operand (op_false, mode))
21374 op_false = force_reg (mode, op_false);
21376 emit_insn (gen_rtx_SET (mode, dest,
21377 gen_rtx_IF_THEN_ELSE (mode, cmp,
21378 op_true,
21379 op_false)));
21381 else
21383 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21384 rtx d = dest;
21386 if (!nonimmediate_operand (op_true, mode))
21387 op_true = force_reg (mode, op_true);
21389 op_false = force_reg (mode, op_false);
21391 switch (mode)
21393 case V4SFmode:
21394 if (TARGET_SSE4_1)
21395 gen = gen_sse4_1_blendvps;
21396 break;
21397 case V2DFmode:
21398 if (TARGET_SSE4_1)
21399 gen = gen_sse4_1_blendvpd;
21400 break;
21401 case V16QImode:
21402 case V8HImode:
21403 case V4SImode:
21404 case V2DImode:
21405 if (TARGET_SSE4_1)
21407 gen = gen_sse4_1_pblendvb;
21408 if (mode != V16QImode)
21409 d = gen_reg_rtx (V16QImode);
21410 op_false = gen_lowpart (V16QImode, op_false);
21411 op_true = gen_lowpart (V16QImode, op_true);
21412 cmp = gen_lowpart (V16QImode, cmp);
21414 break;
21415 case V8SFmode:
21416 if (TARGET_AVX)
21417 gen = gen_avx_blendvps256;
21418 break;
21419 case V4DFmode:
21420 if (TARGET_AVX)
21421 gen = gen_avx_blendvpd256;
21422 break;
21423 case V32QImode:
21424 case V16HImode:
21425 case V8SImode:
21426 case V4DImode:
21427 if (TARGET_AVX2)
21429 gen = gen_avx2_pblendvb;
21430 if (mode != V32QImode)
21431 d = gen_reg_rtx (V32QImode);
21432 op_false = gen_lowpart (V32QImode, op_false);
21433 op_true = gen_lowpart (V32QImode, op_true);
21434 cmp = gen_lowpart (V32QImode, cmp);
21436 break;
21438 case V64QImode:
21439 gen = gen_avx512bw_blendmv64qi;
21440 break;
21441 case V32HImode:
21442 gen = gen_avx512bw_blendmv32hi;
21443 break;
21444 case V16SImode:
21445 gen = gen_avx512f_blendmv16si;
21446 break;
21447 case V8DImode:
21448 gen = gen_avx512f_blendmv8di;
21449 break;
21450 case V8DFmode:
21451 gen = gen_avx512f_blendmv8df;
21452 break;
21453 case V16SFmode:
21454 gen = gen_avx512f_blendmv16sf;
21455 break;
21457 default:
21458 break;
21461 if (gen != NULL)
21463 emit_insn (gen (d, op_false, op_true, cmp));
21464 if (d != dest)
21465 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21467 else
21469 op_true = force_reg (mode, op_true);
21471 t2 = gen_reg_rtx (mode);
21472 if (optimize)
21473 t3 = gen_reg_rtx (mode);
21474 else
21475 t3 = dest;
21477 x = gen_rtx_AND (mode, op_true, cmp);
21478 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21480 x = gen_rtx_NOT (mode, cmp);
21481 x = gen_rtx_AND (mode, x, op_false);
21482 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21484 x = gen_rtx_IOR (mode, t3, t2);
21485 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
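/* Minimal sketch of the final fallback above (no blend insn available):
   given a comparison mask CMP whose elements are all-ones or all-zeros,
   the select is computed as

     t2   = op_true  &  cmp;
     t3   = op_false & ~cmp;
     dest = t2 | t3;

   i.e. the classic and/andnot/or idiom.  */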
21490 /* Expand a floating-point conditional move. Return true if successful. */
21492 bool
21493 ix86_expand_fp_movcc (rtx operands[])
21495 machine_mode mode = GET_MODE (operands[0]);
21496 enum rtx_code code = GET_CODE (operands[1]);
21497 rtx tmp, compare_op;
21498 rtx op0 = XEXP (operands[1], 0);
21499 rtx op1 = XEXP (operands[1], 1);
21501 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21503 machine_mode cmode;
21505 /* Since we have no cmove for sse registers, don't force bad register
21506 allocation just to gain access to it. Deny movcc when the
21507 comparison mode doesn't match the move mode. */
21508 cmode = GET_MODE (op0);
21509 if (cmode == VOIDmode)
21510 cmode = GET_MODE (op1);
21511 if (cmode != mode)
21512 return false;
21514 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21515 if (code == UNKNOWN)
21516 return false;
21518 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21519 operands[2], operands[3]))
21520 return true;
21522 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21523 operands[2], operands[3]);
21524 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21525 return true;
21528 if (GET_MODE (op0) == TImode
21529 || (GET_MODE (op0) == DImode
21530 && !TARGET_64BIT))
21531 return false;
21533 /* The floating point conditional move instructions don't directly
21534 support conditions resulting from a signed integer comparison. */
21536 compare_op = ix86_expand_compare (code, op0, op1);
21537 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21539 tmp = gen_reg_rtx (QImode);
21540 ix86_expand_setcc (tmp, code, op0, op1);
21542 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21545 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21546 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21547 operands[2], operands[3])));
21549 return true;
21552 /* Expand a floating-point vector conditional move; a vcond operation
21553 rather than a movcc operation. */
21555 bool
21556 ix86_expand_fp_vcond (rtx operands[])
21558 enum rtx_code code = GET_CODE (operands[3]);
21559 rtx cmp;
21561 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21562 &operands[4], &operands[5]);
21563 if (code == UNKNOWN)
21565 rtx temp;
21566 switch (GET_CODE (operands[3]))
21568 case LTGT:
21569 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21570 operands[5], operands[0], operands[0]);
21571 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21572 operands[5], operands[1], operands[2]);
21573 code = AND;
21574 break;
21575 case UNEQ:
21576 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21577 operands[5], operands[0], operands[0]);
21578 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21579 operands[5], operands[1], operands[2]);
21580 code = IOR;
21581 break;
21582 default:
21583 gcc_unreachable ();
21585 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21586 OPTAB_DIRECT);
21587 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21588 return true;
21591 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21592 operands[5], operands[1], operands[2]))
21593 return true;
21595 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21596 operands[1], operands[2]);
21597 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21598 return true;
21601 /* Expand a signed/unsigned integral vector conditional move. */
21603 bool
21604 ix86_expand_int_vcond (rtx operands[])
21606 machine_mode data_mode = GET_MODE (operands[0]);
21607 machine_mode mode = GET_MODE (operands[4]);
21608 enum rtx_code code = GET_CODE (operands[3]);
21609 bool negate = false;
21610 rtx x, cop0, cop1;
21612 cop0 = operands[4];
21613 cop1 = operands[5];
21615 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21616 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21617 if ((code == LT || code == GE)
21618 && data_mode == mode
21619 && cop1 == CONST0_RTX (mode)
21620 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21621 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21622 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21623 && (GET_MODE_SIZE (data_mode) == 16
21624 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21626 rtx negop = operands[2 - (code == LT)];
21627 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21628 if (negop == CONST1_RTX (data_mode))
21630 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21631 operands[0], 1, OPTAB_DIRECT);
21632 if (res != operands[0])
21633 emit_move_insn (operands[0], res);
21634 return true;
21636 else if (GET_MODE_INNER (data_mode) != DImode
21637 && vector_all_ones_operand (negop, data_mode))
21639 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21640 operands[0], 0, OPTAB_DIRECT);
21641 if (res != operands[0])
21642 emit_move_insn (operands[0], res);
21643 return true;
21647 if (!nonimmediate_operand (cop1, mode))
21648 cop1 = force_reg (mode, cop1);
21649 if (!general_operand (operands[1], data_mode))
21650 operands[1] = force_reg (data_mode, operands[1]);
21651 if (!general_operand (operands[2], data_mode))
21652 operands[2] = force_reg (data_mode, operands[2]);
21654 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21655 if (TARGET_XOP
21656 && (mode == V16QImode || mode == V8HImode
21657 || mode == V4SImode || mode == V2DImode))
21659 else
21661 /* Canonicalize the comparison to EQ, GT, GTU. */
21662 switch (code)
21664 case EQ:
21665 case GT:
21666 case GTU:
21667 break;
21669 case NE:
21670 case LE:
21671 case LEU:
21672 code = reverse_condition (code);
21673 negate = true;
21674 break;
21676 case GE:
21677 case GEU:
21678 code = reverse_condition (code);
21679 negate = true;
21680 /* FALLTHRU */
21682 case LT:
21683 case LTU:
21684 std::swap (cop0, cop1);
21685 code = swap_condition (code);
21686 break;
21688 default:
21689 gcc_unreachable ();
21692 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21693 if (mode == V2DImode)
21695 switch (code)
21697 case EQ:
21698 /* SSE4.1 supports EQ. */
21699 if (!TARGET_SSE4_1)
21700 return false;
21701 break;
21703 case GT:
21704 case GTU:
21705 /* SSE4.2 supports GT/GTU. */
21706 if (!TARGET_SSE4_2)
21707 return false;
21708 break;
21710 default:
21711 gcc_unreachable ();
21715 /* Unsigned parallel compare is not supported by the hardware.
21716 Play some tricks to turn this into a signed comparison
21717 against 0. */
21718 if (code == GTU)
21720 cop0 = force_reg (mode, cop0);
21722 switch (mode)
21724 case V16SImode:
21725 case V8DImode:
21726 case V8SImode:
21727 case V4DImode:
21728 case V4SImode:
21729 case V2DImode:
21731 rtx t1, t2, mask;
21732 rtx (*gen_sub3) (rtx, rtx, rtx);
21734 switch (mode)
21736 case V16SImode: gen_sub3 = gen_subv16si3; break;
21737 case V8DImode: gen_sub3 = gen_subv8di3; break;
21738 case V8SImode: gen_sub3 = gen_subv8si3; break;
21739 case V4DImode: gen_sub3 = gen_subv4di3; break;
21740 case V4SImode: gen_sub3 = gen_subv4si3; break;
21741 case V2DImode: gen_sub3 = gen_subv2di3; break;
21742 default:
21743 gcc_unreachable ();
21745 /* Subtract (-(INT MAX) - 1) from both operands to make
21746 them signed. */
21747 mask = ix86_build_signbit_mask (mode, true, false);
21748 t1 = gen_reg_rtx (mode);
21749 emit_insn (gen_sub3 (t1, cop0, mask));
21751 t2 = gen_reg_rtx (mode);
21752 emit_insn (gen_sub3 (t2, cop1, mask));
21754 cop0 = t1;
21755 cop1 = t2;
21756 code = GT;
21758 break;
21760 case V64QImode:
21761 case V32HImode:
21762 case V32QImode:
21763 case V16HImode:
21764 case V16QImode:
21765 case V8HImode:
21766 /* Perform a parallel unsigned saturating subtraction. */
21767 x = gen_reg_rtx (mode);
21768 emit_insn (gen_rtx_SET (VOIDmode, x,
21769 gen_rtx_US_MINUS (mode, cop0, cop1)));
21771 cop0 = x;
21772 cop1 = CONST0_RTX (mode);
21773 code = EQ;
21774 negate = !negate;
21775 break;
21777 default:
21778 gcc_unreachable ();
21783 /* Allow the comparison to be done in one mode, but the movcc to
21784 happen in another mode. */
21785 if (data_mode == mode)
21787 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21788 operands[1+negate], operands[2-negate]);
21790 else
21792 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21793 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21794 operands[1+negate], operands[2-negate]);
21795 if (GET_MODE (x) == mode)
21796 x = gen_lowpart (data_mode, x);
21799 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21800 operands[2-negate]);
21801 return true;
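/* Worked example of the GTU trick above (element values assumed): to
   compute a >u b on V4SImode without an unsigned compare insn, both
   operands have 0x80000000 subtracted (flipping their sign bits), after
   which the ordinary signed pcmpgtd gives the unsigned answer:
   0x00000001 >u 0xffffffff becomes 0x80000001 >s 0x7fffffff, which is
   false, as required.  */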
21804 /* AVX512F does support 64-byte integer vector operations,
21805 thus the longest vector we are faced with is V64QImode. */
21806 #define MAX_VECT_LEN 64
21808 struct expand_vec_perm_d
21810 rtx target, op0, op1;
21811 unsigned char perm[MAX_VECT_LEN];
21812 machine_mode vmode;
21813 unsigned char nelt;
21814 bool one_operand_p;
21815 bool testing_p;
21818 static bool
21819 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21820 struct expand_vec_perm_d *d)
21822 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21823 expanders, so the args are either in d, or in op0, op1 etc. */
21824 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21825 machine_mode maskmode = mode;
21826 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21828 switch (mode)
21830 case V8HImode:
21831 if (TARGET_AVX512VL && TARGET_AVX512BW)
21832 gen = gen_avx512vl_vpermi2varv8hi3;
21833 break;
21834 case V16HImode:
21835 if (TARGET_AVX512VL && TARGET_AVX512BW)
21836 gen = gen_avx512vl_vpermi2varv16hi3;
21837 break;
21838 case V32HImode:
21839 if (TARGET_AVX512BW)
21840 gen = gen_avx512bw_vpermi2varv32hi3;
21841 break;
21842 case V4SImode:
21843 if (TARGET_AVX512VL)
21844 gen = gen_avx512vl_vpermi2varv4si3;
21845 break;
21846 case V8SImode:
21847 if (TARGET_AVX512VL)
21848 gen = gen_avx512vl_vpermi2varv8si3;
21849 break;
21850 case V16SImode:
21851 if (TARGET_AVX512F)
21852 gen = gen_avx512f_vpermi2varv16si3;
21853 break;
21854 case V4SFmode:
21855 if (TARGET_AVX512VL)
21857 gen = gen_avx512vl_vpermi2varv4sf3;
21858 maskmode = V4SImode;
21860 break;
21861 case V8SFmode:
21862 if (TARGET_AVX512VL)
21864 gen = gen_avx512vl_vpermi2varv8sf3;
21865 maskmode = V8SImode;
21867 break;
21868 case V16SFmode:
21869 if (TARGET_AVX512F)
21871 gen = gen_avx512f_vpermi2varv16sf3;
21872 maskmode = V16SImode;
21874 break;
21875 case V2DImode:
21876 if (TARGET_AVX512VL)
21877 gen = gen_avx512vl_vpermi2varv2di3;
21878 break;
21879 case V4DImode:
21880 if (TARGET_AVX512VL)
21881 gen = gen_avx512vl_vpermi2varv4di3;
21882 break;
21883 case V8DImode:
21884 if (TARGET_AVX512F)
21885 gen = gen_avx512f_vpermi2varv8di3;
21886 break;
21887 case V2DFmode:
21888 if (TARGET_AVX512VL)
21890 gen = gen_avx512vl_vpermi2varv2df3;
21891 maskmode = V2DImode;
21893 break;
21894 case V4DFmode:
21895 if (TARGET_AVX512VL)
21897 gen = gen_avx512vl_vpermi2varv4df3;
21898 maskmode = V4DImode;
21900 break;
21901 case V8DFmode:
21902 if (TARGET_AVX512F)
21904 gen = gen_avx512f_vpermi2varv8df3;
21905 maskmode = V8DImode;
21907 break;
21908 default:
21909 break;
21912 if (gen == NULL)
21913 return false;
21915 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21916 expanders, so the args are either in d, or in op0, op1 etc. */
21917 if (d)
21919 rtx vec[64];
21920 target = d->target;
21921 op0 = d->op0;
21922 op1 = d->op1;
21923 for (int i = 0; i < d->nelt; ++i)
21924 vec[i] = GEN_INT (d->perm[i]);
21925 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21928 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21929 return true;
21932 /* Expand a variable vector permutation. */
21934 void
21935 ix86_expand_vec_perm (rtx operands[])
21937 rtx target = operands[0];
21938 rtx op0 = operands[1];
21939 rtx op1 = operands[2];
21940 rtx mask = operands[3];
21941 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21942 machine_mode mode = GET_MODE (op0);
21943 machine_mode maskmode = GET_MODE (mask);
21944 int w, e, i;
21945 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21947 /* Number of elements in the vector. */
21948 w = GET_MODE_NUNITS (mode);
21949 e = GET_MODE_UNIT_SIZE (mode);
21950 gcc_assert (w <= 64);
21952 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21953 return;
21955 if (TARGET_AVX2)
21957 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21959 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21960 a constant shuffle operand. With a tiny bit of effort we can
21961 use VPERMD instead. A re-interpretation stall for V4DFmode is
21962 unfortunate but there's no avoiding it.
21963 Similarly, for V16HImode we don't have instructions for variable
21964 shuffling, while for V32QImode we can, after preparing suitable
21965 masks, use vpshufb; vpshufb; vpermq; vpor. */
21967 if (mode == V16HImode)
21969 maskmode = mode = V32QImode;
21970 w = 32;
21971 e = 1;
21973 else
21975 maskmode = mode = V8SImode;
21976 w = 8;
21977 e = 4;
21979 t1 = gen_reg_rtx (maskmode);
21981 /* Replicate the low bits of the V4DImode mask into V8SImode:
21982 mask = { A B C D }
21983 t1 = { A A B B C C D D }. */
21984 for (i = 0; i < w / 2; ++i)
21985 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21986 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21987 vt = force_reg (maskmode, vt);
21988 mask = gen_lowpart (maskmode, mask);
21989 if (maskmode == V8SImode)
21990 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21991 else
21992 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21994 /* Multiply the shuffle indices by two. */
21995 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21996 OPTAB_DIRECT);
21998 /* Add one to the odd shuffle indices:
21999 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22000 for (i = 0; i < w / 2; ++i)
22002 vec[i * 2] = const0_rtx;
22003 vec[i * 2 + 1] = const1_rtx;
22005 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22006 vt = validize_mem (force_const_mem (maskmode, vt));
22007 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22008 OPTAB_DIRECT);
22010 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22011 operands[3] = mask = t1;
22012 target = gen_reg_rtx (mode);
22013 op0 = gen_lowpart (mode, op0);
22014 op1 = gen_lowpart (mode, op1);
22017 switch (mode)
22019 case V8SImode:
22020 /* The VPERMD and VPERMPS instructions already properly ignore
22021 the high bits of the shuffle elements. No need for us to
22022 perform an AND ourselves. */
22023 if (one_operand_shuffle)
22025 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22026 if (target != operands[0])
22027 emit_move_insn (operands[0],
22028 gen_lowpart (GET_MODE (operands[0]), target));
22030 else
22032 t1 = gen_reg_rtx (V8SImode);
22033 t2 = gen_reg_rtx (V8SImode);
22034 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22035 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22036 goto merge_two;
22038 return;
22040 case V8SFmode:
22041 mask = gen_lowpart (V8SImode, mask);
22042 if (one_operand_shuffle)
22043 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22044 else
22046 t1 = gen_reg_rtx (V8SFmode);
22047 t2 = gen_reg_rtx (V8SFmode);
22048 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22049 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22050 goto merge_two;
22052 return;
22054 case V4SImode:
22055 /* By combining the two 128-bit input vectors into one 256-bit
22056 input vector, we can use VPERMD and VPERMPS for the full
22057 two-operand shuffle. */
22058 t1 = gen_reg_rtx (V8SImode);
22059 t2 = gen_reg_rtx (V8SImode);
22060 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22061 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22062 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22063 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22064 return;
22066 case V4SFmode:
22067 t1 = gen_reg_rtx (V8SFmode);
22068 t2 = gen_reg_rtx (V8SImode);
22069 mask = gen_lowpart (V4SImode, mask);
22070 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22071 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22072 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22073 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22074 return;
22076 case V32QImode:
22077 t1 = gen_reg_rtx (V32QImode);
22078 t2 = gen_reg_rtx (V32QImode);
22079 t3 = gen_reg_rtx (V32QImode);
22080 vt2 = GEN_INT (-128);
22081 for (i = 0; i < 32; i++)
22082 vec[i] = vt2;
22083 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22084 vt = force_reg (V32QImode, vt);
22085 for (i = 0; i < 32; i++)
22086 vec[i] = i < 16 ? vt2 : const0_rtx;
22087 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22088 vt2 = force_reg (V32QImode, vt2);
22089 /* From mask create two adjusted masks, which contain the same
22090 bits as mask in the low 7 bits of each vector element.
22091 The first mask will have the most significant bit clear
22092 if it requests element from the same 128-bit lane
22093 and MSB set if it requests element from the other 128-bit lane.
22094 The second mask will have the opposite values of the MSB,
22095 and additionally will have its 128-bit lanes swapped.
22096 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22097 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22098 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22099 stands for other 12 bytes. */
22100 /* The bit that tells whether an element is from the same lane or the
22101 other lane is bit 4, so shift it up by 3 to the MSB position. */
22102 t5 = gen_reg_rtx (V4DImode);
22103 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22104 GEN_INT (3)));
22105 /* Clear MSB bits from the mask just in case it had them set. */
22106 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22107 /* After this t1 will have MSB set for elements from other lane. */
22108 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22109 /* Clear bits other than MSB. */
22110 emit_insn (gen_andv32qi3 (t1, t1, vt));
22111 /* Or in the lower bits from mask into t3. */
22112 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22113 /* And invert MSB bits in t1, so MSB is set for elements from the same
22114 lane. */
22115 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22116 /* Swap 128-bit lanes in t3. */
22117 t6 = gen_reg_rtx (V4DImode);
22118 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22119 const2_rtx, GEN_INT (3),
22120 const0_rtx, const1_rtx));
22121 /* And or in the lower bits from mask into t1. */
22122 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22123 if (one_operand_shuffle)
22125 /* Each of these shuffles will put 0s in places where
22126 element from the other 128-bit lane is needed, otherwise
22127 will shuffle in the requested value. */
22128 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22129 gen_lowpart (V32QImode, t6)));
22130 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22131 /* For t3 the 128-bit lanes are swapped again. */
22132 t7 = gen_reg_rtx (V4DImode);
22133 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22134 const2_rtx, GEN_INT (3),
22135 const0_rtx, const1_rtx));
22136 /* And oring both together leads to the result. */
22137 emit_insn (gen_iorv32qi3 (target, t1,
22138 gen_lowpart (V32QImode, t7)));
22139 if (target != operands[0])
22140 emit_move_insn (operands[0],
22141 gen_lowpart (GET_MODE (operands[0]), target));
22142 return;
22145 t4 = gen_reg_rtx (V32QImode);
22146 /* Similar to the above one_operand_shuffle code,
22147 just repeated twice, once for each operand. The merge_two:
22148 code below will merge the two results together. */
22149 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22150 gen_lowpart (V32QImode, t6)));
22151 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22152 gen_lowpart (V32QImode, t6)));
22153 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22154 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22155 t7 = gen_reg_rtx (V4DImode);
22156 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22157 const2_rtx, GEN_INT (3),
22158 const0_rtx, const1_rtx));
22159 t8 = gen_reg_rtx (V4DImode);
22160 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22161 const2_rtx, GEN_INT (3),
22162 const0_rtx, const1_rtx));
22163 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22164 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22165 t1 = t4;
22166 t2 = t3;
22167 goto merge_two;
22169 default:
22170 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22171 break;
22175 if (TARGET_XOP)
22177 /* The XOP VPPERM insn supports three inputs. By ignoring the
22178 one_operand_shuffle special case, we avoid creating another
22179 set of constant vectors in memory. */
22180 one_operand_shuffle = false;
22182 /* mask = mask & {2*w-1, ...} */
22183 vt = GEN_INT (2*w - 1);
22185 else
22187 /* mask = mask & {w-1, ...} */
22188 vt = GEN_INT (w - 1);
22191 for (i = 0; i < w; i++)
22192 vec[i] = vt;
22193 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22194 mask = expand_simple_binop (maskmode, AND, mask, vt,
22195 NULL_RTX, 0, OPTAB_DIRECT);
22197 /* For non-QImode operations, convert the word permutation control
22198 into a byte permutation control. */
22199 if (mode != V16QImode)
22201 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22202 GEN_INT (exact_log2 (e)),
22203 NULL_RTX, 0, OPTAB_DIRECT);
22205 /* Convert mask to vector of chars. */
22206 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22208 /* Replicate each of the input bytes into byte positions:
22209 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22210 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22211 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22212 for (i = 0; i < 16; ++i)
22213 vec[i] = GEN_INT (i/e * e);
22214 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22215 vt = validize_mem (force_const_mem (V16QImode, vt));
22216 if (TARGET_XOP)
22217 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22218 else
22219 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22221 /* Convert it into the byte positions by doing
22222 mask = mask + {0,1,..,16/w-1, 0,1,..,16/w-1, ...} */
22223 for (i = 0; i < 16; ++i)
22224 vec[i] = GEN_INT (i % e);
22225 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22226 vt = validize_mem (force_const_mem (V16QImode, vt));
22227 emit_insn (gen_addv16qi3 (mask, mask, vt));
22230 /* The actual shuffle operations all operate on V16QImode. */
22231 op0 = gen_lowpart (V16QImode, op0);
22232 op1 = gen_lowpart (V16QImode, op1);
22234 if (TARGET_XOP)
22236 if (GET_MODE (target) != V16QImode)
22237 target = gen_reg_rtx (V16QImode);
22238 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22239 if (target != operands[0])
22240 emit_move_insn (operands[0],
22241 gen_lowpart (GET_MODE (operands[0]), target));
22243 else if (one_operand_shuffle)
22245 if (GET_MODE (target) != V16QImode)
22246 target = gen_reg_rtx (V16QImode);
22247 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22248 if (target != operands[0])
22249 emit_move_insn (operands[0],
22250 gen_lowpart (GET_MODE (operands[0]), target));
22252 else
22254 rtx xops[6];
22255 bool ok;
22257 /* Shuffle the two input vectors independently. */
22258 t1 = gen_reg_rtx (V16QImode);
22259 t2 = gen_reg_rtx (V16QImode);
22260 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22261 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22263 merge_two:
22264 /* Then merge them together. The key is whether any given control
22265 element contained a bit set that indicates the second word. */
22266 mask = operands[3];
22267 vt = GEN_INT (w);
22268 if (maskmode == V2DImode && !TARGET_SSE4_1)
22270 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22271 more shuffle to convert the V2DI input mask into a V4SI
22272 input mask, at which point the masking that expand_int_vcond
22273 performs will work as desired. */
22274 rtx t3 = gen_reg_rtx (V4SImode);
22275 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22276 const0_rtx, const0_rtx,
22277 const2_rtx, const2_rtx));
22278 mask = t3;
22279 maskmode = V4SImode;
22280 e = w = 4;
22283 for (i = 0; i < w; i++)
22284 vec[i] = vt;
22285 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22286 vt = force_reg (maskmode, vt);
22287 mask = expand_simple_binop (maskmode, AND, mask, vt,
22288 NULL_RTX, 0, OPTAB_DIRECT);
22290 if (GET_MODE (target) != mode)
22291 target = gen_reg_rtx (mode);
22292 xops[0] = target;
22293 xops[1] = gen_lowpart (mode, t2);
22294 xops[2] = gen_lowpart (mode, t1);
22295 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22296 xops[4] = mask;
22297 xops[5] = vt;
22298 ok = ix86_expand_int_vcond (xops);
22299 gcc_assert (ok);
22300 if (target != operands[0])
22301 emit_move_insn (operands[0],
22302 gen_lowpart (GET_MODE (operands[0]), target));
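/* Rough illustration of the SSSE3 fallback above for a two-operand
   V4SImode shuffle (indices assumed): a word mask such as { 5, 0, 6, 2 }
   is first masked to the valid index range, scaled by the element size
   and widened to byte granularity so pshufb can shuffle each 128-bit
   input on its own; the two partial results are then merged at
   merge_two: by a vector condition keyed on whether each original index
   selected the second operand.  */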
22306 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
22307 true if we should do zero extension, else sign extension. HIGH_P is
22308 true if we want the N/2 high elements, else the low elements. */
22310 void
22311 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22313 machine_mode imode = GET_MODE (src);
22314 rtx tmp;
22316 if (TARGET_SSE4_1)
22318 rtx (*unpack)(rtx, rtx);
22319 rtx (*extract)(rtx, rtx) = NULL;
22320 machine_mode halfmode = BLKmode;
22322 switch (imode)
22324 case V64QImode:
22325 if (unsigned_p)
22326 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22327 else
22328 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22329 halfmode = V32QImode;
22330 extract
22331 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22332 break;
22333 case V32QImode:
22334 if (unsigned_p)
22335 unpack = gen_avx2_zero_extendv16qiv16hi2;
22336 else
22337 unpack = gen_avx2_sign_extendv16qiv16hi2;
22338 halfmode = V16QImode;
22339 extract
22340 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22341 break;
22342 case V32HImode:
22343 if (unsigned_p)
22344 unpack = gen_avx512f_zero_extendv16hiv16si2;
22345 else
22346 unpack = gen_avx512f_sign_extendv16hiv16si2;
22347 halfmode = V16HImode;
22348 extract
22349 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22350 break;
22351 case V16HImode:
22352 if (unsigned_p)
22353 unpack = gen_avx2_zero_extendv8hiv8si2;
22354 else
22355 unpack = gen_avx2_sign_extendv8hiv8si2;
22356 halfmode = V8HImode;
22357 extract
22358 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22359 break;
22360 case V16SImode:
22361 if (unsigned_p)
22362 unpack = gen_avx512f_zero_extendv8siv8di2;
22363 else
22364 unpack = gen_avx512f_sign_extendv8siv8di2;
22365 halfmode = V8SImode;
22366 extract
22367 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22368 break;
22369 case V8SImode:
22370 if (unsigned_p)
22371 unpack = gen_avx2_zero_extendv4siv4di2;
22372 else
22373 unpack = gen_avx2_sign_extendv4siv4di2;
22374 halfmode = V4SImode;
22375 extract
22376 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22377 break;
22378 case V16QImode:
22379 if (unsigned_p)
22380 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22381 else
22382 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22383 break;
22384 case V8HImode:
22385 if (unsigned_p)
22386 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22387 else
22388 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22389 break;
22390 case V4SImode:
22391 if (unsigned_p)
22392 unpack = gen_sse4_1_zero_extendv2siv2di2;
22393 else
22394 unpack = gen_sse4_1_sign_extendv2siv2di2;
22395 break;
22396 default:
22397 gcc_unreachable ();
22400 if (GET_MODE_SIZE (imode) >= 32)
22402 tmp = gen_reg_rtx (halfmode);
22403 emit_insn (extract (tmp, src));
22405 else if (high_p)
22407 /* Shift higher 8 bytes to lower 8 bytes. */
22408 tmp = gen_reg_rtx (V1TImode);
22409 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22410 GEN_INT (64)));
22411 tmp = gen_lowpart (imode, tmp);
22413 else
22414 tmp = src;
22416 emit_insn (unpack (dest, tmp));
22418 else
22420 rtx (*unpack)(rtx, rtx, rtx);
22422 switch (imode)
22424 case V16QImode:
22425 if (high_p)
22426 unpack = gen_vec_interleave_highv16qi;
22427 else
22428 unpack = gen_vec_interleave_lowv16qi;
22429 break;
22430 case V8HImode:
22431 if (high_p)
22432 unpack = gen_vec_interleave_highv8hi;
22433 else
22434 unpack = gen_vec_interleave_lowv8hi;
22435 break;
22436 case V4SImode:
22437 if (high_p)
22438 unpack = gen_vec_interleave_highv4si;
22439 else
22440 unpack = gen_vec_interleave_lowv4si;
22441 break;
22442 default:
22443 gcc_unreachable ();
22446 if (unsigned_p)
22447 tmp = force_reg (imode, CONST0_RTX (imode));
22448 else
22449 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22450 src, pc_rtx, pc_rtx);
22452 rtx tmp2 = gen_reg_rtx (imode);
22453 emit_insn (unpack (tmp2, src, tmp));
22454 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
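/* Sketch of the pre-SSE4.1 path above (V8HImode input assumed): for a
   sign extension the source is first compared against zero with
   pcmpgtw, giving a vector of 0/-1 words that acts as the upper half of
   every widened element; punpcklwd/punpckhwd then interleave the source
   with that mask, producing the low or high V4SImode result.  For zero
   extension the interleave partner is simply a zero vector.  */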
22458 /* Expand conditional increment or decrement using adc/sbb instructions.
22459 The default case using setcc followed by the conditional move can be
22460 done by generic code. */
22461 bool
22462 ix86_expand_int_addcc (rtx operands[])
22464 enum rtx_code code = GET_CODE (operands[1]);
22465 rtx flags;
22466 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22467 rtx compare_op;
22468 rtx val = const0_rtx;
22469 bool fpcmp = false;
22470 machine_mode mode;
22471 rtx op0 = XEXP (operands[1], 0);
22472 rtx op1 = XEXP (operands[1], 1);
22474 if (operands[3] != const1_rtx
22475 && operands[3] != constm1_rtx)
22476 return false;
22477 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22478 return false;
22479 code = GET_CODE (compare_op);
22481 flags = XEXP (compare_op, 0);
22483 if (GET_MODE (flags) == CCFPmode
22484 || GET_MODE (flags) == CCFPUmode)
22486 fpcmp = true;
22487 code = ix86_fp_compare_code_to_integer (code);
22490 if (code != LTU)
22492 val = constm1_rtx;
22493 if (fpcmp)
22494 PUT_CODE (compare_op,
22495 reverse_condition_maybe_unordered
22496 (GET_CODE (compare_op)));
22497 else
22498 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22501 mode = GET_MODE (operands[0]);
22503 /* Construct either adc or sbb insn. */
22504 if ((code == LTU) == (operands[3] == constm1_rtx))
22506 switch (mode)
22508 case QImode:
22509 insn = gen_subqi3_carry;
22510 break;
22511 case HImode:
22512 insn = gen_subhi3_carry;
22513 break;
22514 case SImode:
22515 insn = gen_subsi3_carry;
22516 break;
22517 case DImode:
22518 insn = gen_subdi3_carry;
22519 break;
22520 default:
22521 gcc_unreachable ();
22524 else
22526 switch (mode)
22528 case QImode:
22529 insn = gen_addqi3_carry;
22530 break;
22531 case HImode:
22532 insn = gen_addhi3_carry;
22533 break;
22534 case SImode:
22535 insn = gen_addsi3_carry;
22536 break;
22537 case DImode:
22538 insn = gen_adddi3_carry;
22539 break;
22540 default:
22541 gcc_unreachable ();
22544 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22546 return true;
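/* Illustration (hypothetical source): with unsigned a and b, a
   statement like

     x += (a < b);

   matches this expander with operands[3] == const1_rtx; the compare
   leaves its result in the carry flag and the increment becomes a
   single adcl $0 after the compare (the decrement and reversed cases
   use sbb instead).  */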
22550 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22551 but works for floating point parameters and non-offsettable memories.
22552 For pushes, it returns just stack offsets; the values will be saved
22553 in the right order. Maximally four parts are generated. */
22555 static int
22556 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22558 int size;
22560 if (!TARGET_64BIT)
22561 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22562 else
22563 size = (GET_MODE_SIZE (mode) + 4) / 8;
22565 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22566 gcc_assert (size >= 2 && size <= 4);
22568 /* Optimize constant pool reference to immediates. This is used by fp
22569 moves, which force all constants to memory to allow combining. */
22570 if (MEM_P (operand) && MEM_READONLY_P (operand))
22572 rtx tmp = maybe_get_pool_constant (operand);
22573 if (tmp)
22574 operand = tmp;
22577 if (MEM_P (operand) && !offsettable_memref_p (operand))
22579 /* The only non-offsettable memories we handle are pushes. */
22580 int ok = push_operand (operand, VOIDmode);
22582 gcc_assert (ok);
22584 operand = copy_rtx (operand);
22585 PUT_MODE (operand, word_mode);
22586 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22587 return size;
22590 if (GET_CODE (operand) == CONST_VECTOR)
22592 machine_mode imode = int_mode_for_mode (mode);
22593 /* Caution: if we looked through a constant pool memory above,
22594 the operand may actually have a different mode now. That's
22595 ok, since we want to pun this all the way back to an integer. */
22596 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22597 gcc_assert (operand != NULL);
22598 mode = imode;
22601 if (!TARGET_64BIT)
22603 if (mode == DImode)
22604 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22605 else
22607 int i;
22609 if (REG_P (operand))
22611 gcc_assert (reload_completed);
22612 for (i = 0; i < size; i++)
22613 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22615 else if (offsettable_memref_p (operand))
22617 operand = adjust_address (operand, SImode, 0);
22618 parts[0] = operand;
22619 for (i = 1; i < size; i++)
22620 parts[i] = adjust_address (operand, SImode, 4 * i);
22622 else if (GET_CODE (operand) == CONST_DOUBLE)
22624 REAL_VALUE_TYPE r;
22625 long l[4];
22627 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22628 switch (mode)
22630 case TFmode:
22631 real_to_target (l, &r, mode);
22632 parts[3] = gen_int_mode (l[3], SImode);
22633 parts[2] = gen_int_mode (l[2], SImode);
22634 break;
22635 case XFmode:
22636 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22637 long double may not be 80-bit. */
22638 real_to_target (l, &r, mode);
22639 parts[2] = gen_int_mode (l[2], SImode);
22640 break;
22641 case DFmode:
22642 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22643 break;
22644 default:
22645 gcc_unreachable ();
22647 parts[1] = gen_int_mode (l[1], SImode);
22648 parts[0] = gen_int_mode (l[0], SImode);
22650 else
22651 gcc_unreachable ();
22654 else
22656 if (mode == TImode)
22657 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22658 if (mode == XFmode || mode == TFmode)
22660 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22661 if (REG_P (operand))
22663 gcc_assert (reload_completed);
22664 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22665 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22667 else if (offsettable_memref_p (operand))
22669 operand = adjust_address (operand, DImode, 0);
22670 parts[0] = operand;
22671 parts[1] = adjust_address (operand, upper_mode, 8);
22673 else if (GET_CODE (operand) == CONST_DOUBLE)
22675 REAL_VALUE_TYPE r;
22676 long l[4];
22678 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22679 real_to_target (l, &r, mode);
22681 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22682 if (HOST_BITS_PER_WIDE_INT >= 64)
22683 parts[0]
22684 = gen_int_mode
22685 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22686 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22687 DImode);
22688 else
22689 parts[0] = immed_double_const (l[0], l[1], DImode);
22691 if (upper_mode == SImode)
22692 parts[1] = gen_int_mode (l[2], SImode);
22693 else if (HOST_BITS_PER_WIDE_INT >= 64)
22694 parts[1]
22695 = gen_int_mode
22696 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22697 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22698 DImode);
22699 else
22700 parts[1] = immed_double_const (l[2], l[3], DImode);
22702 else
22703 gcc_unreachable ();
22707 return size;
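/* Example (assumed operand): on a 32-bit target a DFmode constant such
   as 1.0 splits into two SImode immediates holding the low and high
   words of its IEEE-754 encoding, while XFmode yields three parts and
   TFmode four, matching the size computed at the top of the function.  */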
22710 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22711 All required insns are emitted directly by this function.
22712 Operands 2-4 contain the input values
22713 in the correct order; operands 5-7 contain the output values. */
22715 void
22716 ix86_split_long_move (rtx operands[])
22718 rtx part[2][4];
22719 int nparts, i, j;
22720 int push = 0;
22721 int collisions = 0;
22722 machine_mode mode = GET_MODE (operands[0]);
22723 bool collisionparts[4];
22725 /* The DFmode expanders may ask us to move a double.
22726 For a 64bit target this is a single move. By hiding the fact
22727 here we simplify the i386.md splitters. */
22728 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22730 /* Optimize constant pool reference to immediates. This is used by
22731 fp moves that force all constants to memory to allow combining. */
22733 if (MEM_P (operands[1])
22734 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22735 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22736 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22737 if (push_operand (operands[0], VOIDmode))
22739 operands[0] = copy_rtx (operands[0]);
22740 PUT_MODE (operands[0], word_mode);
22742 else
22743 operands[0] = gen_lowpart (DImode, operands[0]);
22744 operands[1] = gen_lowpart (DImode, operands[1]);
22745 emit_move_insn (operands[0], operands[1]);
22746 return;
22749 /* The only non-offsettable memory we handle is push. */
22750 if (push_operand (operands[0], VOIDmode))
22751 push = 1;
22752 else
22753 gcc_assert (!MEM_P (operands[0])
22754 || offsettable_memref_p (operands[0]));
22756 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22757 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22759 /* When emitting push, take care for source operands on the stack. */
22760 if (push && MEM_P (operands[1])
22761 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22763 rtx src_base = XEXP (part[1][nparts - 1], 0);
22765 /* Compensate for the stack decrement by 4. */
22766 if (!TARGET_64BIT && nparts == 3
22767 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22768 src_base = plus_constant (Pmode, src_base, 4);
22770 /* src_base refers to the stack pointer and is
22771 automatically decreased by emitted push. */
22772 for (i = 0; i < nparts; i++)
22773 part[1][i] = change_address (part[1][i],
22774 GET_MODE (part[1][i]), src_base);
22777 /* We need to do copy in the right order in case an address register
22778 of the source overlaps the destination. */
22779 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22781 rtx tmp;
22783 for (i = 0; i < nparts; i++)
22785 collisionparts[i]
22786 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22787 if (collisionparts[i])
22788 collisions++;
22791 /* Collision in the middle part can be handled by reordering. */
22792 if (collisions == 1 && nparts == 3 && collisionparts [1])
22794 std::swap (part[0][1], part[0][2]);
22795 std::swap (part[1][1], part[1][2]);
22797 else if (collisions == 1
22798 && nparts == 4
22799 && (collisionparts [1] || collisionparts [2]))
22801 if (collisionparts [1])
22803 std::swap (part[0][1], part[0][2]);
22804 std::swap (part[1][1], part[1][2]);
22806 else
22808 std::swap (part[0][2], part[0][3]);
22809 std::swap (part[1][2], part[1][3]);
22813 /* If there are more collisions, we can't handle it by reordering.
22814 Do an lea to the last part and use only one colliding move. */
22815 else if (collisions > 1)
22817 rtx base;
22819 collisions = 1;
22821 base = part[0][nparts - 1];
22823 /* Handle the case when the last part isn't valid for lea.
22824 Happens in 64-bit mode storing the 12-byte XFmode. */
22825 if (GET_MODE (base) != Pmode)
22826 base = gen_rtx_REG (Pmode, REGNO (base));
22828 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22829 part[1][0] = replace_equiv_address (part[1][0], base);
22830 for (i = 1; i < nparts; i++)
22832 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22833 part[1][i] = replace_equiv_address (part[1][i], tmp);
22838 if (push)
22840 if (!TARGET_64BIT)
22842 if (nparts == 3)
22844 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22845 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22846 stack_pointer_rtx, GEN_INT (-4)));
22847 emit_move_insn (part[0][2], part[1][2]);
22849 else if (nparts == 4)
22851 emit_move_insn (part[0][3], part[1][3]);
22852 emit_move_insn (part[0][2], part[1][2]);
22855 else
22857 /* In 64bit mode we don't have a 32bit push available. In case this is a
22858 register, it is OK - we will just use the larger counterpart. We also
22859 retype memory - this comes from an attempt to avoid the REX prefix on
22860 moving the second half of a TFmode value. */
22861 if (GET_MODE (part[1][1]) == SImode)
22863 switch (GET_CODE (part[1][1]))
22865 case MEM:
22866 part[1][1] = adjust_address (part[1][1], DImode, 0);
22867 break;
22869 case REG:
22870 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22871 break;
22873 default:
22874 gcc_unreachable ();
22877 if (GET_MODE (part[1][0]) == SImode)
22878 part[1][0] = part[1][1];
22881 emit_move_insn (part[0][1], part[1][1]);
22882 emit_move_insn (part[0][0], part[1][0]);
22883 return;
22886 /* Choose correct order to not overwrite the source before it is copied. */
22887 if ((REG_P (part[0][0])
22888 && REG_P (part[1][1])
22889 && (REGNO (part[0][0]) == REGNO (part[1][1])
22890 || (nparts == 3
22891 && REGNO (part[0][0]) == REGNO (part[1][2]))
22892 || (nparts == 4
22893 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22894 || (collisions > 0
22895 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22897 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22899 operands[2 + i] = part[0][j];
22900 operands[6 + i] = part[1][j];
22903 else
22905 for (i = 0; i < nparts; i++)
22907 operands[2 + i] = part[0][i];
22908 operands[6 + i] = part[1][i];
22912 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22913 if (optimize_insn_for_size_p ())
22915 for (j = 0; j < nparts - 1; j++)
22916 if (CONST_INT_P (operands[6 + j])
22917 && operands[6 + j] != const0_rtx
22918 && REG_P (operands[2 + j]))
22919 for (i = j; i < nparts - 1; i++)
22920 if (CONST_INT_P (operands[7 + i])
22921 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22922 operands[7 + i] = operands[2 + j];
22925 for (i = 0; i < nparts; i++)
22926 emit_move_insn (operands[2 + i], operands[6 + i]);
22928 return;
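/* A rough plain-C sketch of the ordering problem solved above (illustration
   only; "dst"/"src" arrays stand in for the word-sized parts): when the
   first destination part is a register that also appears in the source,
   either as one of its parts or in its address, the parts are copied
   highest-first so the source survives until it has been read:

     if (destination_clobbers_source_early)
       for (i = nparts - 1; i >= 0; i--)
         dst[i] = src[i];
     else
       for (i = 0; i < nparts; i++)
         dst[i] = src[i];
*/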
22931 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22932 left shift by a constant, either using a single shift or
22933 a sequence of add instructions. */
22935 static void
22936 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22938 rtx (*insn)(rtx, rtx, rtx);
22940 if (count == 1
22941 || (count * ix86_cost->add <= ix86_cost->shift_const
22942 && !optimize_insn_for_size_p ()))
22944 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22945 while (count-- > 0)
22946 emit_insn (insn (operand, operand, operand));
22948 else
22950 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22951 emit_insn (insn (operand, operand, GEN_INT (count)));
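/* Illustration of the cost test above (plain C, not compiler code): when
   COUNT add instructions are no more expensive than one shift by a
   constant, a small left shift is emitted as repeated add-to-self.
   For COUNT == 2:

     operand += operand;    now operand == original << 1
     operand += operand;    now operand == original << 2
*/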
22955 void
22956 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22958 rtx (*gen_ashl3)(rtx, rtx, rtx);
22959 rtx (*gen_shld)(rtx, rtx, rtx);
22960 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22962 rtx low[2], high[2];
22963 int count;
22965 if (CONST_INT_P (operands[2]))
22967 split_double_mode (mode, operands, 2, low, high);
22968 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22970 if (count >= half_width)
22972 emit_move_insn (high[0], low[1]);
22973 emit_move_insn (low[0], const0_rtx);
22975 if (count > half_width)
22976 ix86_expand_ashl_const (high[0], count - half_width, mode);
22978 else
22980 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22982 if (!rtx_equal_p (operands[0], operands[1]))
22983 emit_move_insn (operands[0], operands[1]);
22985 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22986 ix86_expand_ashl_const (low[0], count, mode);
22988 return;
22991 split_double_mode (mode, operands, 1, low, high);
22993 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22995 if (operands[1] == const1_rtx)
22997 /* Assuming we've chosen QImode-capable registers, 1 << N
22998 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22999 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23001 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23003 ix86_expand_clear (low[0]);
23004 ix86_expand_clear (high[0]);
23005 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23007 d = gen_lowpart (QImode, low[0]);
23008 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23009 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23010 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23012 d = gen_lowpart (QImode, high[0]);
23013 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23014 s = gen_rtx_NE (QImode, flags, const0_rtx);
23015 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23018 /* Otherwise, we can get the same results by manually performing
23019 a bit extract operation on bit 5/6, and then performing the two
23020 shifts. The two methods of getting 0/1 into low/high are exactly
23021 the same size. Avoiding the shift in the bit extract case helps
23022 pentium4 a bit; no one else seems to care much either way. */
23023 else
23025 machine_mode half_mode;
23026 rtx (*gen_lshr3)(rtx, rtx, rtx);
23027 rtx (*gen_and3)(rtx, rtx, rtx);
23028 rtx (*gen_xor3)(rtx, rtx, rtx);
23029 HOST_WIDE_INT bits;
23030 rtx x;
23032 if (mode == DImode)
23034 half_mode = SImode;
23035 gen_lshr3 = gen_lshrsi3;
23036 gen_and3 = gen_andsi3;
23037 gen_xor3 = gen_xorsi3;
23038 bits = 5;
23040 else
23042 half_mode = DImode;
23043 gen_lshr3 = gen_lshrdi3;
23044 gen_and3 = gen_anddi3;
23045 gen_xor3 = gen_xordi3;
23046 bits = 6;
23049 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23050 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23051 else
23052 x = gen_lowpart (half_mode, operands[2]);
23053 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23055 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23056 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23057 emit_move_insn (low[0], high[0]);
23058 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23061 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23062 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23063 return;
23066 if (operands[1] == constm1_rtx)
23068 /* For -1 << N, we can avoid the shld instruction, because we
23069 know that we're shifting 0...31/63 ones into a -1. */
23070 emit_move_insn (low[0], constm1_rtx);
23071 if (optimize_insn_for_size_p ())
23072 emit_move_insn (high[0], low[0]);
23073 else
23074 emit_move_insn (high[0], constm1_rtx);
23076 else
23078 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23080 if (!rtx_equal_p (operands[0], operands[1]))
23081 emit_move_insn (operands[0], operands[1]);
23083 split_double_mode (mode, operands, 1, low, high);
23084 emit_insn (gen_shld (high[0], low[0], operands[2]));
23087 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23089 if (TARGET_CMOVE && scratch)
23091 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23092 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23094 ix86_expand_clear (scratch);
23095 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23097 else
23099 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23100 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23102 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
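/* Rough plain-C sketch of what the variable-count path above produces for
   a DImode shift on a 32-bit target; "hi" and "lo" stand for the two
   SImode halves and the final test corresponds to the
   gen_x86_shift*_adj_1/2 adjustment:

     unsigned int c = count & 31;
     if (c)
       hi = (hi << c) | (lo >> (32 - c));    shld
     lo <<= c;                               sal
     if (count & 32)                         count >= half width
       {
         hi = lo;
         lo = 0;
       }
*/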
23106 void
23107 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23109 rtx (*gen_ashr3)(rtx, rtx, rtx)
23110 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23111 rtx (*gen_shrd)(rtx, rtx, rtx);
23112 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23114 rtx low[2], high[2];
23115 int count;
23117 if (CONST_INT_P (operands[2]))
23119 split_double_mode (mode, operands, 2, low, high);
23120 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23122 if (count == GET_MODE_BITSIZE (mode) - 1)
23124 emit_move_insn (high[0], high[1]);
23125 emit_insn (gen_ashr3 (high[0], high[0],
23126 GEN_INT (half_width - 1)));
23127 emit_move_insn (low[0], high[0]);
23130 else if (count >= half_width)
23132 emit_move_insn (low[0], high[1]);
23133 emit_move_insn (high[0], low[0]);
23134 emit_insn (gen_ashr3 (high[0], high[0],
23135 GEN_INT (half_width - 1)));
23137 if (count > half_width)
23138 emit_insn (gen_ashr3 (low[0], low[0],
23139 GEN_INT (count - half_width)));
23141 else
23143 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23145 if (!rtx_equal_p (operands[0], operands[1]))
23146 emit_move_insn (operands[0], operands[1]);
23148 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23149 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23152 else
23154 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23156 if (!rtx_equal_p (operands[0], operands[1]))
23157 emit_move_insn (operands[0], operands[1]);
23159 split_double_mode (mode, operands, 1, low, high);
23161 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23162 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23164 if (TARGET_CMOVE && scratch)
23166 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23167 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23169 emit_move_insn (scratch, high[0]);
23170 emit_insn (gen_ashr3 (scratch, scratch,
23171 GEN_INT (half_width - 1)));
23172 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23173 scratch));
23175 else
23177 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23178 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23180 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23185 void
23186 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23188 rtx (*gen_lshr3)(rtx, rtx, rtx)
23189 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23190 rtx (*gen_shrd)(rtx, rtx, rtx);
23191 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23193 rtx low[2], high[2];
23194 int count;
23196 if (CONST_INT_P (operands[2]))
23198 split_double_mode (mode, operands, 2, low, high);
23199 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23201 if (count >= half_width)
23203 emit_move_insn (low[0], high[1]);
23204 ix86_expand_clear (high[0]);
23206 if (count > half_width)
23207 emit_insn (gen_lshr3 (low[0], low[0],
23208 GEN_INT (count - half_width)));
23210 else
23212 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23214 if (!rtx_equal_p (operands[0], operands[1]))
23215 emit_move_insn (operands[0], operands[1]);
23217 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23218 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23221 else
23223 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23225 if (!rtx_equal_p (operands[0], operands[1]))
23226 emit_move_insn (operands[0], operands[1]);
23228 split_double_mode (mode, operands, 1, low, high);
23230 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23231 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23233 if (TARGET_CMOVE && scratch)
23235 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23236 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23238 ix86_expand_clear (scratch);
23239 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23240 scratch));
23242 else
23244 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23245 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23247 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
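/* Analogous plain-C sketch for the right-shift splitters above, again with
   "hi"/"lo" standing for the two halves of a DImode value on a 32-bit
   target (illustration only):

     unsigned int c = count & 31;
     if (c)
       lo = (lo >> c) | (hi << (32 - c));    shrd
     hi >>= c;                               shr, or sar for ix86_split_ashr
     if (count & 32)                         count >= half width
       {
         lo = hi;
         hi = 0;                             arithmetic shift fills with
       }                                     hi >> 31 instead of zero
*/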
23252 /* Predict just emitted jump instruction to be taken with probability PROB. */
23253 static void
23254 predict_jump (int prob)
23256 rtx insn = get_last_insn ();
23257 gcc_assert (JUMP_P (insn));
23258 add_int_reg_note (insn, REG_BR_PROB, prob);
23261 /* Helper function for the string operations below. Test VARIABLE whether
23262 it is aligned to VALUE bytes; if it is, jump to the returned label. */
23263 static rtx_code_label *
23264 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23266 rtx_code_label *label = gen_label_rtx ();
23267 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23268 if (GET_MODE (variable) == DImode)
23269 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23270 else
23271 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23272 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23273 1, label);
23274 if (epilogue)
23275 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23276 else
23277 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23278 return label;
23281 /* Decrease COUNTREG by VALUE. */
23282 static void
23283 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23285 rtx (*gen_add)(rtx, rtx, rtx)
23286 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23288 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23291 /* Zero extend possibly SImode EXP to Pmode register. */
23293 ix86_zero_extend_to_Pmode (rtx exp)
23295 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23298 /* Divide COUNTREG by SCALE. */
23299 static rtx
23300 scale_counter (rtx countreg, int scale)
23302 rtx sc;
23304 if (scale == 1)
23305 return countreg;
23306 if (CONST_INT_P (countreg))
23307 return GEN_INT (INTVAL (countreg) / scale);
23308 gcc_assert (REG_P (countreg));
23310 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23311 GEN_INT (exact_log2 (scale)),
23312 NULL, 1, OPTAB_DIRECT);
23313 return sc;
23316 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23317 DImode for constant loop counts. */
23319 static machine_mode
23320 counter_mode (rtx count_exp)
23322 if (GET_MODE (count_exp) != VOIDmode)
23323 return GET_MODE (count_exp);
23324 if (!CONST_INT_P (count_exp))
23325 return Pmode;
23326 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23327 return DImode;
23328 return SImode;
23331 /* Copy the address to a Pmode register. This is used for x32 to
23332 truncate DImode TLS address to a SImode register. */
23334 static rtx
23335 ix86_copy_addr_to_reg (rtx addr)
23337 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23338 return copy_addr_to_reg (addr);
23339 else
23341 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23342 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23346 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
23347 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT
23348 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23349 memory by VALUE (supposed to be in MODE).
23351 The size is rounded down to whole number of chunk size moved at once.
23352 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
23355 static void
23356 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23357 rtx destptr, rtx srcptr, rtx value,
23358 rtx count, machine_mode mode, int unroll,
23359 int expected_size, bool issetmem)
23361 rtx_code_label *out_label, *top_label;
23362 rtx iter, tmp;
23363 machine_mode iter_mode = counter_mode (count);
23364 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23365 rtx piece_size = GEN_INT (piece_size_n);
23366 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23367 rtx size;
23368 int i;
23370 top_label = gen_label_rtx ();
23371 out_label = gen_label_rtx ();
23372 iter = gen_reg_rtx (iter_mode);
23374 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23375 NULL, 1, OPTAB_DIRECT);
23376 /* Those two should combine. */
23377 if (piece_size == const1_rtx)
23379 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23380 true, out_label);
23381 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23383 emit_move_insn (iter, const0_rtx);
23385 emit_label (top_label);
23387 tmp = convert_modes (Pmode, iter_mode, iter, true);
23389 /* This assert could be relaxed - in this case we'll need to compute
23390 the smallest power of two containing PIECE_SIZE_N and pass it to
23391 offset_address. */
23392 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23393 destmem = offset_address (destmem, tmp, piece_size_n);
23394 destmem = adjust_address (destmem, mode, 0);
23396 if (!issetmem)
23398 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23399 srcmem = adjust_address (srcmem, mode, 0);
23401 /* When unrolling for chips that reorder memory reads and writes,
23402 we can save registers by using a single temporary.
23403 Also, using 4 temporaries is overkill in 32bit mode. */
23404 if (!TARGET_64BIT && 0)
23406 for (i = 0; i < unroll; i++)
23408 if (i)
23410 destmem =
23411 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23412 srcmem =
23413 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23415 emit_move_insn (destmem, srcmem);
23418 else
23420 rtx tmpreg[4];
23421 gcc_assert (unroll <= 4);
23422 for (i = 0; i < unroll; i++)
23424 tmpreg[i] = gen_reg_rtx (mode);
23425 if (i)
23427 srcmem =
23428 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23430 emit_move_insn (tmpreg[i], srcmem);
23432 for (i = 0; i < unroll; i++)
23434 if (i)
23436 destmem =
23437 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23439 emit_move_insn (destmem, tmpreg[i]);
23443 else
23444 for (i = 0; i < unroll; i++)
23446 if (i)
23447 destmem =
23448 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23449 emit_move_insn (destmem, value);
23452 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23453 true, OPTAB_LIB_WIDEN);
23454 if (tmp != iter)
23455 emit_move_insn (iter, tmp);
23457 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23458 true, top_label);
23459 if (expected_size != -1)
23461 expected_size /= GET_MODE_SIZE (mode) * unroll;
23462 if (expected_size == 0)
23463 predict_jump (0);
23464 else if (expected_size > REG_BR_PROB_BASE)
23465 predict_jump (REG_BR_PROB_BASE - 1);
23466 else
23467 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23469 else
23470 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23471 iter = ix86_zero_extend_to_Pmode (iter);
23472 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23473 true, OPTAB_LIB_WIDEN);
23474 if (tmp != destptr)
23475 emit_move_insn (destptr, tmp);
23476 if (!issetmem)
23478 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23479 true, OPTAB_LIB_WIDEN);
23480 if (tmp != srcptr)
23481 emit_move_insn (srcptr, tmp);
23483 emit_label (out_label);
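/* Rough plain-C picture of the copy loop emitted above, e.g. for
   MODE == SImode and UNROLL == 2 on a memcpy ("dst", "src" and "iter"
   are illustrative names only):

     size = count & ~7;                          whole 8-byte steps only
     for (iter = 0; iter < size; iter += 8)
       {
         *(int *) (dst + iter) = *(int *) (src + iter);
         *(int *) (dst + iter + 4) = *(int *) (src + iter + 4);
       }
     dst += iter;                                the caller copies the tail
     src += iter;
*/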
23486 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23487 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23488 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23489 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23490 ORIG_VALUE is the original value passed to memset to fill the memory with.
23491 Other arguments have same meaning as for previous function. */
23493 static void
23494 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23495 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23496 rtx count,
23497 machine_mode mode, bool issetmem)
23499 rtx destexp;
23500 rtx srcexp;
23501 rtx countreg;
23502 HOST_WIDE_INT rounded_count;
23504 /* If possible, it is shorter to use rep movs.
23505 TODO: Maybe it is better to move this logic to decide_alg. */
23506 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23507 && (!issetmem || orig_value == const0_rtx))
23508 mode = SImode;
23510 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23511 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23513 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23514 GET_MODE_SIZE (mode)));
23515 if (mode != QImode)
23517 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23518 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23519 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23521 else
23522 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23523 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23525 rounded_count = (INTVAL (count)
23526 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23527 destmem = shallow_copy_rtx (destmem);
23528 set_mem_size (destmem, rounded_count);
23530 else if (MEM_SIZE_KNOWN_P (destmem))
23531 clear_mem_size (destmem);
23533 if (issetmem)
23535 value = force_reg (mode, gen_lowpart (mode, value));
23536 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23538 else
23540 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23541 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23542 if (mode != QImode)
23544 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23545 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23546 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23548 else
23549 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23550 if (CONST_INT_P (count))
23552 rounded_count = (INTVAL (count)
23553 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23554 srcmem = shallow_copy_rtx (srcmem);
23555 set_mem_size (srcmem, rounded_count);
23557 else
23559 if (MEM_SIZE_KNOWN_P (srcmem))
23560 clear_mem_size (srcmem);
23562 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23563 destexp, srcexp));
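/* Example of the mode promotion above (illustrative numbers): a memset of
   a 64-byte block with zero arrives here with MODE == QImode, is promoted
   to SImode because the size is a multiple of 4 and the value is zero,
   and a single rep stos of 64 / 4 == 16 SImode stores is emitted instead
   of 64 byte-sized stores.  */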
23567 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23568 DESTMEM.
23569 SRCMEM is passed by pointer so it can be updated on return.
23570 Return value is the updated DESTMEM. */
23571 static rtx
23572 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23573 HOST_WIDE_INT size_to_move)
23575 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23576 enum insn_code code;
23577 machine_mode move_mode;
23578 int piece_size, i;
23580 /* Find the widest mode in which we could perform moves.
23581 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23582 it until a move of that size is supported. */
23583 piece_size = 1 << floor_log2 (size_to_move);
23584 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23585 code = optab_handler (mov_optab, move_mode);
23586 while (code == CODE_FOR_nothing && piece_size > 1)
23588 piece_size >>= 1;
23589 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23590 code = optab_handler (mov_optab, move_mode);
23593 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23594 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23595 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23597 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23598 move_mode = mode_for_vector (word_mode, nunits);
23599 code = optab_handler (mov_optab, move_mode);
23600 if (code == CODE_FOR_nothing)
23602 move_mode = word_mode;
23603 piece_size = GET_MODE_SIZE (move_mode);
23604 code = optab_handler (mov_optab, move_mode);
23607 gcc_assert (code != CODE_FOR_nothing);
23609 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23610 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23612 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23613 gcc_assert (size_to_move % piece_size == 0);
23614 adjust = GEN_INT (piece_size);
23615 for (i = 0; i < size_to_move; i += piece_size)
23617 /* We move from memory to memory, so we'll need to do it via
23618 a temporary register. */
23619 tempreg = gen_reg_rtx (move_mode);
23620 emit_insn (GEN_FCN (code) (tempreg, src));
23621 emit_insn (GEN_FCN (code) (dst, tempreg));
23623 emit_move_insn (destptr,
23624 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23625 emit_move_insn (srcptr,
23626 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23628 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23629 piece_size);
23630 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23631 piece_size);
23634 /* Update DST and SRC rtx. */
23635 *srcmem = src;
23636 return dst;
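/* Illustration of the mode selection above: with SIZE_TO_MOVE == 16 on a
   64-bit target with SSE, MOVE_MODE starts out as the 128-bit integer
   mode, is replaced by the equally wide V2DImode vector mode, and a
   single 16-byte load plus store through a temporary register is
   emitted; if no such vector move pattern existed, the code would fall
   back to word_mode pieces.  */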
23639 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23640 static void
23641 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23642 rtx destptr, rtx srcptr, rtx count, int max_size)
23644 rtx src, dest;
23645 if (CONST_INT_P (count))
23647 HOST_WIDE_INT countval = INTVAL (count);
23648 HOST_WIDE_INT epilogue_size = countval % max_size;
23649 int i;
23651 /* For now MAX_SIZE should be a power of 2. This assert could be
23652 relaxed, but it'll require a bit more complicated epilogue
23653 expanding. */
23654 gcc_assert ((max_size & (max_size - 1)) == 0);
23655 for (i = max_size; i >= 1; i >>= 1)
23657 if (epilogue_size & i)
23658 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23660 return;
23662 if (max_size > 8)
23664 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23665 count, 1, OPTAB_DIRECT);
23666 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23667 count, QImode, 1, 4, false);
23668 return;
23671 /* When single string operations are used, we can cheaply increase the dest and src
23672 pointers. Otherwise we save code size by maintaining an offset (zero is readily
23673 available from the preceding rep operation) and using x86 addressing modes. */
23675 if (TARGET_SINGLE_STRINGOP)
23677 if (max_size > 4)
23679 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23680 src = change_address (srcmem, SImode, srcptr);
23681 dest = change_address (destmem, SImode, destptr);
23682 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23683 emit_label (label);
23684 LABEL_NUSES (label) = 1;
23686 if (max_size > 2)
23688 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23689 src = change_address (srcmem, HImode, srcptr);
23690 dest = change_address (destmem, HImode, destptr);
23691 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23692 emit_label (label);
23693 LABEL_NUSES (label) = 1;
23695 if (max_size > 1)
23697 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23698 src = change_address (srcmem, QImode, srcptr);
23699 dest = change_address (destmem, QImode, destptr);
23700 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23701 emit_label (label);
23702 LABEL_NUSES (label) = 1;
23705 else
23707 rtx offset = force_reg (Pmode, const0_rtx);
23708 rtx tmp;
23710 if (max_size > 4)
23712 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23713 src = change_address (srcmem, SImode, srcptr);
23714 dest = change_address (destmem, SImode, destptr);
23715 emit_move_insn (dest, src);
23716 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23717 true, OPTAB_LIB_WIDEN);
23718 if (tmp != offset)
23719 emit_move_insn (offset, tmp);
23720 emit_label (label);
23721 LABEL_NUSES (label) = 1;
23723 if (max_size > 2)
23725 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23726 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23727 src = change_address (srcmem, HImode, tmp);
23728 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23729 dest = change_address (destmem, HImode, tmp);
23730 emit_move_insn (dest, src);
23731 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23732 true, OPTAB_LIB_WIDEN);
23733 if (tmp != offset)
23734 emit_move_insn (offset, tmp);
23735 emit_label (label);
23736 LABEL_NUSES (label) = 1;
23738 if (max_size > 1)
23740 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23741 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23742 src = change_address (srcmem, QImode, tmp);
23743 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23744 dest = change_address (destmem, QImode, tmp);
23745 emit_move_insn (dest, src);
23746 emit_label (label);
23747 LABEL_NUSES (label) = 1;
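/* Worked example for the constant-count case above: with MAX_SIZE == 16
   and COUNT == 23, EPILOGUE_SIZE is 23 % 16 == 7, so the loop over the
   bits of 7 emits one 4-byte, one 2-byte and one 1-byte move via
   emit_memmov, in that order.  */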
23752 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23753 with value PROMOTED_VAL.
23754 DESTPTR is updated as the stores are emitted.
23755 Return value is the updated DESTMEM. */
23756 static rtx
23757 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23758 HOST_WIDE_INT size_to_move)
23760 rtx dst = destmem, adjust;
23761 enum insn_code code;
23762 machine_mode move_mode;
23763 int piece_size, i;
23765 /* Find the widest mode in which we could perform moves.
23766 Start with the mode of PROMOTED_VAL and reduce it (taking the low part
23767 of the value) when SIZE_TO_MOVE is smaller. */
23768 move_mode = GET_MODE (promoted_val);
23769 if (move_mode == VOIDmode)
23770 move_mode = QImode;
23771 if (size_to_move < GET_MODE_SIZE (move_mode))
23773 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23774 promoted_val = gen_lowpart (move_mode, promoted_val);
23776 piece_size = GET_MODE_SIZE (move_mode);
23777 code = optab_handler (mov_optab, move_mode);
23778 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23780 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23782 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23783 gcc_assert (size_to_move % piece_size == 0);
23784 adjust = GEN_INT (piece_size);
23785 for (i = 0; i < size_to_move; i += piece_size)
23787 if (piece_size <= GET_MODE_SIZE (word_mode))
23789 emit_insn (gen_strset (destptr, dst, promoted_val));
23790 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23791 piece_size);
23792 continue;
23795 emit_insn (GEN_FCN (code) (dst, promoted_val));
23797 emit_move_insn (destptr,
23798 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23800 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23801 piece_size);
23804 /* Update DST rtx. */
23805 return dst;
23807 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23808 static void
23809 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23810 rtx count, int max_size)
23812 count =
23813 expand_simple_binop (counter_mode (count), AND, count,
23814 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23815 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23816 gen_lowpart (QImode, value), count, QImode,
23817 1, max_size / 2, true);
23820 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23821 static void
23822 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23823 rtx count, int max_size)
23825 rtx dest;
23827 if (CONST_INT_P (count))
23829 HOST_WIDE_INT countval = INTVAL (count);
23830 HOST_WIDE_INT epilogue_size = countval % max_size;
23831 int i;
23833 /* For now MAX_SIZE should be a power of 2. This assert could be
23834 relaxed, but it'll require a bit more complicated epilogue
23835 expanding. */
23836 gcc_assert ((max_size & (max_size - 1)) == 0);
23837 for (i = max_size; i >= 1; i >>= 1)
23839 if (epilogue_size & i)
23841 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23842 destmem = emit_memset (destmem, destptr, vec_value, i);
23843 else
23844 destmem = emit_memset (destmem, destptr, value, i);
23847 return;
23849 if (max_size > 32)
23851 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23852 return;
23854 if (max_size > 16)
23856 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23857 if (TARGET_64BIT)
23859 dest = change_address (destmem, DImode, destptr);
23860 emit_insn (gen_strset (destptr, dest, value));
23861 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23862 emit_insn (gen_strset (destptr, dest, value));
23864 else
23866 dest = change_address (destmem, SImode, destptr);
23867 emit_insn (gen_strset (destptr, dest, value));
23868 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23869 emit_insn (gen_strset (destptr, dest, value));
23870 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23871 emit_insn (gen_strset (destptr, dest, value));
23872 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23873 emit_insn (gen_strset (destptr, dest, value));
23875 emit_label (label);
23876 LABEL_NUSES (label) = 1;
23878 if (max_size > 8)
23880 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23881 if (TARGET_64BIT)
23883 dest = change_address (destmem, DImode, destptr);
23884 emit_insn (gen_strset (destptr, dest, value));
23886 else
23888 dest = change_address (destmem, SImode, destptr);
23889 emit_insn (gen_strset (destptr, dest, value));
23890 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23891 emit_insn (gen_strset (destptr, dest, value));
23893 emit_label (label);
23894 LABEL_NUSES (label) = 1;
23896 if (max_size > 4)
23898 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23899 dest = change_address (destmem, SImode, destptr);
23900 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23901 emit_label (label);
23902 LABEL_NUSES (label) = 1;
23904 if (max_size > 2)
23906 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23907 dest = change_address (destmem, HImode, destptr);
23908 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23909 emit_label (label);
23910 LABEL_NUSES (label) = 1;
23912 if (max_size > 1)
23914 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23915 dest = change_address (destmem, QImode, destptr);
23916 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23917 emit_label (label);
23918 LABEL_NUSES (label) = 1;
23922 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23923 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23924 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23925 ignored.
23926 Return value is updated DESTMEM. */
23927 static rtx
23928 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23929 rtx destptr, rtx srcptr, rtx value,
23930 rtx vec_value, rtx count, int align,
23931 int desired_alignment, bool issetmem)
23933 int i;
23934 for (i = 1; i < desired_alignment; i <<= 1)
23936 if (align <= i)
23938 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23939 if (issetmem)
23941 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23942 destmem = emit_memset (destmem, destptr, vec_value, i);
23943 else
23944 destmem = emit_memset (destmem, destptr, value, i);
23946 else
23947 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23948 ix86_adjust_counter (count, i);
23949 emit_label (label);
23950 LABEL_NUSES (label) = 1;
23951 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23954 return destmem;
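/* Worked example for the prologue above: with ALIGN == 4 and
   DESIRED_ALIGNMENT == 16, two runtime tests on DESTPTR are emitted;
   if bit 2 of DESTPTR is set, 4 bytes are copied (or set) and COUNT is
   reduced by 4, likewise 8 bytes for bit 3, after which the destination
   is known to be 16-byte aligned.  */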
23957 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23958 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23959 and jump to DONE_LABEL. */
23960 static void
23961 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23962 rtx destptr, rtx srcptr,
23963 rtx value, rtx vec_value,
23964 rtx count, int size,
23965 rtx done_label, bool issetmem)
23967 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23968 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23969 rtx modesize;
23970 int n;
23972 /* If we do not have vector value to copy, we must reduce size. */
23973 if (issetmem)
23975 if (!vec_value)
23977 if (GET_MODE (value) == VOIDmode && size > 8)
23978 mode = Pmode;
23979 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23980 mode = GET_MODE (value);
23982 else
23983 mode = GET_MODE (vec_value), value = vec_value;
23985 else
23987 /* Choose appropriate vector mode. */
23988 if (size >= 32)
23989 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23990 else if (size >= 16)
23991 mode = TARGET_SSE ? V16QImode : DImode;
23992 srcmem = change_address (srcmem, mode, srcptr);
23994 destmem = change_address (destmem, mode, destptr);
23995 modesize = GEN_INT (GET_MODE_SIZE (mode));
23996 gcc_assert (GET_MODE_SIZE (mode) <= size);
23997 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23999 if (issetmem)
24000 emit_move_insn (destmem, gen_lowpart (mode, value));
24001 else
24003 emit_move_insn (destmem, srcmem);
24004 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24006 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24009 destmem = offset_address (destmem, count, 1);
24010 destmem = offset_address (destmem, GEN_INT (-2 * size),
24011 GET_MODE_SIZE (mode));
24012 if (!issetmem)
24014 srcmem = offset_address (srcmem, count, 1);
24015 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24016 GET_MODE_SIZE (mode));
24018 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24020 if (issetmem)
24021 emit_move_insn (destmem, gen_lowpart (mode, value));
24022 else
24024 emit_move_insn (destmem, srcmem);
24025 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24027 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24029 emit_jump_insn (gen_jump (done_label));
24030 emit_barrier ();
24032 emit_label (label);
24033 LABEL_NUSES (label) = 1;
24036 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24037 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24038 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that lets us
24039 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24040 DONE_LABEL is a label after the whole copying sequence. The label is created
24041 on demand if *DONE_LABEL is NULL.
24042 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24043 bounds after the initial copies.
24045 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24046 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24047 we will dispatch to a library call for large blocks.
24049 In pseudocode we do:
24051 if (COUNT < SIZE)
24053 Assume that SIZE is 4. Bigger sizes are handled analogously
24054 if (COUNT & 4)
24056 copy 4 bytes from SRCPTR to DESTPTR
24057 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24058 goto done_label
24060 if (!COUNT)
24061 goto done_label;
24062 copy 1 byte from SRCPTR to DESTPTR
24063 if (COUNT & 2)
24065 copy 2 bytes from SRCPTR to DESTPTR
24066 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24069 else
24071 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24072 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24074 OLD_DESTPTR = DESTPTR;
24075 Align DESTPTR up to DESIRED_ALIGN
24076 SRCPTR += DESTPTR - OLD_DESTPTR
24077 COUNT -= DESTPTR - OLD_DESTPTR
24078 if (DYNAMIC_CHECK)
24079 Round COUNT down to multiple of SIZE
24080 << optional caller supplied zero size guard is here >>
24081 << optional caller supplied dynamic check is here >>
24082 << caller supplied main copy loop is here >>
24084 done_label:
24085 */
24086 static void
24087 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24088 rtx *destptr, rtx *srcptr,
24089 machine_mode mode,
24090 rtx value, rtx vec_value,
24091 rtx *count,
24092 rtx_code_label **done_label,
24093 int size,
24094 int desired_align,
24095 int align,
24096 unsigned HOST_WIDE_INT *min_size,
24097 bool dynamic_check,
24098 bool issetmem)
24100 rtx_code_label *loop_label = NULL, *label;
24101 int n;
24102 rtx modesize;
24103 int prolog_size = 0;
24104 rtx mode_value;
24106 /* Choose the proper value to copy. */
24107 if (issetmem && VECTOR_MODE_P (mode))
24108 mode_value = vec_value;
24109 else
24110 mode_value = value;
24111 gcc_assert (GET_MODE_SIZE (mode) <= size);
24113 /* See if block is big or small, handle small blocks. */
24114 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24116 int size2 = size;
24117 loop_label = gen_label_rtx ();
24119 if (!*done_label)
24120 *done_label = gen_label_rtx ();
24122 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24123 1, loop_label);
24124 size2 >>= 1;
24126 /* Handle sizes > 3. */
24127 for (;size2 > 2; size2 >>= 1)
24128 expand_small_movmem_or_setmem (destmem, srcmem,
24129 *destptr, *srcptr,
24130 value, vec_value,
24131 *count,
24132 size2, *done_label, issetmem);
24133 /* Nothing to copy? Jump to DONE_LABEL if so */
24134 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24135 1, *done_label);
24137 /* Do a byte copy. */
24138 destmem = change_address (destmem, QImode, *destptr);
24139 if (issetmem)
24140 emit_move_insn (destmem, gen_lowpart (QImode, value));
24141 else
24143 srcmem = change_address (srcmem, QImode, *srcptr);
24144 emit_move_insn (destmem, srcmem);
24147 /* Handle sizes 2 and 3. */
24148 label = ix86_expand_aligntest (*count, 2, false);
24149 destmem = change_address (destmem, HImode, *destptr);
24150 destmem = offset_address (destmem, *count, 1);
24151 destmem = offset_address (destmem, GEN_INT (-2), 2);
24152 if (issetmem)
24153 emit_move_insn (destmem, gen_lowpart (HImode, value));
24154 else
24156 srcmem = change_address (srcmem, HImode, *srcptr);
24157 srcmem = offset_address (srcmem, *count, 1);
24158 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24159 emit_move_insn (destmem, srcmem);
24162 emit_label (label);
24163 LABEL_NUSES (label) = 1;
24164 emit_jump_insn (gen_jump (*done_label));
24165 emit_barrier ();
24167 else
24168 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24169 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24171 /* Start memcpy for COUNT >= SIZE. */
24172 if (loop_label)
24174 emit_label (loop_label);
24175 LABEL_NUSES (loop_label) = 1;
24178 /* Copy first desired_align bytes. */
24179 if (!issetmem)
24180 srcmem = change_address (srcmem, mode, *srcptr);
24181 destmem = change_address (destmem, mode, *destptr);
24182 modesize = GEN_INT (GET_MODE_SIZE (mode));
24183 for (n = 0; prolog_size < desired_align - align; n++)
24185 if (issetmem)
24186 emit_move_insn (destmem, mode_value);
24187 else
24189 emit_move_insn (destmem, srcmem);
24190 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24192 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24193 prolog_size += GET_MODE_SIZE (mode);
24197 /* Copy last SIZE bytes. */
24198 destmem = offset_address (destmem, *count, 1);
24199 destmem = offset_address (destmem,
24200 GEN_INT (-size - prolog_size),
24202 if (issetmem)
24203 emit_move_insn (destmem, mode_value);
24204 else
24206 srcmem = offset_address (srcmem, *count, 1);
24207 srcmem = offset_address (srcmem,
24208 GEN_INT (-size - prolog_size),
24210 emit_move_insn (destmem, srcmem);
24212 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24214 destmem = offset_address (destmem, modesize, 1);
24215 if (issetmem)
24216 emit_move_insn (destmem, mode_value);
24217 else
24219 srcmem = offset_address (srcmem, modesize, 1);
24220 emit_move_insn (destmem, srcmem);
24224 /* Align destination. */
24225 if (desired_align > 1 && desired_align > align)
24227 rtx saveddest = *destptr;
24229 gcc_assert (desired_align <= size);
24230 /* Align destptr up, place it to new register. */
24231 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24232 GEN_INT (prolog_size),
24233 NULL_RTX, 1, OPTAB_DIRECT);
24234 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24235 GEN_INT (-desired_align),
24236 *destptr, 1, OPTAB_DIRECT);
24237 /* See how many bytes we skipped. */
24238 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24239 *destptr,
24240 saveddest, 1, OPTAB_DIRECT);
24241 /* Adjust srcptr and count. */
24242 if (!issetmem)
24243 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24244 *srcptr, 1, OPTAB_DIRECT);
24245 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24246 saveddest, *count, 1, OPTAB_DIRECT);
24247 /* We copied at most size + prolog_size. */
24248 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24249 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24250 else
24251 *min_size = 0;
24253 /* Our loops always round down the block size, but for dispatch to a library
24254 call we need the precise value. */
24255 if (dynamic_check)
24256 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24257 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24259 else
24261 gcc_assert (prolog_size == 0);
24262 /* Decrease count, so we won't end up copying last word twice. */
24263 if (!CONST_INT_P (*count))
24264 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24265 constm1_rtx, *count, 1, OPTAB_DIRECT);
24266 else
24267 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24268 if (*min_size)
24269 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
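/* Worked example for the big-block path above (illustrative numbers,
   assuming a 16-byte MODE): with SIZE == DESIRED_ALIGN == 16, ALIGN == 1
   and a destination at address 0x1003, the prologue copies the first 16
   bytes and the last 16 bytes unconditionally, rounds DESTPTR up to
   0x1010, and advances SRCPTR and shrinks COUNT by the 13 bytes already
   covered; the main loop then works on aligned 16-byte chunks and the 3
   overlapping bytes are simply written twice, which is harmless for
   memcpy and memset.  */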
24274 /* This function is like the previous one, except here we know how many bytes
24275 need to be copied. That allows us to update alignment not only of DST, which
24276 is returned, but also of SRC, which is passed as a pointer for that
24277 reason. */
24278 static rtx
24279 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24280 rtx srcreg, rtx value, rtx vec_value,
24281 int desired_align, int align_bytes,
24282 bool issetmem)
24284 rtx src = NULL;
24285 rtx orig_dst = dst;
24286 rtx orig_src = NULL;
24287 int piece_size = 1;
24288 int copied_bytes = 0;
24290 if (!issetmem)
24292 gcc_assert (srcp != NULL);
24293 src = *srcp;
24294 orig_src = src;
24297 for (piece_size = 1;
24298 piece_size <= desired_align && copied_bytes < align_bytes;
24299 piece_size <<= 1)
24301 if (align_bytes & piece_size)
24303 if (issetmem)
24305 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24306 dst = emit_memset (dst, destreg, vec_value, piece_size);
24307 else
24308 dst = emit_memset (dst, destreg, value, piece_size);
24310 else
24311 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24312 copied_bytes += piece_size;
24315 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24316 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24317 if (MEM_SIZE_KNOWN_P (orig_dst))
24318 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24320 if (!issetmem)
24322 int src_align_bytes = get_mem_align_offset (src, desired_align
24323 * BITS_PER_UNIT);
24324 if (src_align_bytes >= 0)
24325 src_align_bytes = desired_align - src_align_bytes;
24326 if (src_align_bytes >= 0)
24328 unsigned int src_align;
24329 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24331 if ((src_align_bytes & (src_align - 1))
24332 == (align_bytes & (src_align - 1)))
24333 break;
24335 if (src_align > (unsigned int) desired_align)
24336 src_align = desired_align;
24337 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24338 set_mem_align (src, src_align * BITS_PER_UNIT);
24340 if (MEM_SIZE_KNOWN_P (orig_src))
24341 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24342 *srcp = src;
24345 return dst;
24348 /* Return true if ALG can be used in current context.
24349 Assume we expand memset if MEMSET is true. */
24350 static bool
24351 alg_usable_p (enum stringop_alg alg, bool memset)
24353 if (alg == no_stringop)
24354 return false;
24355 if (alg == vector_loop)
24356 return TARGET_SSE || TARGET_AVX;
24357 /* Algorithms using the rep prefix want at least edi and ecx;
24358 additionally, memset wants eax and memcpy wants esi. Don't
24359 consider such algorithms if the user has appropriated those
24360 registers for their own purposes. */
24361 if (alg == rep_prefix_1_byte
24362 || alg == rep_prefix_4_byte
24363 || alg == rep_prefix_8_byte)
24364 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24365 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24366 return true;
24369 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24370 static enum stringop_alg
24371 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24372 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24373 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24375 const struct stringop_algs * algs;
24376 bool optimize_for_speed;
24377 int max = 0;
24378 const struct processor_costs *cost;
24379 int i;
24380 bool any_alg_usable_p = false;
24382 *noalign = false;
24383 *dynamic_check = -1;
24385 /* Even if the string operation call is cold, we still might spend a lot
24386 of time processing large blocks. */
24387 if (optimize_function_for_size_p (cfun)
24388 || (optimize_insn_for_size_p ()
24389 && (max_size < 256
24390 || (expected_size != -1 && expected_size < 256))))
24391 optimize_for_speed = false;
24392 else
24393 optimize_for_speed = true;
24395 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24396 if (memset)
24397 algs = &cost->memset[TARGET_64BIT != 0];
24398 else
24399 algs = &cost->memcpy[TARGET_64BIT != 0];
24401 /* See maximal size for user defined algorithm. */
24402 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24404 enum stringop_alg candidate = algs->size[i].alg;
24405 bool usable = alg_usable_p (candidate, memset);
24406 any_alg_usable_p |= usable;
24408 if (candidate != libcall && candidate && usable)
24409 max = algs->size[i].max;
24412 /* If the expected size is not known but the max size is small enough
24413 that the inline version is a win, set the expected size into
24414 the range. */
24415 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24416 && expected_size == -1)
24417 expected_size = min_size / 2 + max_size / 2;
24419 /* If the user specified the algorithm, honor it if possible. */
24420 if (ix86_stringop_alg != no_stringop
24421 && alg_usable_p (ix86_stringop_alg, memset))
24422 return ix86_stringop_alg;
24423 /* rep; movq or rep; movl is the smallest variant. */
24424 else if (!optimize_for_speed)
24426 *noalign = true;
24427 if (!count || (count & 3) || (memset && !zero_memset))
24428 return alg_usable_p (rep_prefix_1_byte, memset)
24429 ? rep_prefix_1_byte : loop_1_byte;
24430 else
24431 return alg_usable_p (rep_prefix_4_byte, memset)
24432 ? rep_prefix_4_byte : loop;
24434 /* Very tiny blocks are best handled via the loop; REP is expensive to
24435 set up. */
24436 else if (expected_size != -1 && expected_size < 4)
24437 return loop_1_byte;
24438 else if (expected_size != -1)
24440 enum stringop_alg alg = libcall;
24441 bool alg_noalign = false;
24442 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24444 /* We get here if the algorithms that were not libcall-based
24445 were rep-prefix based and we are unable to use rep prefixes
24446 based on global register usage. Break out of the loop and
24447 use the heuristic below. */
24448 if (algs->size[i].max == 0)
24449 break;
24450 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24452 enum stringop_alg candidate = algs->size[i].alg;
24454 if (candidate != libcall && alg_usable_p (candidate, memset))
24456 alg = candidate;
24457 alg_noalign = algs->size[i].noalign;
24459 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24460 last non-libcall inline algorithm. */
24461 if (TARGET_INLINE_ALL_STRINGOPS)
24463 /* When the current size is best to be copied by a libcall,
24464 but we are still forced to inline, run the heuristic below
24465 that will pick code for medium sized blocks. */
24466 if (alg != libcall)
24468 *noalign = alg_noalign;
24469 return alg;
24471 else if (!any_alg_usable_p)
24472 break;
24474 else if (alg_usable_p (candidate, memset))
24476 *noalign = algs->size[i].noalign;
24477 return candidate;
24482 /* When asked to inline the call anyway, try to pick a meaningful choice.
24483 We look for the maximal size of block that is faster to copy by hand and
24484 take blocks of at most that size, guessing that the average size will
24485 be roughly half of the block.
24487 If this turns out to be bad, we might simply specify the preferred
24488 choice in ix86_costs. */
24489 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24490 && (algs->unknown_size == libcall
24491 || !alg_usable_p (algs->unknown_size, memset)))
24493 enum stringop_alg alg;
24495 /* If there aren't any usable algorithms, then recursing on
24496 smaller sizes isn't going to find anything. Just return the
24497 simple byte-at-a-time copy loop. */
24498 if (!any_alg_usable_p)
24500 /* Pick something reasonable. */
24501 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24502 *dynamic_check = 128;
24503 return loop_1_byte;
24505 if (max <= 0)
24506 max = 4096;
24507 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24508 zero_memset, dynamic_check, noalign);
24509 gcc_assert (*dynamic_check == -1);
24510 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24511 *dynamic_check = max;
24512 else
24513 gcc_assert (alg != libcall);
24514 return alg;
24516 return (alg_usable_p (algs->unknown_size, memset)
24517 ? algs->unknown_size : libcall);
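/* Illustration of the recursive case above: when the cost tables say the
   unknown-size case should go to a library call but inlining is forced,
   decide_alg calls itself with an assumed expected size of half the
   cut-off to pick an inline algorithm for small blocks; with
   -minline-stringops-dynamically it also sets *DYNAMIC_CHECK to that
   cut-off so the caller can emit, roughly:

     if (count > cutoff)
       memcpy (dst, src, count);        library call for large blocks
     else
       ... inlined small-block copy ...

   where "cutoff" and the exact comparison are determined by the caller.  */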
24520 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24521 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24522 static int
24523 decide_alignment (int align,
24524 enum stringop_alg alg,
24525 int expected_size,
24526 machine_mode move_mode)
24528 int desired_align = 0;
24530 gcc_assert (alg != no_stringop);
24532 if (alg == libcall)
24533 return 0;
24534 if (move_mode == VOIDmode)
24535 return 0;
24537 desired_align = GET_MODE_SIZE (move_mode);
24538 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24539 copying a whole cache line at once. */
24540 if (TARGET_PENTIUMPRO
24541 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24542 desired_align = 8;
24544 if (optimize_size)
24545 desired_align = 1;
24546 if (desired_align < align)
24547 desired_align = align;
24548 if (expected_size != -1 && expected_size < 4)
24549 desired_align = align;
24551 return desired_align;
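/* Worked example (illustrative, not from the original source): for a
   DImode copy loop with a 4-byte-aligned destination, the code above
   yields desired_align = 8 when optimizing for speed and the expected
   size is at least 4 bytes (or unknown), so the prologue may store up
   to 4 bytes to reach 8-byte alignment; with -Os, or for expected sizes
   below 4 bytes, it simply returns the incoming alignment.  */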
24555 /* Helper function for memset. For QImode value 0xXY produce
24556 0xXYXYXYXY of the width specified by MODE. This is essentially
24557 a * 0x01010101, but we can do slightly better than
24558 synth_mult by unwinding the sequence by hand on CPUs with
24559 slow multiply. */
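/* Worked example (illustrative): promoting the constant 0xAB to SImode
   takes the CONST_INT path below and yields 0xABABABAB
   (v = 0xAB; v |= v << 8; v |= v << 16).  A non-constant byte is either
   multiplied by the recursively promoted 0x01010101 constant when the
   multiply is cheap, or widened step by step (a byte insert or an 8-bit
   shift-and-IOR, then 16 bits, then 32 bits for DImode).  */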
24560 static rtx
24561 promote_duplicated_reg (machine_mode mode, rtx val)
24563 machine_mode valmode = GET_MODE (val);
24564 rtx tmp;
24565 int nops = mode == DImode ? 3 : 2;
24567 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24568 if (val == const0_rtx)
24569 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24570 if (CONST_INT_P (val))
24572 HOST_WIDE_INT v = INTVAL (val) & 255;
24574 v |= v << 8;
24575 v |= v << 16;
24576 if (mode == DImode)
24577 v |= (v << 16) << 16;
24578 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24581 if (valmode == VOIDmode)
24582 valmode = QImode;
24583 if (valmode != QImode)
24584 val = gen_lowpart (QImode, val);
24585 if (mode == QImode)
24586 return val;
24587 if (!TARGET_PARTIAL_REG_STALL)
24588 nops--;
24589 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24590 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24591 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24592 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24594 rtx reg = convert_modes (mode, QImode, val, true);
24595 tmp = promote_duplicated_reg (mode, const1_rtx);
24596 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24597 OPTAB_DIRECT);
24599 else
24601 rtx reg = convert_modes (mode, QImode, val, true);
24603 if (!TARGET_PARTIAL_REG_STALL)
24604 if (mode == SImode)
24605 emit_insn (gen_movsi_insv_1 (reg, reg));
24606 else
24607 emit_insn (gen_movdi_insv_1 (reg, reg));
24608 else
24610 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24611 NULL, 1, OPTAB_DIRECT);
24612 reg =
24613 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24615 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24616 NULL, 1, OPTAB_DIRECT);
24617 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24618 if (mode == SImode)
24619 return reg;
24620 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24621 NULL, 1, OPTAB_DIRECT);
24622 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24623 return reg;
24627 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24628 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
24629 alignment from ALIGN to DESIRED_ALIGN. */
24630 static rtx
24631 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24632 int align)
24634 rtx promoted_val;
24636 if (TARGET_64BIT
24637 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24638 promoted_val = promote_duplicated_reg (DImode, val);
24639 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24640 promoted_val = promote_duplicated_reg (SImode, val);
24641 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24642 promoted_val = promote_duplicated_reg (HImode, val);
24643 else
24644 promoted_val = val;
24646 return promoted_val;
24649 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24650 operations when profitable. The code depends upon architecture, block size
24651 and alignment, but always has one of the following overall structures:
24653 Aligned move sequence:
24655 1) Prologue guard: Conditional that jumps up to epilogues for small
24656 blocks that can be handled by epilogue alone. This is faster
24657 but also needed for correctness, since the prologue assumes the block
24658 is larger than the desired alignment.
24660 Optional dynamic check for size and libcall for large
24661 blocks is emitted here too, with -minline-stringops-dynamically.
24663 2) Prologue: copy first few bytes in order to get destination
24664 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24665 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24666 copied. We emit either a jump tree for power-of-two sized
24667 blocks, or a byte loop.
24669 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24670 with specified algorithm.
24672 4) Epilogue: code copying tail of the block that is too small to be
24673 handled by main body (or up to size guarded by prologue guard).
24675 Misaligned move sequence
24677 1) misaligned move prologue/epilogue containing:
24678 a) Prologue handling small memory blocks and jumping to done_label
24679 (skipped if blocks are known to be large enough)
24680 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24681 needed by single possibly misaligned move
24682 (skipped if alignment is not needed)
24683 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24685 2) Zero size guard dispatching to done_label, if needed
24687 3) Dispatch to library call, if needed.
24689 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24690 with the specified algorithm. */
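/* Rough sketch, an illustration rather than literal output: a memset of
   unknown size expanded with the rep_prefix_4_byte algorithm follows the
   aligned structure above roughly as

       cmp  $epilogue_size, count     # 1) prologue guard
       jb   .Lepilogue
       ... byte stores ...            # 2) align destination to 4 bytes
       mov  count, %ecx
       shr  $2, %ecx                  # 3) main body
       rep stosl
     .Lepilogue:
       ... store count & 3 bytes ...  # 4) epilogue  */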
24691 bool
24692 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24693 rtx align_exp, rtx expected_align_exp,
24694 rtx expected_size_exp, rtx min_size_exp,
24695 rtx max_size_exp, rtx probable_max_size_exp,
24696 bool issetmem)
24698 rtx destreg;
24699 rtx srcreg = NULL;
24700 rtx_code_label *label = NULL;
24701 rtx tmp;
24702 rtx_code_label *jump_around_label = NULL;
24703 HOST_WIDE_INT align = 1;
24704 unsigned HOST_WIDE_INT count = 0;
24705 HOST_WIDE_INT expected_size = -1;
24706 int size_needed = 0, epilogue_size_needed;
24707 int desired_align = 0, align_bytes = 0;
24708 enum stringop_alg alg;
24709 rtx promoted_val = NULL;
24710 rtx vec_promoted_val = NULL;
24711 bool force_loopy_epilogue = false;
24712 int dynamic_check;
24713 bool need_zero_guard = false;
24714 bool noalign;
24715 machine_mode move_mode = VOIDmode;
24716 int unroll_factor = 1;
24717 /* TODO: Once value ranges are available, fill in proper data. */
24718 unsigned HOST_WIDE_INT min_size = 0;
24719 unsigned HOST_WIDE_INT max_size = -1;
24720 unsigned HOST_WIDE_INT probable_max_size = -1;
24721 bool misaligned_prologue_used = false;
24723 if (CONST_INT_P (align_exp))
24724 align = INTVAL (align_exp);
24725 /* i386 can do misaligned access at a reasonably increased cost. */
24726 if (CONST_INT_P (expected_align_exp)
24727 && INTVAL (expected_align_exp) > align)
24728 align = INTVAL (expected_align_exp);
24729 /* ALIGN is the minimum of destination and source alignment, but we care here
24730 just about destination alignment. */
24731 else if (!issetmem
24732 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24733 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24735 if (CONST_INT_P (count_exp))
24737 min_size = max_size = probable_max_size = count = expected_size
24738 = INTVAL (count_exp);
24739 /* When COUNT is 0, there is nothing to do. */
24740 if (!count)
24741 return true;
24743 else
24745 if (min_size_exp)
24746 min_size = INTVAL (min_size_exp);
24747 if (max_size_exp)
24748 max_size = INTVAL (max_size_exp);
24749 if (probable_max_size_exp)
24750 probable_max_size = INTVAL (probable_max_size_exp);
24751 if (CONST_INT_P (expected_size_exp))
24752 expected_size = INTVAL (expected_size_exp);
24755 /* Make sure we don't need to care about overflow later on. */
24756 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24757 return false;
24759 /* Step 0: Decide on preferred algorithm, desired alignment and
24760 size of chunks to be copied by main loop. */
24761 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24762 issetmem,
24763 issetmem && val_exp == const0_rtx,
24764 &dynamic_check, &noalign);
24765 if (alg == libcall)
24766 return false;
24767 gcc_assert (alg != no_stringop);
24769 /* For now the vector version of memset is generated only for memory zeroing, as
24770 creating the promoted vector value is very cheap in this case. */
24771 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24772 alg = unrolled_loop;
24774 if (!count)
24775 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24776 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24777 if (!issetmem)
24778 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24780 unroll_factor = 1;
24781 move_mode = word_mode;
24782 switch (alg)
24784 case libcall:
24785 case no_stringop:
24786 case last_alg:
24787 gcc_unreachable ();
24788 case loop_1_byte:
24789 need_zero_guard = true;
24790 move_mode = QImode;
24791 break;
24792 case loop:
24793 need_zero_guard = true;
24794 break;
24795 case unrolled_loop:
24796 need_zero_guard = true;
24797 unroll_factor = (TARGET_64BIT ? 4 : 2);
24798 break;
24799 case vector_loop:
24800 need_zero_guard = true;
24801 unroll_factor = 4;
24802 /* Find the widest supported mode. */
24803 move_mode = word_mode;
24804 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24805 != CODE_FOR_nothing)
24806 move_mode = GET_MODE_WIDER_MODE (move_mode);
24808 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24809 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24810 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24812 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24813 move_mode = mode_for_vector (word_mode, nunits);
24814 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24815 move_mode = word_mode;
24817 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24818 break;
24819 case rep_prefix_8_byte:
24820 move_mode = DImode;
24821 break;
24822 case rep_prefix_4_byte:
24823 move_mode = SImode;
24824 break;
24825 case rep_prefix_1_byte:
24826 move_mode = QImode;
24827 break;
24829 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24830 epilogue_size_needed = size_needed;
24832 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24833 if (!TARGET_ALIGN_STRINGOPS || noalign)
24834 align = desired_align;
24836 /* Step 1: Prologue guard. */
24838 /* Alignment code needs count to be in register. */
24839 if (CONST_INT_P (count_exp) && desired_align > align)
24841 if (INTVAL (count_exp) > desired_align
24842 && INTVAL (count_exp) > size_needed)
24844 align_bytes
24845 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24846 if (align_bytes <= 0)
24847 align_bytes = 0;
24848 else
24849 align_bytes = desired_align - align_bytes;
24851 if (align_bytes == 0)
24852 count_exp = force_reg (counter_mode (count_exp), count_exp);
24854 gcc_assert (desired_align >= 1 && align >= 1);
24856 /* Misaligned move sequences handle both prologue and epilogue at once.
24857 Default code generation results in smaller code for large alignments
24858 and also avoids redundant work when sizes are known precisely. */
24859 misaligned_prologue_used
24860 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24861 && MAX (desired_align, epilogue_size_needed) <= 32
24862 && desired_align <= epilogue_size_needed
24863 && ((desired_align > align && !align_bytes)
24864 || (!count && epilogue_size_needed > 1)));
24866 /* Do the cheap promotion to allow better CSE across the
24867 main loop and epilogue (i.e. one load of the big constant in
24868 front of all the code).
24869 For now the misaligned move sequences do not have a fast path
24870 without broadcasting. */
24871 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24873 if (alg == vector_loop)
24875 gcc_assert (val_exp == const0_rtx);
24876 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24877 promoted_val = promote_duplicated_reg_to_size (val_exp,
24878 GET_MODE_SIZE (word_mode),
24879 desired_align, align);
24881 else
24883 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24884 desired_align, align);
24887 /* Misaligned move sequences handle both prologues and epilogues at once.
24888 Default code generation results in smaller code for large alignments and
24889 also avoids redundant work when sizes are known precisely. */
24890 if (misaligned_prologue_used)
24892 /* Misaligned move prologue handles small blocks by itself. */
24893 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24894 (dst, src, &destreg, &srcreg,
24895 move_mode, promoted_val, vec_promoted_val,
24896 &count_exp,
24897 &jump_around_label,
24898 desired_align < align
24899 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24900 desired_align, align, &min_size, dynamic_check, issetmem);
24901 if (!issetmem)
24902 src = change_address (src, BLKmode, srcreg);
24903 dst = change_address (dst, BLKmode, destreg);
24904 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24905 epilogue_size_needed = 0;
24906 if (need_zero_guard && !min_size)
24908 /* It is possible that we copied enough so the main loop will not
24909 execute. */
24910 gcc_assert (size_needed > 1);
24911 if (jump_around_label == NULL_RTX)
24912 jump_around_label = gen_label_rtx ();
24913 emit_cmp_and_jump_insns (count_exp,
24914 GEN_INT (size_needed),
24915 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24916 if (expected_size == -1
24917 || expected_size < (desired_align - align) / 2 + size_needed)
24918 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24919 else
24920 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24923 /* Ensure that alignment prologue won't copy past end of block. */
24924 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24926 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24927 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24928 Make sure it is power of 2. */
24929 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24931 /* To improve performance of small blocks, we jump around the VAL
24932 promoting code. This means that if the promoted VAL is not constant,
24933 we might not use it in the epilogue and have to use the byte
24934 loop variant. */
24935 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24936 force_loopy_epilogue = true;
24937 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24938 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24940 /* If main algorithm works on QImode, no epilogue is needed.
24941 For small sizes just don't align anything. */
24942 if (size_needed == 1)
24943 desired_align = align;
24944 else
24945 goto epilogue;
24947 else if (!count
24948 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24950 label = gen_label_rtx ();
24951 emit_cmp_and_jump_insns (count_exp,
24952 GEN_INT (epilogue_size_needed),
24953 LTU, 0, counter_mode (count_exp), 1, label);
24954 if (expected_size == -1 || expected_size < epilogue_size_needed)
24955 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24956 else
24957 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24961 /* Emit code to decide at runtime whether a library call or inline code
24962 should be used. */
24963 if (dynamic_check != -1)
24965 if (!issetmem && CONST_INT_P (count_exp))
24967 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24969 emit_block_move_via_libcall (dst, src, count_exp, false);
24970 count_exp = const0_rtx;
24971 goto epilogue;
24974 else
24976 rtx_code_label *hot_label = gen_label_rtx ();
24977 if (jump_around_label == NULL_RTX)
24978 jump_around_label = gen_label_rtx ();
24979 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24980 LEU, 0, counter_mode (count_exp),
24981 1, hot_label);
24982 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24983 if (issetmem)
24984 set_storage_via_libcall (dst, count_exp, val_exp, false);
24985 else
24986 emit_block_move_via_libcall (dst, src, count_exp, false);
24987 emit_jump (jump_around_label);
24988 emit_label (hot_label);
24992 /* Step 2: Alignment prologue. */
24993 /* Do the expensive promotion once we branched off the small blocks. */
24994 if (issetmem && !promoted_val)
24995 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24996 desired_align, align);
24998 if (desired_align > align && !misaligned_prologue_used)
25000 if (align_bytes == 0)
25002 /* Except for the first move in the prologue, we no longer know
25003 the constant offset in the aliasing info. It doesn't seem worth
25004 the pain to maintain it for the first move, so throw away
25005 the info early. */
25006 dst = change_address (dst, BLKmode, destreg);
25007 if (!issetmem)
25008 src = change_address (src, BLKmode, srcreg);
25009 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25010 promoted_val, vec_promoted_val,
25011 count_exp, align, desired_align,
25012 issetmem);
25013 /* At most desired_align - align bytes are copied. */
25014 if (min_size < (unsigned)(desired_align - align))
25015 min_size = 0;
25016 else
25017 min_size -= desired_align - align;
25019 else
25021 /* If we know how many bytes need to be stored before dst is
25022 sufficiently aligned, maintain aliasing info accurately. */
25023 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25024 srcreg,
25025 promoted_val,
25026 vec_promoted_val,
25027 desired_align,
25028 align_bytes,
25029 issetmem);
25031 count_exp = plus_constant (counter_mode (count_exp),
25032 count_exp, -align_bytes);
25033 count -= align_bytes;
25034 min_size -= align_bytes;
25035 max_size -= align_bytes;
25037 if (need_zero_guard
25038 && !min_size
25039 && (count < (unsigned HOST_WIDE_INT) size_needed
25040 || (align_bytes == 0
25041 && count < ((unsigned HOST_WIDE_INT) size_needed
25042 + desired_align - align))))
25044 /* It is possible that we copied enough so the main loop will not
25045 execute. */
25046 gcc_assert (size_needed > 1);
25047 if (label == NULL_RTX)
25048 label = gen_label_rtx ();
25049 emit_cmp_and_jump_insns (count_exp,
25050 GEN_INT (size_needed),
25051 LTU, 0, counter_mode (count_exp), 1, label);
25052 if (expected_size == -1
25053 || expected_size < (desired_align - align) / 2 + size_needed)
25054 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25055 else
25056 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25059 if (label && size_needed == 1)
25061 emit_label (label);
25062 LABEL_NUSES (label) = 1;
25063 label = NULL;
25064 epilogue_size_needed = 1;
25065 if (issetmem)
25066 promoted_val = val_exp;
25068 else if (label == NULL_RTX && !misaligned_prologue_used)
25069 epilogue_size_needed = size_needed;
25071 /* Step 3: Main loop. */
25073 switch (alg)
25075 case libcall:
25076 case no_stringop:
25077 case last_alg:
25078 gcc_unreachable ();
25079 case loop_1_byte:
25080 case loop:
25081 case unrolled_loop:
25082 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25083 count_exp, move_mode, unroll_factor,
25084 expected_size, issetmem);
25085 break;
25086 case vector_loop:
25087 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25088 vec_promoted_val, count_exp, move_mode,
25089 unroll_factor, expected_size, issetmem);
25090 break;
25091 case rep_prefix_8_byte:
25092 case rep_prefix_4_byte:
25093 case rep_prefix_1_byte:
25094 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25095 val_exp, count_exp, move_mode, issetmem);
25096 break;
25098 /* Adjust properly the offset of src and dest memory for aliasing. */
25099 if (CONST_INT_P (count_exp))
25101 if (!issetmem)
25102 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25103 (count / size_needed) * size_needed);
25104 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25105 (count / size_needed) * size_needed);
25107 else
25109 if (!issetmem)
25110 src = change_address (src, BLKmode, srcreg);
25111 dst = change_address (dst, BLKmode, destreg);
25114 /* Step 4: Epilogue to copy the remaining bytes. */
25115 epilogue:
25116 if (label)
25118 /* When the main loop is done, COUNT_EXP might hold original count,
25119 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25120 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25121 bytes. Compensate if needed. */
25123 if (size_needed < epilogue_size_needed)
25125 tmp =
25126 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25127 GEN_INT (size_needed - 1), count_exp, 1,
25128 OPTAB_DIRECT);
25129 if (tmp != count_exp)
25130 emit_move_insn (count_exp, tmp);
25132 emit_label (label);
25133 LABEL_NUSES (label) = 1;
25136 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25138 if (force_loopy_epilogue)
25139 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25140 epilogue_size_needed);
25141 else
25143 if (issetmem)
25144 expand_setmem_epilogue (dst, destreg, promoted_val,
25145 vec_promoted_val, count_exp,
25146 epilogue_size_needed);
25147 else
25148 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25149 epilogue_size_needed);
25152 if (jump_around_label)
25153 emit_label (jump_around_label);
25154 return true;
25158 /* Expand the appropriate insns for doing strlen if not just doing
25159 repnz; scasb
25161 out = result, initialized with the start address
25162 align_rtx = alignment of the address.
25163 scratch = scratch register, initialized with the start address when
25164 not aligned, otherwise undefined
25166 This is just the body. It needs the initializations mentioned above and
25167 some address computing at the end. These things are done in i386.md. */
25169 static void
25170 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25172 int align;
25173 rtx tmp;
25174 rtx_code_label *align_2_label = NULL;
25175 rtx_code_label *align_3_label = NULL;
25176 rtx_code_label *align_4_label = gen_label_rtx ();
25177 rtx_code_label *end_0_label = gen_label_rtx ();
25178 rtx mem;
25179 rtx tmpreg = gen_reg_rtx (SImode);
25180 rtx scratch = gen_reg_rtx (SImode);
25181 rtx cmp;
25183 align = 0;
25184 if (CONST_INT_P (align_rtx))
25185 align = INTVAL (align_rtx);
25187 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25189 /* Is there a known alignment and is it less than 4? */
25190 if (align < 4)
25192 rtx scratch1 = gen_reg_rtx (Pmode);
25193 emit_move_insn (scratch1, out);
25194 /* Is there a known alignment and is it not 2? */
25195 if (align != 2)
25197 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25198 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25200 /* Leave just the 3 lower bits. */
25201 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25202 NULL_RTX, 0, OPTAB_WIDEN);
25204 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25205 Pmode, 1, align_4_label);
25206 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25207 Pmode, 1, align_2_label);
25208 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25209 Pmode, 1, align_3_label);
25211 else
25213 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25214 check whether it is aligned to 4 bytes. */
25216 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25217 NULL_RTX, 0, OPTAB_WIDEN);
25219 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25220 Pmode, 1, align_4_label);
25223 mem = change_address (src, QImode, out);
25225 /* Now compare the bytes. */
25227 /* Compare the first n unaligned byte on a byte per byte basis. */
25228 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25229 QImode, 1, end_0_label);
25231 /* Increment the address. */
25232 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25234 /* Not needed with an alignment of 2 */
25235 if (align != 2)
25237 emit_label (align_2_label);
25239 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25240 end_0_label);
25242 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25244 emit_label (align_3_label);
25247 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25248 end_0_label);
25250 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25253 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25254 align this loop; it only makes the program larger and does not help to
25255 speed it up. */
25256 emit_label (align_4_label);
25258 mem = change_address (src, SImode, out);
25259 emit_move_insn (scratch, mem);
25260 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25262 /* This formula yields a nonzero result iff one of the bytes is zero.
25263 This saves three branches inside the loop and many cycles. */
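/* Worked example (illustrative): for scratch = 0x41004242 the value
   (scratch - 0x01010101) & ~scratch & 0x80808080 equals 0x00800000,
   nonzero precisely because the 0x00 byte is present; for
   scratch = 0x41424344, which has no zero byte, the result is 0.  */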
25265 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25266 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25267 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25268 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25269 gen_int_mode (0x80808080, SImode)));
25270 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25271 align_4_label);
25273 if (TARGET_CMOVE)
25275 rtx reg = gen_reg_rtx (SImode);
25276 rtx reg2 = gen_reg_rtx (Pmode);
25277 emit_move_insn (reg, tmpreg);
25278 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25280 /* If zero is not in the first two bytes, move two bytes forward. */
25281 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25282 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25283 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25284 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25285 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25286 reg,
25287 tmpreg)));
25288 /* Emit lea manually to avoid clobbering of flags. */
25289 emit_insn (gen_rtx_SET (SImode, reg2,
25290 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25292 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25293 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25294 emit_insn (gen_rtx_SET (VOIDmode, out,
25295 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25296 reg2,
25297 out)));
25299 else
25301 rtx_code_label *end_2_label = gen_label_rtx ();
25302 /* Is zero in the first two bytes? */
25304 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25305 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25306 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25307 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25308 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25309 pc_rtx);
25310 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25311 JUMP_LABEL (tmp) = end_2_label;
25313 /* Not in the first two. Move two bytes forward. */
25314 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25315 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25317 emit_label (end_2_label);
25321 /* Avoid branch in fixing the byte. */
25322 tmpreg = gen_lowpart (QImode, tmpreg);
25323 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25324 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25325 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25326 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25328 emit_label (end_0_label);
25331 /* Expand strlen. */
25333 bool
25334 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25336 rtx addr, scratch1, scratch2, scratch3, scratch4;
25338 /* The generic case of the strlen expander is long. Avoid its
25339 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
25341 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25342 && !TARGET_INLINE_ALL_STRINGOPS
25343 && !optimize_insn_for_size_p ()
25344 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25345 return false;
25347 addr = force_reg (Pmode, XEXP (src, 0));
25348 scratch1 = gen_reg_rtx (Pmode);
25350 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25351 && !optimize_insn_for_size_p ())
25353 /* Well it seems that some optimizer does not combine a call like
25354 foo(strlen(bar), strlen(bar));
25355 when the move and the subtraction are done here. It does calculate
25356 the length just once when these instructions are done inside of
25357 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25358 often used and I use one fewer register for the lifetime of
25359 output_strlen_unroll() this is better. */
25361 emit_move_insn (out, addr);
25363 ix86_expand_strlensi_unroll_1 (out, src, align);
25365 /* strlensi_unroll_1 returns the address of the zero at the end of
25366 the string, like memchr(), so compute the length by subtracting
25367 the start address. */
25368 emit_insn (ix86_gen_sub3 (out, out, addr));
25370 else
25372 rtx unspec;
25374 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25375 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25376 return false;
25378 scratch2 = gen_reg_rtx (Pmode);
25379 scratch3 = gen_reg_rtx (Pmode);
25380 scratch4 = force_reg (Pmode, constm1_rtx);
25382 emit_move_insn (scratch3, addr);
25383 eoschar = force_reg (QImode, eoschar);
25385 src = replace_equiv_address_nv (src, scratch3);
25387 /* If .md starts supporting :P, this can be done in .md. */
25388 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25389 scratch4), UNSPEC_SCAS);
25390 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25391 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25392 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25394 return true;
25397 /* For a given symbol (function) construct code to compute the address of its PLT
25398 entry in the large x86-64 PIC model. */
25399 static rtx
25400 construct_plt_address (rtx symbol)
25402 rtx tmp, unspec;
25404 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25405 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25406 gcc_assert (Pmode == DImode);
25408 tmp = gen_reg_rtx (Pmode);
25409 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25411 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25412 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25413 return tmp;
25417 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25418 rtx callarg2,
25419 rtx pop, bool sibcall)
25421 rtx vec[3];
25422 rtx use = NULL, call;
25423 unsigned int vec_len = 0;
25425 if (pop == const0_rtx)
25426 pop = NULL;
25427 gcc_assert (!TARGET_64BIT || !pop);
25429 if (TARGET_MACHO && !TARGET_64BIT)
25431 #if TARGET_MACHO
25432 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25433 fnaddr = machopic_indirect_call_target (fnaddr);
25434 #endif
25436 else
25438 /* Static functions and indirect calls don't need the pic register. */
25439 if (flag_pic
25440 && (!TARGET_64BIT
25441 || (ix86_cmodel == CM_LARGE_PIC
25442 && DEFAULT_ABI != MS_ABI))
25443 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25444 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25446 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25447 if (ix86_use_pseudo_pic_reg ())
25448 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25449 pic_offset_table_rtx);
25453 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25455 rtx al = gen_rtx_REG (QImode, AX_REG);
25456 emit_move_insn (al, callarg2);
25457 use_reg (&use, al);
25460 if (ix86_cmodel == CM_LARGE_PIC
25461 && !TARGET_PECOFF
25462 && MEM_P (fnaddr)
25463 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25464 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25465 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25466 else if (sibcall
25467 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25468 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25470 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25471 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25474 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25476 if (retval)
25478 /* We should add bounds as a destination register in case
25479 a pointer with bounds may be returned. */
25480 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25482 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25483 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25484 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25485 chkp_put_regs_to_expr_list (retval);
25488 call = gen_rtx_SET (VOIDmode, retval, call);
25490 vec[vec_len++] = call;
25492 if (pop)
25494 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25495 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25496 vec[vec_len++] = pop;
25499 if (TARGET_64BIT_MS_ABI
25500 && (!callarg2 || INTVAL (callarg2) != -2))
25502 int const cregs_size
25503 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25504 int i;
25506 for (i = 0; i < cregs_size; i++)
25508 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25509 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25511 clobber_reg (&use, gen_rtx_REG (mode, regno));
25515 if (vec_len > 1)
25516 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25517 call = emit_call_insn (call);
25518 if (use)
25519 CALL_INSN_FUNCTION_USAGE (call) = use;
25521 return call;
25524 /* Output the assembly for a call instruction. */
25526 const char *
25527 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25529 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25530 bool seh_nop_p = false;
25531 const char *xasm;
25533 if (SIBLING_CALL_P (insn))
25535 if (direct_p)
25536 xasm = "%!jmp\t%P0";
25537 /* SEH epilogue detection requires the indirect branch case
25538 to include REX.W. */
25539 else if (TARGET_SEH)
25540 xasm = "%!rex.W jmp %A0";
25541 else
25542 xasm = "%!jmp\t%A0";
25544 output_asm_insn (xasm, &call_op);
25545 return "";
25548 /* SEH unwinding can require an extra nop to be emitted in several
25549 circumstances. Determine if we have one of those. */
25550 if (TARGET_SEH)
25552 rtx_insn *i;
25554 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25556 /* If we get to another real insn, we don't need the nop. */
25557 if (INSN_P (i))
25558 break;
25560 /* If we get to the epilogue note, prevent a catch region from
25561 being adjacent to the standard epilogue sequence. If non-
25562 call-exceptions, we'll have done this during epilogue emission. */
25563 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25564 && !flag_non_call_exceptions
25565 && !can_throw_internal (insn))
25567 seh_nop_p = true;
25568 break;
25572 /* If we didn't find a real insn following the call, prevent the
25573 unwinder from looking into the next function. */
25574 if (i == NULL)
25575 seh_nop_p = true;
25578 if (direct_p)
25579 xasm = "%!call\t%P0";
25580 else
25581 xasm = "%!call\t%A0";
25583 output_asm_insn (xasm, &call_op);
25585 if (seh_nop_p)
25586 return "nop";
25588 return "";
25591 /* Clear stack slot assignments remembered from previous functions.
25592 This is called from INIT_EXPANDERS once before RTL is emitted for each
25593 function. */
25595 static struct machine_function *
25596 ix86_init_machine_status (void)
25598 struct machine_function *f;
25600 f = ggc_cleared_alloc<machine_function> ();
25601 f->use_fast_prologue_epilogue_nregs = -1;
25602 f->call_abi = ix86_abi;
25604 return f;
25607 /* Return a MEM corresponding to a stack slot with mode MODE.
25608 Allocate a new slot if necessary.
25610 The RTL for a function can have several slots available: N is
25611 which slot to use. */
25614 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25616 struct stack_local_entry *s;
25618 gcc_assert (n < MAX_386_STACK_LOCALS);
25620 for (s = ix86_stack_locals; s; s = s->next)
25621 if (s->mode == mode && s->n == n)
25622 return validize_mem (copy_rtx (s->rtl));
25624 s = ggc_alloc<stack_local_entry> ();
25625 s->n = n;
25626 s->mode = mode;
25627 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25629 s->next = ix86_stack_locals;
25630 ix86_stack_locals = s;
25631 return validize_mem (copy_rtx (s->rtl));
25634 static void
25635 ix86_instantiate_decls (void)
25637 struct stack_local_entry *s;
25639 for (s = ix86_stack_locals; s; s = s->next)
25640 if (s->rtl != NULL_RTX)
25641 instantiate_decl_rtl (s->rtl);
25644 /* Check whether x86 address PARTS is a pc-relative address. */
25646 static bool
25647 rip_relative_addr_p (struct ix86_address *parts)
25649 rtx base, index, disp;
25651 base = parts->base;
25652 index = parts->index;
25653 disp = parts->disp;
25655 if (disp && !base && !index)
25657 if (TARGET_64BIT)
25659 rtx symbol = disp;
25661 if (GET_CODE (disp) == CONST)
25662 symbol = XEXP (disp, 0);
25663 if (GET_CODE (symbol) == PLUS
25664 && CONST_INT_P (XEXP (symbol, 1)))
25665 symbol = XEXP (symbol, 0);
25667 if (GET_CODE (symbol) == LABEL_REF
25668 || (GET_CODE (symbol) == SYMBOL_REF
25669 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25670 || (GET_CODE (symbol) == UNSPEC
25671 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25672 || XINT (symbol, 1) == UNSPEC_PCREL
25673 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25674 return true;
25677 return false;
25680 /* Calculate the length of the memory address in the instruction encoding.
25681 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25682 or other prefixes. We never generate addr32 prefix for LEA insn. */
25685 memory_address_length (rtx addr, bool lea)
25687 struct ix86_address parts;
25688 rtx base, index, disp;
25689 int len;
25690 int ok;
25692 if (GET_CODE (addr) == PRE_DEC
25693 || GET_CODE (addr) == POST_INC
25694 || GET_CODE (addr) == PRE_MODIFY
25695 || GET_CODE (addr) == POST_MODIFY)
25696 return 0;
25698 ok = ix86_decompose_address (addr, &parts);
25699 gcc_assert (ok);
25701 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25703 /* If this is not LEA instruction, add the length of addr32 prefix. */
25704 if (TARGET_64BIT && !lea
25705 && (SImode_address_operand (addr, VOIDmode)
25706 || (parts.base && GET_MODE (parts.base) == SImode)
25707 || (parts.index && GET_MODE (parts.index) == SImode)))
25708 len++;
25710 base = parts.base;
25711 index = parts.index;
25712 disp = parts.disp;
25714 if (base && GET_CODE (base) == SUBREG)
25715 base = SUBREG_REG (base);
25716 if (index && GET_CODE (index) == SUBREG)
25717 index = SUBREG_REG (index);
25719 gcc_assert (base == NULL_RTX || REG_P (base));
25720 gcc_assert (index == NULL_RTX || REG_P (index));
25722 /* Rule of thumb:
25723 - esp as the base always wants an index,
25724 - ebp as the base always wants a displacement,
25725 - r12 as the base always wants an index,
25726 - r13 as the base always wants a displacement. */
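/* Illustrative examples of the extra length computed below: (%eax) adds
   0 bytes, (%esp) adds 1 (SIB byte), (%ebp) adds 1 (zero disp8),
   16(%eax) adds 1 (disp8), sym(%eax) adds 4 (disp32), and a bare
   displacement adds 4, possibly plus one more byte in 64-bit code
   depending on whether it can be encoded %rip-relative.  */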
25728 /* Register Indirect. */
25729 if (base && !index && !disp)
25731 /* esp (for its index) and ebp (for its displacement) need
25732 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25733 code. */
25734 if (base == arg_pointer_rtx
25735 || base == frame_pointer_rtx
25736 || REGNO (base) == SP_REG
25737 || REGNO (base) == BP_REG
25738 || REGNO (base) == R12_REG
25739 || REGNO (base) == R13_REG)
25740 len++;
25743 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25744 is not disp32, but disp32(%rip), so for disp32
25745 SIB byte is needed, unless print_operand_address
25746 optimizes it into disp32(%rip) or (%rip) is implied
25747 by UNSPEC. */
25748 else if (disp && !base && !index)
25750 len += 4;
25751 if (rip_relative_addr_p (&parts))
25752 len++;
25754 else
25756 /* Find the length of the displacement constant. */
25757 if (disp)
25759 if (base && satisfies_constraint_K (disp))
25760 len += 1;
25761 else
25762 len += 4;
25764 /* ebp always wants a displacement. Similarly r13. */
25765 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25766 len++;
25768 /* An index requires the two-byte modrm form.... */
25769 if (index
25770 /* ...like esp (or r12), which always wants an index. */
25771 || base == arg_pointer_rtx
25772 || base == frame_pointer_rtx
25773 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25774 len++;
25777 return len;
25780 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25781 is set, expect that the insn has an 8-bit immediate alternative. */
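/* Illustrative examples: "addl $3, %eax" has an imm8 alternative, so
   with SHORTFORM the immediate contributes 1 byte; "addl $1000, %eax"
   needs a full imm32 and contributes 4; DImode instructions likewise
   contribute 4, since their immediates are sign-extended 32-bit
   values.  */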
25783 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25785 int len = 0;
25786 int i;
25787 extract_insn_cached (insn);
25788 for (i = recog_data.n_operands - 1; i >= 0; --i)
25789 if (CONSTANT_P (recog_data.operand[i]))
25791 enum attr_mode mode = get_attr_mode (insn);
25793 gcc_assert (!len);
25794 if (shortform && CONST_INT_P (recog_data.operand[i]))
25796 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25797 switch (mode)
25799 case MODE_QI:
25800 len = 1;
25801 continue;
25802 case MODE_HI:
25803 ival = trunc_int_for_mode (ival, HImode);
25804 break;
25805 case MODE_SI:
25806 ival = trunc_int_for_mode (ival, SImode);
25807 break;
25808 default:
25809 break;
25811 if (IN_RANGE (ival, -128, 127))
25813 len = 1;
25814 continue;
25817 switch (mode)
25819 case MODE_QI:
25820 len = 1;
25821 break;
25822 case MODE_HI:
25823 len = 2;
25824 break;
25825 case MODE_SI:
25826 len = 4;
25827 break;
25828 /* Immediates for DImode instructions are encoded
25829 as 32bit sign extended values. */
25830 case MODE_DI:
25831 len = 4;
25832 break;
25833 default:
25834 fatal_insn ("unknown insn mode", insn);
25837 return len;
25840 /* Compute default value for "length_address" attribute. */
25842 ix86_attr_length_address_default (rtx_insn *insn)
25844 int i;
25846 if (get_attr_type (insn) == TYPE_LEA)
25848 rtx set = PATTERN (insn), addr;
25850 if (GET_CODE (set) == PARALLEL)
25851 set = XVECEXP (set, 0, 0);
25853 gcc_assert (GET_CODE (set) == SET);
25855 addr = SET_SRC (set);
25857 return memory_address_length (addr, true);
25860 extract_insn_cached (insn);
25861 for (i = recog_data.n_operands - 1; i >= 0; --i)
25862 if (MEM_P (recog_data.operand[i]))
25864 constrain_operands_cached (insn, reload_completed);
25865 if (which_alternative != -1)
25867 const char *constraints = recog_data.constraints[i];
25868 int alt = which_alternative;
25870 while (*constraints == '=' || *constraints == '+')
25871 constraints++;
25872 while (alt-- > 0)
25873 while (*constraints++ != ',')
25875 /* Skip ignored operands. */
25876 if (*constraints == 'X')
25877 continue;
25879 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25881 return 0;
25884 /* Compute default value for "length_vex" attribute. It includes
25885 2 or 3 byte VEX prefix and 1 opcode byte. */
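/* Illustrative examples: "vaddps %xmm1, %xmm2, %xmm0" (0f opcode map,
   no VEX.W, no extended registers in a memory operand) can use the
   2-byte VEX form, giving 2 + 1 = 3; a DImode general-register operand
   or a memory operand addressed through %r8-%r15 forces the 3-byte
   form, giving 3 + 1 = 4.  */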
25888 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25889 bool has_vex_w)
25891 int i;
25893 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit needs the
25894 3-byte VEX prefix. */
25895 if (!has_0f_opcode || has_vex_w)
25896 return 3 + 1;
25898 /* We can always use 2 byte VEX prefix in 32bit. */
25899 if (!TARGET_64BIT)
25900 return 2 + 1;
25902 extract_insn_cached (insn);
25904 for (i = recog_data.n_operands - 1; i >= 0; --i)
25905 if (REG_P (recog_data.operand[i]))
25907 /* REX.W bit uses 3 byte VEX prefix. */
25908 if (GET_MODE (recog_data.operand[i]) == DImode
25909 && GENERAL_REG_P (recog_data.operand[i]))
25910 return 3 + 1;
25912 else
25914 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25915 if (MEM_P (recog_data.operand[i])
25916 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25917 return 3 + 1;
25920 return 2 + 1;
25923 /* Return the maximum number of instructions a cpu can issue. */
25925 static int
25926 ix86_issue_rate (void)
25928 switch (ix86_tune)
25930 case PROCESSOR_PENTIUM:
25931 case PROCESSOR_BONNELL:
25932 case PROCESSOR_SILVERMONT:
25933 case PROCESSOR_INTEL:
25934 case PROCESSOR_K6:
25935 case PROCESSOR_BTVER2:
25936 case PROCESSOR_PENTIUM4:
25937 case PROCESSOR_NOCONA:
25938 return 2;
25940 case PROCESSOR_PENTIUMPRO:
25941 case PROCESSOR_ATHLON:
25942 case PROCESSOR_K8:
25943 case PROCESSOR_AMDFAM10:
25944 case PROCESSOR_GENERIC:
25945 case PROCESSOR_BTVER1:
25946 return 3;
25948 case PROCESSOR_BDVER1:
25949 case PROCESSOR_BDVER2:
25950 case PROCESSOR_BDVER3:
25951 case PROCESSOR_BDVER4:
25952 case PROCESSOR_CORE2:
25953 case PROCESSOR_NEHALEM:
25954 case PROCESSOR_SANDYBRIDGE:
25955 case PROCESSOR_HASWELL:
25956 return 4;
25958 default:
25959 return 1;
25963 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25964 by DEP_INSN and nothing else set by DEP_INSN. */
25966 static bool
25967 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25969 rtx set, set2;
25971 /* Simplify the test for uninteresting insns. */
25972 if (insn_type != TYPE_SETCC
25973 && insn_type != TYPE_ICMOV
25974 && insn_type != TYPE_FCMOV
25975 && insn_type != TYPE_IBR)
25976 return false;
25978 if ((set = single_set (dep_insn)) != 0)
25980 set = SET_DEST (set);
25981 set2 = NULL_RTX;
25983 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25984 && XVECLEN (PATTERN (dep_insn), 0) == 2
25985 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25986 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25988 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25989 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25991 else
25992 return false;
25994 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25995 return false;
25997 /* This test is true if the dependent insn reads the flags but
25998 not any other potentially set register. */
25999 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26000 return false;
26002 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26003 return false;
26005 return true;
26008 /* Return true iff USE_INSN has a memory address with operands set by
26009 SET_INSN. */
26011 bool
26012 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26014 int i;
26015 extract_insn_cached (use_insn);
26016 for (i = recog_data.n_operands - 1; i >= 0; --i)
26017 if (MEM_P (recog_data.operand[i]))
26019 rtx addr = XEXP (recog_data.operand[i], 0);
26020 return modified_in_p (addr, set_insn) != 0;
26022 return false;
26025 /* Helper function for exact_store_load_dependency.
26026 Return true if addr is found in insn. */
26027 static bool
26028 exact_dependency_1 (rtx addr, rtx insn)
26030 enum rtx_code code;
26031 const char *format_ptr;
26032 int i, j;
26034 code = GET_CODE (insn);
26035 switch (code)
26037 case MEM:
26038 if (rtx_equal_p (addr, insn))
26039 return true;
26040 break;
26041 case REG:
26042 CASE_CONST_ANY:
26043 case SYMBOL_REF:
26044 case CODE_LABEL:
26045 case PC:
26046 case CC0:
26047 case EXPR_LIST:
26048 return false;
26049 default:
26050 break;
26053 format_ptr = GET_RTX_FORMAT (code);
26054 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26056 switch (*format_ptr++)
26058 case 'e':
26059 if (exact_dependency_1 (addr, XEXP (insn, i)))
26060 return true;
26061 break;
26062 case 'E':
26063 for (j = 0; j < XVECLEN (insn, i); j++)
26064 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26065 return true;
26066 break;
26069 return false;
26072 /* Return true if there exists an exact dependency between store & load, i.e.
26073 the same memory address is used in them. */
26074 static bool
26075 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26077 rtx set1, set2;
26079 set1 = single_set (store);
26080 if (!set1)
26081 return false;
26082 if (!MEM_P (SET_DEST (set1)))
26083 return false;
26084 set2 = single_set (load);
26085 if (!set2)
26086 return false;
26087 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26088 return true;
26089 return false;
26092 static int
26093 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26095 enum attr_type insn_type, dep_insn_type;
26096 enum attr_memory memory;
26097 rtx set, set2;
26098 int dep_insn_code_number;
26100 /* Anti and output dependencies have zero cost on all CPUs. */
26101 if (REG_NOTE_KIND (link) != 0)
26102 return 0;
26104 dep_insn_code_number = recog_memoized (dep_insn);
26106 /* If we can't recognize the insns, we can't really do anything. */
26107 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26108 return cost;
26110 insn_type = get_attr_type (insn);
26111 dep_insn_type = get_attr_type (dep_insn);
26113 switch (ix86_tune)
26115 case PROCESSOR_PENTIUM:
26116 /* Address Generation Interlock adds a cycle of latency. */
26117 if (insn_type == TYPE_LEA)
26119 rtx addr = PATTERN (insn);
26121 if (GET_CODE (addr) == PARALLEL)
26122 addr = XVECEXP (addr, 0, 0);
26124 gcc_assert (GET_CODE (addr) == SET);
26126 addr = SET_SRC (addr);
26127 if (modified_in_p (addr, dep_insn))
26128 cost += 1;
26130 else if (ix86_agi_dependent (dep_insn, insn))
26131 cost += 1;
26133 /* ??? Compares pair with jump/setcc. */
26134 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26135 cost = 0;
26137 /* Floating point stores require value to be ready one cycle earlier. */
26138 if (insn_type == TYPE_FMOV
26139 && get_attr_memory (insn) == MEMORY_STORE
26140 && !ix86_agi_dependent (dep_insn, insn))
26141 cost += 1;
26142 break;
26144 case PROCESSOR_PENTIUMPRO:
26145 /* INT->FP conversion is expensive. */
26146 if (get_attr_fp_int_src (dep_insn))
26147 cost += 5;
26149 /* There is one cycle extra latency between an FP op and a store. */
26150 if (insn_type == TYPE_FMOV
26151 && (set = single_set (dep_insn)) != NULL_RTX
26152 && (set2 = single_set (insn)) != NULL_RTX
26153 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26154 && MEM_P (SET_DEST (set2)))
26155 cost += 1;
26157 memory = get_attr_memory (insn);
26159 /* Show ability of reorder buffer to hide latency of load by executing
26160 in parallel with previous instruction in case
26161 previous instruction is not needed to compute the address. */
26162 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26163 && !ix86_agi_dependent (dep_insn, insn))
26165 /* Claim moves to take one cycle, as the core can issue one load
26166 at a time and the next load can start a cycle later. */
26167 if (dep_insn_type == TYPE_IMOV
26168 || dep_insn_type == TYPE_FMOV)
26169 cost = 1;
26170 else if (cost > 1)
26171 cost--;
26173 break;
26175 case PROCESSOR_K6:
26176 /* The esp dependency is resolved before
26177 the instruction is really finished. */
26178 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26179 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26180 return 1;
26182 /* INT->FP conversion is expensive. */
26183 if (get_attr_fp_int_src (dep_insn))
26184 cost += 5;
26186 memory = get_attr_memory (insn);
26188 /* Show ability of reorder buffer to hide latency of load by executing
26189 in parallel with previous instruction in case
26190 previous instruction is not needed to compute the address. */
26191 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26192 && !ix86_agi_dependent (dep_insn, insn))
26194 /* Claim moves to take one cycle, as the core can issue one load
26195 at a time and the next load can start a cycle later. */
26196 if (dep_insn_type == TYPE_IMOV
26197 || dep_insn_type == TYPE_FMOV)
26198 cost = 1;
26199 else if (cost > 2)
26200 cost -= 2;
26201 else
26202 cost = 1;
26204 break;
26206 case PROCESSOR_AMDFAM10:
26207 case PROCESSOR_BDVER1:
26208 case PROCESSOR_BDVER2:
26209 case PROCESSOR_BDVER3:
26210 case PROCESSOR_BDVER4:
26211 case PROCESSOR_BTVER1:
26212 case PROCESSOR_BTVER2:
26213 case PROCESSOR_GENERIC:
26214 /* The stack engine allows push&pop instructions to execute in parallel. */
26215 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26216 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26217 return 0;
26218 /* FALLTHRU */
26220 case PROCESSOR_ATHLON:
26221 case PROCESSOR_K8:
26222 memory = get_attr_memory (insn);
26224 /* Show ability of reorder buffer to hide latency of load by executing
26225 in parallel with previous instruction in case
26226 previous instruction is not needed to compute the address. */
26227 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26228 && !ix86_agi_dependent (dep_insn, insn))
26230 enum attr_unit unit = get_attr_unit (insn);
26231 int loadcost = 3;
26233 /* Because of the difference between the length of integer and
26234 floating unit pipeline preparation stages, the memory operands
26235 for floating point are cheaper.
26237 ??? For Athlon the difference is most probably 2. */
26238 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26239 loadcost = 3;
26240 else
26241 loadcost = TARGET_ATHLON ? 2 : 0;
26243 if (cost >= loadcost)
26244 cost -= loadcost;
26245 else
26246 cost = 0;
26248 break;
26250 case PROCESSOR_CORE2:
26251 case PROCESSOR_NEHALEM:
26252 case PROCESSOR_SANDYBRIDGE:
26253 case PROCESSOR_HASWELL:
26254 /* The stack engine allows push&pop instructions to execute in parallel. */
26255 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26256 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26257 return 0;
26259 memory = get_attr_memory (insn);
26261 /* Show ability of reorder buffer to hide latency of load by executing
26262 in parallel with previous instruction in case
26263 previous instruction is not needed to compute the address. */
26264 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26265 && !ix86_agi_dependent (dep_insn, insn))
26267 if (cost >= 4)
26268 cost -= 4;
26269 else
26270 cost = 0;
26272 break;
26274 case PROCESSOR_SILVERMONT:
26275 case PROCESSOR_INTEL:
26276 if (!reload_completed)
26277 return cost;
26279 /* Increase cost of integer loads. */
26280 memory = get_attr_memory (dep_insn);
26281 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26283 enum attr_unit unit = get_attr_unit (dep_insn);
26284 if (unit == UNIT_INTEGER && cost == 1)
26286 if (memory == MEMORY_LOAD)
26287 cost = 3;
26288 else
26290 /* Increase cost of ld/st for short int types only
26291 because of store forwarding issue. */
26292 rtx set = single_set (dep_insn);
26293 if (set && (GET_MODE (SET_DEST (set)) == QImode
26294 || GET_MODE (SET_DEST (set)) == HImode))
26296 /* Increase cost of store/load insn if exact
26297 dependence exists and it is load insn. */
26298 enum attr_memory insn_memory = get_attr_memory (insn);
26299 if (insn_memory == MEMORY_LOAD
26300 && exact_store_load_dependency (dep_insn, insn))
26301 cost = 3;
26307 default:
26308 break;
26311 return cost;
26314 /* How many alternative schedules to try. This should be as wide as the
26315 scheduling freedom in the DFA, but no wider. Making this value too
26316 large results in extra work for the scheduler. */
26318 static int
26319 ia32_multipass_dfa_lookahead (void)
26321 switch (ix86_tune)
26323 case PROCESSOR_PENTIUM:
26324 return 2;
26326 case PROCESSOR_PENTIUMPRO:
26327 case PROCESSOR_K6:
26328 return 1;
26330 case PROCESSOR_BDVER1:
26331 case PROCESSOR_BDVER2:
26332 case PROCESSOR_BDVER3:
26333 case PROCESSOR_BDVER4:
26334 /* We use lookahead value 4 for BD both before and after reload
26335 schedules. The plan is to have value 8 included for -O3. */
26336 return 4;
26338 case PROCESSOR_CORE2:
26339 case PROCESSOR_NEHALEM:
26340 case PROCESSOR_SANDYBRIDGE:
26341 case PROCESSOR_HASWELL:
26342 case PROCESSOR_BONNELL:
26343 case PROCESSOR_SILVERMONT:
26344 case PROCESSOR_INTEL:
26345 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26346 as the number of instructions that can be executed in a cycle, i.e.,
26347 issue_rate. I wonder why tuning for many CPUs does not do this. */
26348 if (reload_completed)
26349 return ix86_issue_rate ();
26350 /* Don't use lookahead for pre-reload schedule to save compile time. */
26351 return 0;
26353 default:
26354 return 0;
26358 /* Return true if target platform supports macro-fusion. */
26360 static bool
26361 ix86_macro_fusion_p ()
26363 return TARGET_FUSE_CMP_AND_BRANCH;
26366 /* Check whether the current microarchitecture supports macro fusion
26367 for insn pair "CONDGEN + CONDJMP". Refer to
26368 "Intel Architectures Optimization Reference Manual". */
26370 static bool
26371 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26373 rtx src, dest;
26374 enum rtx_code ccode;
26375 rtx compare_set = NULL_RTX, test_if, cond;
26376 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26378 if (!any_condjump_p (condjmp))
26379 return false;
26381 if (get_attr_type (condgen) != TYPE_TEST
26382 && get_attr_type (condgen) != TYPE_ICMP
26383 && get_attr_type (condgen) != TYPE_INCDEC
26384 && get_attr_type (condgen) != TYPE_ALU)
26385 return false;
26387 compare_set = single_set (condgen);
26388 if (compare_set == NULL_RTX
26389 && !TARGET_FUSE_ALU_AND_BRANCH)
26390 return false;
26392 if (compare_set == NULL_RTX)
26394 int i;
26395 rtx pat = PATTERN (condgen);
26396 for (i = 0; i < XVECLEN (pat, 0); i++)
26397 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26399 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26400 if (GET_CODE (set_src) == COMPARE)
26401 compare_set = XVECEXP (pat, 0, i);
26402 else
26403 alu_set = XVECEXP (pat, 0, i);
26406 if (compare_set == NULL_RTX)
26407 return false;
26408 src = SET_SRC (compare_set);
26409 if (GET_CODE (src) != COMPARE)
26410 return false;
26412 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26413 supported. */
26414 if ((MEM_P (XEXP (src, 0))
26415 && CONST_INT_P (XEXP (src, 1)))
26416 || (MEM_P (XEXP (src, 1))
26417 && CONST_INT_P (XEXP (src, 0))))
26418 return false;
26420 /* No fusion for RIP-relative address. */
26421 if (MEM_P (XEXP (src, 0)))
26422 addr = XEXP (XEXP (src, 0), 0);
26423 else if (MEM_P (XEXP (src, 1)))
26424 addr = XEXP (XEXP (src, 1), 0);
26426 if (addr) {
26427 ix86_address parts;
26428 int ok = ix86_decompose_address (addr, &parts);
26429 gcc_assert (ok);
26431 if (rip_relative_addr_p (&parts))
26432 return false;
26435 test_if = SET_SRC (pc_set (condjmp));
26436 cond = XEXP (test_if, 0);
26437 ccode = GET_CODE (cond);
26438 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26439 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26440 && (ccode == GE
26441 || ccode == GT
26442 || ccode == LE
26443 || ccode == LT))
26444 return false;
26446 /* Return true for TYPE_TEST and TYPE_ICMP. */
26447 if (get_attr_type (condgen) == TYPE_TEST
26448 || get_attr_type (condgen) == TYPE_ICMP)
26449 return true;
26451 /* The following handles the case of macro-fusion for alu + jmp. */
26452 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26453 return false;
26455 /* No fusion for alu op with memory destination operand. */
26456 dest = SET_DEST (alu_set);
26457 if (MEM_P (dest))
26458 return false;
26460 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26461 supported. */
26462 if (get_attr_type (condgen) == TYPE_INCDEC
26463 && (ccode == GEU
26464 || ccode == GTU
26465 || ccode == LEU
26466 || ccode == LTU))
26467 return false;
26469 return true;
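/* Illustrative sketch (editorial addition, not part of the original source):
   example instruction pairs and how the checks above classify them.  The
   concrete mnemonics and labels are assumptions for illustration only.

     cmp   %rsi, %rdi      ; jne .L1   -- TYPE_ICMP + cond jump: fused.
     test  %eax, %eax      ; je  .L2   -- TYPE_TEST + cond jump: fused.
     cmpl  $0, (%rdi)      ; jne .L3   -- cmp MEM-IMM: rejected above.
     cmp   foo(%rip), %eax ; jne .L4   -- RIP-relative operand: rejected.
     dec   %ecx            ; jae .L5   -- inc/dec + unsigned jump: rejected.
     add   %ebx, %eax      ; jne .L6   -- alu + jump: fused only when
                                          TARGET_FUSE_ALU_AND_BRANCH and the
                                          destination is not memory.  */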
26472 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26473 execution. It is applied if
26474 (1) IMUL instruction is on the top of list;
26475 (2) There exists exactly one producer of an independent IMUL instruction
26476 in the ready list.
26477 Return index of IMUL producer if it was found and -1 otherwise. */
26478 static int
26479 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26481 rtx_insn *insn;
26482 rtx set, insn1, insn2;
26483 sd_iterator_def sd_it;
26484 dep_t dep;
26485 int index = -1;
26486 int i;
26488 if (!TARGET_BONNELL)
26489 return index;
26491 /* Check that IMUL instruction is on the top of ready list. */
26492 insn = ready[n_ready - 1];
26493 set = single_set (insn);
26494 if (!set)
26495 return index;
26496 if (!(GET_CODE (SET_SRC (set)) == MULT
26497 && GET_MODE (SET_SRC (set)) == SImode))
26498 return index;
26500 /* Search for producer of independent IMUL instruction. */
26501 for (i = n_ready - 2; i >= 0; i--)
26503 insn = ready[i];
26504 if (!NONDEBUG_INSN_P (insn))
26505 continue;
26506 /* Skip IMUL instruction. */
26507 insn2 = PATTERN (insn);
26508 if (GET_CODE (insn2) == PARALLEL)
26509 insn2 = XVECEXP (insn2, 0, 0);
26510 if (GET_CODE (insn2) == SET
26511 && GET_CODE (SET_SRC (insn2)) == MULT
26512 && GET_MODE (SET_SRC (insn2)) == SImode)
26513 continue;
26515 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26517 rtx con;
26518 con = DEP_CON (dep);
26519 if (!NONDEBUG_INSN_P (con))
26520 continue;
26521 insn1 = PATTERN (con);
26522 if (GET_CODE (insn1) == PARALLEL)
26523 insn1 = XVECEXP (insn1, 0, 0);
26525 if (GET_CODE (insn1) == SET
26526 && GET_CODE (SET_SRC (insn1)) == MULT
26527 && GET_MODE (SET_SRC (insn1)) == SImode)
26529 sd_iterator_def sd_it1;
26530 dep_t dep1;
26531 /* Check if there is no other dependee for IMUL. */
26532 index = i;
26533 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26535 rtx pro;
26536 pro = DEP_PRO (dep1);
26537 if (!NONDEBUG_INSN_P (pro))
26538 continue;
26539 if (pro != insn)
26540 index = -1;
26542 if (index >= 0)
26543 break;
26546 if (index >= 0)
26547 break;
26549 return index;
26552 /* Try to find the best candidate on the top of ready list if two insns
26553 have the same priority - candidate is best if its dependees were
26554 scheduled earlier. Applied for Silvermont only.
26555 Return true if top 2 insns must be interchanged. */
26556 static bool
26557 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26559 rtx_insn *top = ready[n_ready - 1];
26560 rtx_insn *next = ready[n_ready - 2];
26561 rtx set;
26562 sd_iterator_def sd_it;
26563 dep_t dep;
26564 int clock1 = -1;
26565 int clock2 = -1;
26566 #define INSN_TICK(INSN) (HID (INSN)->tick)
26568 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26569 return false;
26571 if (!NONDEBUG_INSN_P (top))
26572 return false;
26573 if (!NONJUMP_INSN_P (top))
26574 return false;
26575 if (!NONDEBUG_INSN_P (next))
26576 return false;
26577 if (!NONJUMP_INSN_P (next))
26578 return false;
26579 set = single_set (top);
26580 if (!set)
26581 return false;
26582 set = single_set (next);
26583 if (!set)
26584 return false;
26586 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26588 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26589 return false;
26590 /* Determine the winner more precisely. */
26591 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26593 rtx pro;
26594 pro = DEP_PRO (dep);
26595 if (!NONDEBUG_INSN_P (pro))
26596 continue;
26597 if (INSN_TICK (pro) > clock1)
26598 clock1 = INSN_TICK (pro);
26600 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26602 rtx pro;
26603 pro = DEP_PRO (dep);
26604 if (!NONDEBUG_INSN_P (pro))
26605 continue;
26606 if (INSN_TICK (pro) > clock2)
26607 clock2 = INSN_TICK (pro);
26610 if (clock1 == clock2)
26612 /* Determine winner - load must win. */
26613 enum attr_memory memory1, memory2;
26614 memory1 = get_attr_memory (top);
26615 memory2 = get_attr_memory (next);
26616 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26617 return true;
26619 return (bool) (clock2 < clock1);
26621 return false;
26622 #undef INSN_TICK
26625 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26626 Return issue rate. */
26627 static int
26628 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26629 int *pn_ready, int clock_var)
26631 int issue_rate = -1;
26632 int n_ready = *pn_ready;
26633 int i;
26634 rtx_insn *insn;
26635 int index = -1;
26637 /* Set up issue rate. */
26638 issue_rate = ix86_issue_rate ();
26640 /* Do reordering for BONNELL/SILVERMONT only. */
26641 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26642 return issue_rate;
26644 /* Nothing to do if ready list contains only 1 instruction. */
26645 if (n_ready <= 1)
26646 return issue_rate;
26648 /* Do reordering for the post-reload scheduler only. */
26649 if (!reload_completed)
26650 return issue_rate;
26652 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26654 if (sched_verbose > 1)
26655 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26656 INSN_UID (ready[index]));
26658 /* Put IMUL producer (ready[index]) at the top of ready list. */
26659 insn = ready[index];
26660 for (i = index; i < n_ready - 1; i++)
26661 ready[i] = ready[i + 1];
26662 ready[n_ready - 1] = insn;
26663 return issue_rate;
26665 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26667 if (sched_verbose > 1)
26668 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26669 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26670 /* Swap 2 top elements of ready list. */
26671 insn = ready[n_ready - 1];
26672 ready[n_ready - 1] = ready[n_ready - 2];
26673 ready[n_ready - 2] = insn;
26675 return issue_rate;
26678 static bool
26679 ix86_class_likely_spilled_p (reg_class_t);
26681 /* Return true if the lhs of insn is a HW function argument register and set
26682 is_spilled to true if it is a likely-spilled HW register. */
26683 static bool
26684 insn_is_function_arg (rtx insn, bool* is_spilled)
26686 rtx dst;
26688 if (!NONDEBUG_INSN_P (insn))
26689 return false;
26690 /* Call instructions are not movable; ignore them. */
26691 if (CALL_P (insn))
26692 return false;
26693 insn = PATTERN (insn);
26694 if (GET_CODE (insn) == PARALLEL)
26695 insn = XVECEXP (insn, 0, 0);
26696 if (GET_CODE (insn) != SET)
26697 return false;
26698 dst = SET_DEST (insn);
26699 if (REG_P (dst) && HARD_REGISTER_P (dst)
26700 && ix86_function_arg_regno_p (REGNO (dst)))
26702 /* Is it likely spilled HW register? */
26703 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26704 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26705 *is_spilled = true;
26706 return true;
26708 return false;
26711 /* Add output dependencies for a chain of adjacent function arguments, but
26712 only if there is a move to a likely-spilled HW register. Return the first
26713 argument if at least one dependence was added, or NULL otherwise. */
26714 static rtx_insn *
26715 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26717 rtx_insn *insn;
26718 rtx_insn *last = call;
26719 rtx_insn *first_arg = NULL;
26720 bool is_spilled = false;
26722 head = PREV_INSN (head);
26724 /* Find the argument-passing instruction nearest to the call. */
26725 while (true)
26727 last = PREV_INSN (last);
26728 if (last == head)
26729 return NULL;
26730 if (!NONDEBUG_INSN_P (last))
26731 continue;
26732 if (insn_is_function_arg (last, &is_spilled))
26733 break;
26734 return NULL;
26737 first_arg = last;
26738 while (true)
26740 insn = PREV_INSN (last);
26741 if (!INSN_P (insn))
26742 break;
26743 if (insn == head)
26744 break;
26745 if (!NONDEBUG_INSN_P (insn))
26747 last = insn;
26748 continue;
26750 if (insn_is_function_arg (insn, &is_spilled))
26752 /* Add an output dependence between two function arguments if the chain
26753 of output arguments contains likely-spilled HW registers. */
26754 if (is_spilled)
26755 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26756 first_arg = last = insn;
26758 else
26759 break;
26761 if (!is_spilled)
26762 return NULL;
26763 return first_arg;
26766 /* Add output or anti dependency from insn to first_arg to restrict its code
26767 motion. */
26768 static void
26769 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26771 rtx set;
26772 rtx tmp;
26774 set = single_set (insn);
26775 if (!set)
26776 return;
26777 tmp = SET_DEST (set);
26778 if (REG_P (tmp))
26780 /* Add output dependency to the first function argument. */
26781 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26782 return;
26784 /* Add anti dependency. */
26785 add_dependence (first_arg, insn, REG_DEP_ANTI);
26788 /* Avoid cross-block motion of a function argument by adding a dependency
26789 from the first non-jump instruction in bb. */
26790 static void
26791 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26793 rtx_insn *insn = BB_END (bb);
26795 while (insn)
26797 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26799 rtx set = single_set (insn);
26800 if (set)
26802 avoid_func_arg_motion (arg, insn);
26803 return;
26806 if (insn == BB_HEAD (bb))
26807 return;
26808 insn = PREV_INSN (insn);
26812 /* Hook for pre-reload schedule - avoid motion of function arguments
26813 passed in likely spilled HW registers. */
26814 static void
26815 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26817 rtx_insn *insn;
26818 rtx_insn *first_arg = NULL;
26819 if (reload_completed)
26820 return;
26821 while (head != tail && DEBUG_INSN_P (head))
26822 head = NEXT_INSN (head);
26823 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26824 if (INSN_P (insn) && CALL_P (insn))
26826 first_arg = add_parameter_dependencies (insn, head);
26827 if (first_arg)
26829 /* Add a dependee for the first argument to predecessors, but only if the
26830 region contains more than one block. */
26831 basic_block bb = BLOCK_FOR_INSN (insn);
26832 int rgn = CONTAINING_RGN (bb->index);
26833 int nr_blks = RGN_NR_BLOCKS (rgn);
26834 /* Skip trivial regions and region head blocks that can have
26835 predecessors outside of region. */
26836 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26838 edge e;
26839 edge_iterator ei;
26841 /* Regions are SCCs with the exception of selective
26842 scheduling with pipelining of outer blocks enabled.
26843 So also check that immediate predecessors of a non-head
26844 block are in the same region. */
26845 FOR_EACH_EDGE (e, ei, bb->preds)
26847 /* Avoid creating loop-carried dependencies by
26848 using the topological ordering in the region. */
26849 if (rgn == CONTAINING_RGN (e->src->index)
26850 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26851 add_dependee_for_func_arg (first_arg, e->src);
26854 insn = first_arg;
26855 if (insn == head)
26856 break;
26859 else if (first_arg)
26860 avoid_func_arg_motion (first_arg, insn);
26863 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26864 HW registers to maximum, to schedule them as soon as possible. These are
26865 moves from function argument registers at the top of the function entry
26866 and moves from function return value registers after call. */
26867 static int
26868 ix86_adjust_priority (rtx_insn *insn, int priority)
26870 rtx set;
26872 if (reload_completed)
26873 return priority;
26875 if (!NONDEBUG_INSN_P (insn))
26876 return priority;
26878 set = single_set (insn);
26879 if (set)
26881 rtx tmp = SET_SRC (set);
26882 if (REG_P (tmp)
26883 && HARD_REGISTER_P (tmp)
26884 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26885 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26886 return current_sched_info->sched_max_insns_priority;
26889 return priority;
26892 /* Model decoder of Core 2/i7.
26893 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26894 track the instruction fetch block boundaries and make sure that long
26895 (9+ bytes) instructions are assigned to D0. */
26897 /* Maximum length of an insn that can be handled by
26898 a secondary decoder unit. '8' for Core 2/i7. */
26899 static int core2i7_secondary_decoder_max_insn_size;
26901 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26902 '16' for Core 2/i7. */
26903 static int core2i7_ifetch_block_size;
26905 /* Maximum number of instructions decoder can handle per cycle.
26906 '6' for Core 2/i7. */
26907 static int core2i7_ifetch_block_max_insns;
26909 typedef struct ix86_first_cycle_multipass_data_ *
26910 ix86_first_cycle_multipass_data_t;
26911 typedef const struct ix86_first_cycle_multipass_data_ *
26912 const_ix86_first_cycle_multipass_data_t;
26914 /* A variable to store target state across calls to max_issue within
26915 one cycle. */
26916 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26917 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26919 /* Initialize DATA. */
26920 static void
26921 core2i7_first_cycle_multipass_init (void *_data)
26923 ix86_first_cycle_multipass_data_t data
26924 = (ix86_first_cycle_multipass_data_t) _data;
26926 data->ifetch_block_len = 0;
26927 data->ifetch_block_n_insns = 0;
26928 data->ready_try_change = NULL;
26929 data->ready_try_change_size = 0;
26932 /* Advancing the cycle; reset ifetch block counts. */
26933 static void
26934 core2i7_dfa_post_advance_cycle (void)
26936 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26938 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26940 data->ifetch_block_len = 0;
26941 data->ifetch_block_n_insns = 0;
26944 static int min_insn_size (rtx_insn *);
26946 /* Filter out insns from ready_try that the core will not be able to issue
26947 on current cycle due to decoder. */
26948 static void
26949 core2i7_first_cycle_multipass_filter_ready_try
26950 (const_ix86_first_cycle_multipass_data_t data,
26951 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26953 while (n_ready--)
26955 rtx_insn *insn;
26956 int insn_size;
26958 if (ready_try[n_ready])
26959 continue;
26961 insn = get_ready_element (n_ready);
26962 insn_size = min_insn_size (insn);
26964 if (/* If this is too long an insn for a secondary decoder ... */
26965 (!first_cycle_insn_p
26966 && insn_size > core2i7_secondary_decoder_max_insn_size)
26967 /* ... or it would not fit into the ifetch block ... */
26968 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26969 /* ... or the decoder is full already ... */
26970 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26971 /* ... mask the insn out. */
26973 ready_try[n_ready] = 1;
26975 if (data->ready_try_change)
26976 bitmap_set_bit (data->ready_try_change, n_ready);
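/* Editorial illustration (not part of the original source): with the Core
   2/i7 parameters set up in ix86_sched_init_global below (secondary decoder
   limit of 8 bytes, 16-byte ifetch block, at most 6 insns per block), the
   filter above behaves as follows; the byte sizes are hypothetical examples.

     - a 9-byte insn is masked out unless it would be the first insn of the
       cycle (only decoder D0 can handle it);
     - after 12 bytes have already been issued this cycle, a 5-byte insn is
       masked out because 12 + 5 > 16;
     - once 6 insns have been issued this cycle, every remaining candidate
       is masked out.  */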
26981 /* Prepare for a new round of multipass lookahead scheduling. */
26982 static void
26983 core2i7_first_cycle_multipass_begin (void *_data,
26984 signed char *ready_try, int n_ready,
26985 bool first_cycle_insn_p)
26987 ix86_first_cycle_multipass_data_t data
26988 = (ix86_first_cycle_multipass_data_t) _data;
26989 const_ix86_first_cycle_multipass_data_t prev_data
26990 = ix86_first_cycle_multipass_data;
26992 /* Restore the state from the end of the previous round. */
26993 data->ifetch_block_len = prev_data->ifetch_block_len;
26994 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26996 /* Filter instructions that cannot be issued on current cycle due to
26997 decoder restrictions. */
26998 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26999 first_cycle_insn_p);
27002 /* INSN is being issued in current solution. Account for its impact on
27003 the decoder model. */
27004 static void
27005 core2i7_first_cycle_multipass_issue (void *_data,
27006 signed char *ready_try, int n_ready,
27007 rtx_insn *insn, const void *_prev_data)
27009 ix86_first_cycle_multipass_data_t data
27010 = (ix86_first_cycle_multipass_data_t) _data;
27011 const_ix86_first_cycle_multipass_data_t prev_data
27012 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27014 int insn_size = min_insn_size (insn);
27016 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27017 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27018 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27019 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27021 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27022 if (!data->ready_try_change)
27024 data->ready_try_change = sbitmap_alloc (n_ready);
27025 data->ready_try_change_size = n_ready;
27027 else if (data->ready_try_change_size < n_ready)
27029 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27030 n_ready, 0);
27031 data->ready_try_change_size = n_ready;
27033 bitmap_clear (data->ready_try_change);
27035 /* Filter out insns from ready_try that the core will not be able to issue
27036 on current cycle due to decoder. */
27037 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27038 false);
27041 /* Revert the effect on ready_try. */
27042 static void
27043 core2i7_first_cycle_multipass_backtrack (const void *_data,
27044 signed char *ready_try,
27045 int n_ready ATTRIBUTE_UNUSED)
27047 const_ix86_first_cycle_multipass_data_t data
27048 = (const_ix86_first_cycle_multipass_data_t) _data;
27049 unsigned int i = 0;
27050 sbitmap_iterator sbi;
27052 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27053 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27055 ready_try[i] = 0;
27059 /* Save the result of multipass lookahead scheduling for the next round. */
27060 static void
27061 core2i7_first_cycle_multipass_end (const void *_data)
27063 const_ix86_first_cycle_multipass_data_t data
27064 = (const_ix86_first_cycle_multipass_data_t) _data;
27065 ix86_first_cycle_multipass_data_t next_data
27066 = ix86_first_cycle_multipass_data;
27068 if (data != NULL)
27070 next_data->ifetch_block_len = data->ifetch_block_len;
27071 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27075 /* Deallocate target data. */
27076 static void
27077 core2i7_first_cycle_multipass_fini (void *_data)
27079 ix86_first_cycle_multipass_data_t data
27080 = (ix86_first_cycle_multipass_data_t) _data;
27082 if (data->ready_try_change)
27084 sbitmap_free (data->ready_try_change);
27085 data->ready_try_change = NULL;
27086 data->ready_try_change_size = 0;
27090 /* Prepare for scheduling pass. */
27091 static void
27092 ix86_sched_init_global (FILE *, int, int)
27094 /* Install scheduling hooks for current CPU. Some of these hooks are used
27095 in time-critical parts of the scheduler, so we only set them up when
27096 they are actually used. */
27097 switch (ix86_tune)
27099 case PROCESSOR_CORE2:
27100 case PROCESSOR_NEHALEM:
27101 case PROCESSOR_SANDYBRIDGE:
27102 case PROCESSOR_HASWELL:
27103 /* Do not perform multipass scheduling for pre-reload schedule
27104 to save compile time. */
27105 if (reload_completed)
27107 targetm.sched.dfa_post_advance_cycle
27108 = core2i7_dfa_post_advance_cycle;
27109 targetm.sched.first_cycle_multipass_init
27110 = core2i7_first_cycle_multipass_init;
27111 targetm.sched.first_cycle_multipass_begin
27112 = core2i7_first_cycle_multipass_begin;
27113 targetm.sched.first_cycle_multipass_issue
27114 = core2i7_first_cycle_multipass_issue;
27115 targetm.sched.first_cycle_multipass_backtrack
27116 = core2i7_first_cycle_multipass_backtrack;
27117 targetm.sched.first_cycle_multipass_end
27118 = core2i7_first_cycle_multipass_end;
27119 targetm.sched.first_cycle_multipass_fini
27120 = core2i7_first_cycle_multipass_fini;
27122 /* Set decoder parameters. */
27123 core2i7_secondary_decoder_max_insn_size = 8;
27124 core2i7_ifetch_block_size = 16;
27125 core2i7_ifetch_block_max_insns = 6;
27126 break;
27128 /* ... Fall through ... */
27129 default:
27130 targetm.sched.dfa_post_advance_cycle = NULL;
27131 targetm.sched.first_cycle_multipass_init = NULL;
27132 targetm.sched.first_cycle_multipass_begin = NULL;
27133 targetm.sched.first_cycle_multipass_issue = NULL;
27134 targetm.sched.first_cycle_multipass_backtrack = NULL;
27135 targetm.sched.first_cycle_multipass_end = NULL;
27136 targetm.sched.first_cycle_multipass_fini = NULL;
27137 break;
27142 /* Compute the alignment given to a constant that is being placed in memory.
27143 EXP is the constant and ALIGN is the alignment that the object would
27144 ordinarily have.
27145 The value of this function is used instead of that alignment to align
27146 the object. */
27148 int
27149 ix86_constant_alignment (tree exp, int align)
27151 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27152 || TREE_CODE (exp) == INTEGER_CST)
27154 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27155 return 64;
27156 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27157 return 128;
27159 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27160 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27161 return BITS_PER_WORD;
27163 return align;
27166 /* Compute the alignment for a static variable.
27167 TYPE is the data type, and ALIGN is the alignment that
27168 the object would ordinarily have. The value of this function is used
27169 instead of that alignment to align the object. */
27171 int
27172 ix86_data_alignment (tree type, int align, bool opt)
27174 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27175 for symbols from other compilation units or symbols that don't need
27176 to bind locally. In order to preserve some ABI compatibility with
27177 those compilers, ensure we don't decrease alignment from what we
27178 used to assume. */
27180 int max_align_compat
27181 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
27183 /* A data structure, equal to or greater than the size of a cache line
27184 (64 bytes in the Pentium 4 and other recent Intel processors, including
27185 processors based on Intel Core microarchitecture) should be aligned
27186 so that its base address is a multiple of the cache line size. */
27188 int max_align
27189 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27191 if (max_align < BITS_PER_WORD)
27192 max_align = BITS_PER_WORD;
27194 if (opt
27195 && AGGREGATE_TYPE_P (type)
27196 && TYPE_SIZE (type)
27197 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27199 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27200 && align < max_align_compat)
27201 align = max_align_compat;
27202 if (wi::geu_p (TYPE_SIZE (type), max_align)
27203 && align < max_align)
27204 align = max_align;
27207 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27208 to a 16-byte boundary. */
27209 if (TARGET_64BIT)
27211 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27212 && TYPE_SIZE (type)
27213 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27214 && wi::geu_p (TYPE_SIZE (type), 128)
27215 && align < 128)
27216 return 128;
27219 if (!opt)
27220 return align;
27222 if (TREE_CODE (type) == ARRAY_TYPE)
27224 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27225 return 64;
27226 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27227 return 128;
27229 else if (TREE_CODE (type) == COMPLEX_TYPE)
27232 if (TYPE_MODE (type) == DCmode && align < 64)
27233 return 64;
27234 if ((TYPE_MODE (type) == XCmode
27235 || TYPE_MODE (type) == TCmode) && align < 128)
27236 return 128;
27238 else if ((TREE_CODE (type) == RECORD_TYPE
27239 || TREE_CODE (type) == UNION_TYPE
27240 || TREE_CODE (type) == QUAL_UNION_TYPE)
27241 && TYPE_FIELDS (type))
27243 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27244 return 64;
27245 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27246 return 128;
27248 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27249 || TREE_CODE (type) == INTEGER_TYPE)
27251 if (TYPE_MODE (type) == DFmode && align < 64)
27252 return 64;
27253 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27254 return 128;
27257 return align;
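/* Editorial illustration (not part of the original source): rough effect of
   the rules above for hypothetical static arrays on x86-64, assuming the
   default 64-byte prefetch_block (so max_align is 512 bits) and OPT true:

     static char a[8];    -- smaller than 16 bytes: alignment unchanged.
     static char b[16];   -- at least 16 bytes: at least 128-bit (ABI rule).
     static char c[32];   -- at least 32 bytes: at least 256-bit (GCC <= 4.8
                             compatibility rule).
     static char d[64];   -- at least 64 bytes: at least 512-bit, i.e. one
                             full cache line.  */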
27260 /* Compute the alignment for a local variable or a stack slot. EXP is
27261 the data type or decl itself, MODE is the widest mode available and
27262 ALIGN is the alignment that the object would ordinarily have. The
27263 value of this macro is used instead of that alignment to align the
27264 object. */
27266 unsigned int
27267 ix86_local_alignment (tree exp, machine_mode mode,
27268 unsigned int align)
27270 tree type, decl;
27272 if (exp && DECL_P (exp))
27274 type = TREE_TYPE (exp);
27275 decl = exp;
27277 else
27279 type = exp;
27280 decl = NULL;
27283 /* Don't do dynamic stack realignment for long long objects with
27284 -mpreferred-stack-boundary=2. */
27285 if (!TARGET_64BIT
27286 && align == 64
27287 && ix86_preferred_stack_boundary < 64
27288 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27289 && (!type || !TYPE_USER_ALIGN (type))
27290 && (!decl || !DECL_USER_ALIGN (decl)))
27291 align = 32;
27293 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27294 register in MODE. We will return the largest alignment of XF
27295 and DF. */
27296 if (!type)
27298 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27299 align = GET_MODE_ALIGNMENT (DFmode);
27300 return align;
27303 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27304 to a 16-byte boundary. The exact wording is:
27306 An array uses the same alignment as its elements, except that a local or
27307 global array variable of length at least 16 bytes or
27308 a C99 variable-length array variable always has alignment of at least 16 bytes.
27310 This was added to allow use of aligned SSE instructions on arrays. This
27311 rule is meant for static storage (where the compiler cannot do the analysis
27312 by itself). We follow it for automatic variables only when convenient.
27313 We fully control everything in the function being compiled, and functions
27314 from other units cannot rely on the alignment.
27316 Exclude the va_list type. It is the common case of a local array where
27317 we cannot benefit from the alignment.
27319 TODO: Probably one should optimize for size only when the var is not escaping. */
27320 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27321 && TARGET_SSE)
27323 if (AGGREGATE_TYPE_P (type)
27324 && (va_list_type_node == NULL_TREE
27325 || (TYPE_MAIN_VARIANT (type)
27326 != TYPE_MAIN_VARIANT (va_list_type_node)))
27327 && TYPE_SIZE (type)
27328 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27329 && wi::geu_p (TYPE_SIZE (type), 16)
27330 && align < 128)
27331 return 128;
27333 if (TREE_CODE (type) == ARRAY_TYPE)
27335 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27336 return 64;
27337 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27338 return 128;
27340 else if (TREE_CODE (type) == COMPLEX_TYPE)
27342 if (TYPE_MODE (type) == DCmode && align < 64)
27343 return 64;
27344 if ((TYPE_MODE (type) == XCmode
27345 || TYPE_MODE (type) == TCmode) && align < 128)
27346 return 128;
27348 else if ((TREE_CODE (type) == RECORD_TYPE
27349 || TREE_CODE (type) == UNION_TYPE
27350 || TREE_CODE (type) == QUAL_UNION_TYPE)
27351 && TYPE_FIELDS (type))
27353 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27354 return 64;
27355 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27356 return 128;
27358 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27359 || TREE_CODE (type) == INTEGER_TYPE)
27362 if (TYPE_MODE (type) == DFmode && align < 64)
27363 return 64;
27364 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27365 return 128;
27367 return align;
27370 /* Compute the minimum required alignment for dynamic stack realignment
27371 purposes for a local variable, parameter or a stack slot. EXP is
27372 the data type or decl itself, MODE is its mode and ALIGN is the
27373 alignment that the object would ordinarily have. */
27375 unsigned int
27376 ix86_minimum_alignment (tree exp, machine_mode mode,
27377 unsigned int align)
27379 tree type, decl;
27381 if (exp && DECL_P (exp))
27383 type = TREE_TYPE (exp);
27384 decl = exp;
27386 else
27388 type = exp;
27389 decl = NULL;
27392 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27393 return align;
27395 /* Don't do dynamic stack realignment for long long objects with
27396 -mpreferred-stack-boundary=2. */
27397 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27398 && (!type || !TYPE_USER_ALIGN (type))
27399 && (!decl || !DECL_USER_ALIGN (decl)))
27400 return 32;
27402 return align;
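/* Editorial illustration (not part of the original source): with
   -m32 -mpreferred-stack-boundary=2 (a 32-bit stack boundary), a local
   'long long' (DImode) slot that would ordinarily want 64-bit alignment
   reports a minimum of only 32 bits here, so it alone does not force
   dynamic stack realignment; an explicit user alignment, e.g.
   __attribute__((aligned(8))), keeps the full 64-bit request.  */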
27405 /* Find a location for the static chain incoming to a nested function.
27406 This is a register, unless all free registers are used by arguments. */
27408 static rtx
27409 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27411 unsigned regno;
27413 /* While this function won't be called by the middle-end when a static
27414 chain isn't needed, it's also used throughout the backend so it's
27415 easiest to keep this check centralized. */
27416 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27417 return NULL;
27419 if (TARGET_64BIT)
27421 /* We always use R10 in 64-bit mode. */
27422 regno = R10_REG;
27424 else
27426 const_tree fntype, fndecl;
27427 unsigned int ccvt;
27429 /* By default in 32-bit mode we use ECX to pass the static chain. */
27430 regno = CX_REG;
27432 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27434 fntype = TREE_TYPE (fndecl_or_type);
27435 fndecl = fndecl_or_type;
27437 else
27439 fntype = fndecl_or_type;
27440 fndecl = NULL;
27443 ccvt = ix86_get_callcvt (fntype);
27444 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27446 /* Fastcall functions use ecx/edx for arguments, which leaves
27447 us with EAX for the static chain.
27448 Thiscall functions use ecx for arguments, which also
27449 leaves us with EAX for the static chain. */
27450 regno = AX_REG;
27452 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27454 /* Thiscall functions use ecx for arguments, which leaves
27455 us with EAX and EDX for the static chain.
27456 For ABI compatibility we use EAX. */
27457 regno = AX_REG;
27459 else if (ix86_function_regparm (fntype, fndecl) == 3)
27461 /* For regparm 3, we have no free call-clobbered registers in
27462 which to store the static chain. In order to implement this,
27463 we have the trampoline push the static chain to the stack.
27464 However, we can't push a value below the return address when
27465 we call the nested function directly, so we have to use an
27466 alternate entry point. For this we use ESI, and have the
27467 alternate entry point push ESI, so that things appear the
27468 same once we're executing the nested function. */
27469 if (incoming_p)
27471 if (fndecl == current_function_decl)
27472 ix86_static_chain_on_stack = true;
27473 return gen_frame_mem (SImode,
27474 plus_constant (Pmode,
27475 arg_pointer_rtx, -8));
27477 regno = SI_REG;
27481 return gen_rtx_REG (Pmode, regno);
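/* Editorial summary (not part of the original source) of the static chain
   location chosen above:

     64-bit code                   -> %r10
     32-bit, default conventions   -> %ecx
     32-bit, fastcall or thiscall  -> %eax  (ecx/edx taken by arguments)
     32-bit, regparm(3)            -> no free register; the trampoline pushes
                                      the chain and the incoming side reads it
                                      from the stack, using %esi via the
                                      alternate entry point.  */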
27484 /* Emit RTL insns to initialize the variable parts of a trampoline.
27485 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27486 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27487 to be passed to the target function. */
27489 static void
27490 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27492 rtx mem, fnaddr;
27493 int opcode;
27494 int offset = 0;
27496 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27498 if (TARGET_64BIT)
27500 int size;
27502 /* Load the function address into r11. Try to load the address using
27503 the shorter movl instead of movabs. We may want to support
27504 movq for kernel mode, but the kernel does not use trampolines at
27505 the moment. FNADDR is a 32-bit address and may not be in
27506 DImode when ptr_mode == SImode. Always use movl in this
27507 case. */
27508 if (ptr_mode == SImode
27509 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27511 fnaddr = copy_addr_to_reg (fnaddr);
27513 mem = adjust_address (m_tramp, HImode, offset);
27514 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27516 mem = adjust_address (m_tramp, SImode, offset + 2);
27517 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27518 offset += 6;
27520 else
27522 mem = adjust_address (m_tramp, HImode, offset);
27523 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27525 mem = adjust_address (m_tramp, DImode, offset + 2);
27526 emit_move_insn (mem, fnaddr);
27527 offset += 10;
27530 /* Load static chain using movabs to r10. Use the shorter movl
27531 instead of movabs when ptr_mode == SImode. */
27532 if (ptr_mode == SImode)
27534 opcode = 0xba41;
27535 size = 6;
27537 else
27539 opcode = 0xba49;
27540 size = 10;
27543 mem = adjust_address (m_tramp, HImode, offset);
27544 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27546 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27547 emit_move_insn (mem, chain_value);
27548 offset += size;
27550 /* Jump to r11; the last (unused) byte is a nop, only there to
27551 pad the write out to a single 32-bit store. */
27552 mem = adjust_address (m_tramp, SImode, offset);
27553 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27554 offset += 4;
27556 else
27558 rtx disp, chain;
27560 /* Depending on the static chain location, either load a register
27561 with a constant, or push the constant to the stack. All of the
27562 instructions are the same size. */
27563 chain = ix86_static_chain (fndecl, true);
27564 if (REG_P (chain))
27566 switch (REGNO (chain))
27568 case AX_REG:
27569 opcode = 0xb8; break;
27570 case CX_REG:
27571 opcode = 0xb9; break;
27572 default:
27573 gcc_unreachable ();
27576 else
27577 opcode = 0x68;
27579 mem = adjust_address (m_tramp, QImode, offset);
27580 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27582 mem = adjust_address (m_tramp, SImode, offset + 1);
27583 emit_move_insn (mem, chain_value);
27584 offset += 5;
27586 mem = adjust_address (m_tramp, QImode, offset);
27587 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27589 mem = adjust_address (m_tramp, SImode, offset + 1);
27591 /* Compute offset from the end of the jmp to the target function.
27592 In the case in which the trampoline stores the static chain on
27593 the stack, we need to skip the first insn which pushes the
27594 (call-saved) register static chain; this push is 1 byte. */
27595 offset += 5;
27596 disp = expand_binop (SImode, sub_optab, fnaddr,
27597 plus_constant (Pmode, XEXP (m_tramp, 0),
27598 offset - (MEM_P (chain) ? 1 : 0)),
27599 NULL_RTX, 1, OPTAB_DIRECT);
27600 emit_move_insn (mem, disp);
27603 gcc_assert (offset <= TRAMPOLINE_SIZE);
27605 #ifdef HAVE_ENABLE_EXECUTE_STACK
27606 #ifdef CHECK_EXECUTE_STACK_ENABLED
27607 if (CHECK_EXECUTE_STACK_ENABLED)
27608 #endif
27609 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27610 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27611 #endif
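/* Editorial illustration (not part of the original source): byte layout
   emitted above for the 64-bit trampoline when a full 64-bit target address
   is needed (the 32-bit-address variant uses the shorter 41 bb / 41 ba movl
   encodings instead):

     49 bb <imm64 fnaddr>   movabs $fnaddr, %r11
     49 ba <imm64 chain>    movabs $chain,  %r10
     49 ff e3               jmp    *%r11
     90                     nop  (pads the final 32-bit store)  */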
27614 /* The following file contains several enumerations and data structures
27615 built from the definitions in i386-builtin-types.def. */
27617 #include "i386-builtin-types.inc"
27619 /* Table for the ix86 builtin non-function types. */
27620 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27622 /* Retrieve an element from the above table, building some of
27623 the types lazily. */
27625 static tree
27626 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27628 unsigned int index;
27629 tree type, itype;
27631 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27633 type = ix86_builtin_type_tab[(int) tcode];
27634 if (type != NULL)
27635 return type;
27637 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27638 if (tcode <= IX86_BT_LAST_VECT)
27640 machine_mode mode;
27642 index = tcode - IX86_BT_LAST_PRIM - 1;
27643 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27644 mode = ix86_builtin_type_vect_mode[index];
27646 type = build_vector_type_for_mode (itype, mode);
27648 else
27650 int quals;
27652 index = tcode - IX86_BT_LAST_VECT - 1;
27653 if (tcode <= IX86_BT_LAST_PTR)
27654 quals = TYPE_UNQUALIFIED;
27655 else
27656 quals = TYPE_QUAL_CONST;
27658 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27659 if (quals != TYPE_UNQUALIFIED)
27660 itype = build_qualified_type (itype, quals);
27662 type = build_pointer_type (itype);
27665 ix86_builtin_type_tab[(int) tcode] = type;
27666 return type;
27669 /* Table for the ix86 builtin function types. */
27670 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27672 /* Retrieve an element from the above table, building some of
27673 the types lazily. */
27675 static tree
27676 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27678 tree type;
27680 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27682 type = ix86_builtin_func_type_tab[(int) tcode];
27683 if (type != NULL)
27684 return type;
27686 if (tcode <= IX86_BT_LAST_FUNC)
27688 unsigned start = ix86_builtin_func_start[(int) tcode];
27689 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27690 tree rtype, atype, args = void_list_node;
27691 unsigned i;
27693 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27694 for (i = after - 1; i > start; --i)
27696 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27697 args = tree_cons (NULL, atype, args);
27700 type = build_function_type (rtype, args);
27702 else
27704 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27705 enum ix86_builtin_func_type icode;
27707 icode = ix86_builtin_func_alias_base[index];
27708 type = ix86_get_builtin_func_type (icode);
27711 ix86_builtin_func_type_tab[(int) tcode] = type;
27712 return type;
27716 /* Codes for all the SSE/MMX builtins. */
27717 enum ix86_builtins
27719 IX86_BUILTIN_ADDPS,
27720 IX86_BUILTIN_ADDSS,
27721 IX86_BUILTIN_DIVPS,
27722 IX86_BUILTIN_DIVSS,
27723 IX86_BUILTIN_MULPS,
27724 IX86_BUILTIN_MULSS,
27725 IX86_BUILTIN_SUBPS,
27726 IX86_BUILTIN_SUBSS,
27728 IX86_BUILTIN_CMPEQPS,
27729 IX86_BUILTIN_CMPLTPS,
27730 IX86_BUILTIN_CMPLEPS,
27731 IX86_BUILTIN_CMPGTPS,
27732 IX86_BUILTIN_CMPGEPS,
27733 IX86_BUILTIN_CMPNEQPS,
27734 IX86_BUILTIN_CMPNLTPS,
27735 IX86_BUILTIN_CMPNLEPS,
27736 IX86_BUILTIN_CMPNGTPS,
27737 IX86_BUILTIN_CMPNGEPS,
27738 IX86_BUILTIN_CMPORDPS,
27739 IX86_BUILTIN_CMPUNORDPS,
27740 IX86_BUILTIN_CMPEQSS,
27741 IX86_BUILTIN_CMPLTSS,
27742 IX86_BUILTIN_CMPLESS,
27743 IX86_BUILTIN_CMPNEQSS,
27744 IX86_BUILTIN_CMPNLTSS,
27745 IX86_BUILTIN_CMPNLESS,
27746 IX86_BUILTIN_CMPORDSS,
27747 IX86_BUILTIN_CMPUNORDSS,
27749 IX86_BUILTIN_COMIEQSS,
27750 IX86_BUILTIN_COMILTSS,
27751 IX86_BUILTIN_COMILESS,
27752 IX86_BUILTIN_COMIGTSS,
27753 IX86_BUILTIN_COMIGESS,
27754 IX86_BUILTIN_COMINEQSS,
27755 IX86_BUILTIN_UCOMIEQSS,
27756 IX86_BUILTIN_UCOMILTSS,
27757 IX86_BUILTIN_UCOMILESS,
27758 IX86_BUILTIN_UCOMIGTSS,
27759 IX86_BUILTIN_UCOMIGESS,
27760 IX86_BUILTIN_UCOMINEQSS,
27762 IX86_BUILTIN_CVTPI2PS,
27763 IX86_BUILTIN_CVTPS2PI,
27764 IX86_BUILTIN_CVTSI2SS,
27765 IX86_BUILTIN_CVTSI642SS,
27766 IX86_BUILTIN_CVTSS2SI,
27767 IX86_BUILTIN_CVTSS2SI64,
27768 IX86_BUILTIN_CVTTPS2PI,
27769 IX86_BUILTIN_CVTTSS2SI,
27770 IX86_BUILTIN_CVTTSS2SI64,
27772 IX86_BUILTIN_MAXPS,
27773 IX86_BUILTIN_MAXSS,
27774 IX86_BUILTIN_MINPS,
27775 IX86_BUILTIN_MINSS,
27777 IX86_BUILTIN_LOADUPS,
27778 IX86_BUILTIN_STOREUPS,
27779 IX86_BUILTIN_MOVSS,
27781 IX86_BUILTIN_MOVHLPS,
27782 IX86_BUILTIN_MOVLHPS,
27783 IX86_BUILTIN_LOADHPS,
27784 IX86_BUILTIN_LOADLPS,
27785 IX86_BUILTIN_STOREHPS,
27786 IX86_BUILTIN_STORELPS,
27788 IX86_BUILTIN_MASKMOVQ,
27789 IX86_BUILTIN_MOVMSKPS,
27790 IX86_BUILTIN_PMOVMSKB,
27792 IX86_BUILTIN_MOVNTPS,
27793 IX86_BUILTIN_MOVNTQ,
27795 IX86_BUILTIN_LOADDQU,
27796 IX86_BUILTIN_STOREDQU,
27798 IX86_BUILTIN_PACKSSWB,
27799 IX86_BUILTIN_PACKSSDW,
27800 IX86_BUILTIN_PACKUSWB,
27802 IX86_BUILTIN_PADDB,
27803 IX86_BUILTIN_PADDW,
27804 IX86_BUILTIN_PADDD,
27805 IX86_BUILTIN_PADDQ,
27806 IX86_BUILTIN_PADDSB,
27807 IX86_BUILTIN_PADDSW,
27808 IX86_BUILTIN_PADDUSB,
27809 IX86_BUILTIN_PADDUSW,
27810 IX86_BUILTIN_PSUBB,
27811 IX86_BUILTIN_PSUBW,
27812 IX86_BUILTIN_PSUBD,
27813 IX86_BUILTIN_PSUBQ,
27814 IX86_BUILTIN_PSUBSB,
27815 IX86_BUILTIN_PSUBSW,
27816 IX86_BUILTIN_PSUBUSB,
27817 IX86_BUILTIN_PSUBUSW,
27819 IX86_BUILTIN_PAND,
27820 IX86_BUILTIN_PANDN,
27821 IX86_BUILTIN_POR,
27822 IX86_BUILTIN_PXOR,
27824 IX86_BUILTIN_PAVGB,
27825 IX86_BUILTIN_PAVGW,
27827 IX86_BUILTIN_PCMPEQB,
27828 IX86_BUILTIN_PCMPEQW,
27829 IX86_BUILTIN_PCMPEQD,
27830 IX86_BUILTIN_PCMPGTB,
27831 IX86_BUILTIN_PCMPGTW,
27832 IX86_BUILTIN_PCMPGTD,
27834 IX86_BUILTIN_PMADDWD,
27836 IX86_BUILTIN_PMAXSW,
27837 IX86_BUILTIN_PMAXUB,
27838 IX86_BUILTIN_PMINSW,
27839 IX86_BUILTIN_PMINUB,
27841 IX86_BUILTIN_PMULHUW,
27842 IX86_BUILTIN_PMULHW,
27843 IX86_BUILTIN_PMULLW,
27845 IX86_BUILTIN_PSADBW,
27846 IX86_BUILTIN_PSHUFW,
27848 IX86_BUILTIN_PSLLW,
27849 IX86_BUILTIN_PSLLD,
27850 IX86_BUILTIN_PSLLQ,
27851 IX86_BUILTIN_PSRAW,
27852 IX86_BUILTIN_PSRAD,
27853 IX86_BUILTIN_PSRLW,
27854 IX86_BUILTIN_PSRLD,
27855 IX86_BUILTIN_PSRLQ,
27856 IX86_BUILTIN_PSLLWI,
27857 IX86_BUILTIN_PSLLDI,
27858 IX86_BUILTIN_PSLLQI,
27859 IX86_BUILTIN_PSRAWI,
27860 IX86_BUILTIN_PSRADI,
27861 IX86_BUILTIN_PSRLWI,
27862 IX86_BUILTIN_PSRLDI,
27863 IX86_BUILTIN_PSRLQI,
27865 IX86_BUILTIN_PUNPCKHBW,
27866 IX86_BUILTIN_PUNPCKHWD,
27867 IX86_BUILTIN_PUNPCKHDQ,
27868 IX86_BUILTIN_PUNPCKLBW,
27869 IX86_BUILTIN_PUNPCKLWD,
27870 IX86_BUILTIN_PUNPCKLDQ,
27872 IX86_BUILTIN_SHUFPS,
27874 IX86_BUILTIN_RCPPS,
27875 IX86_BUILTIN_RCPSS,
27876 IX86_BUILTIN_RSQRTPS,
27877 IX86_BUILTIN_RSQRTPS_NR,
27878 IX86_BUILTIN_RSQRTSS,
27879 IX86_BUILTIN_RSQRTF,
27880 IX86_BUILTIN_SQRTPS,
27881 IX86_BUILTIN_SQRTPS_NR,
27882 IX86_BUILTIN_SQRTSS,
27884 IX86_BUILTIN_UNPCKHPS,
27885 IX86_BUILTIN_UNPCKLPS,
27887 IX86_BUILTIN_ANDPS,
27888 IX86_BUILTIN_ANDNPS,
27889 IX86_BUILTIN_ORPS,
27890 IX86_BUILTIN_XORPS,
27892 IX86_BUILTIN_EMMS,
27893 IX86_BUILTIN_LDMXCSR,
27894 IX86_BUILTIN_STMXCSR,
27895 IX86_BUILTIN_SFENCE,
27897 IX86_BUILTIN_FXSAVE,
27898 IX86_BUILTIN_FXRSTOR,
27899 IX86_BUILTIN_FXSAVE64,
27900 IX86_BUILTIN_FXRSTOR64,
27902 IX86_BUILTIN_XSAVE,
27903 IX86_BUILTIN_XRSTOR,
27904 IX86_BUILTIN_XSAVE64,
27905 IX86_BUILTIN_XRSTOR64,
27907 IX86_BUILTIN_XSAVEOPT,
27908 IX86_BUILTIN_XSAVEOPT64,
27910 IX86_BUILTIN_XSAVEC,
27911 IX86_BUILTIN_XSAVEC64,
27913 IX86_BUILTIN_XSAVES,
27914 IX86_BUILTIN_XRSTORS,
27915 IX86_BUILTIN_XSAVES64,
27916 IX86_BUILTIN_XRSTORS64,
27918 /* 3DNow! Original */
27919 IX86_BUILTIN_FEMMS,
27920 IX86_BUILTIN_PAVGUSB,
27921 IX86_BUILTIN_PF2ID,
27922 IX86_BUILTIN_PFACC,
27923 IX86_BUILTIN_PFADD,
27924 IX86_BUILTIN_PFCMPEQ,
27925 IX86_BUILTIN_PFCMPGE,
27926 IX86_BUILTIN_PFCMPGT,
27927 IX86_BUILTIN_PFMAX,
27928 IX86_BUILTIN_PFMIN,
27929 IX86_BUILTIN_PFMUL,
27930 IX86_BUILTIN_PFRCP,
27931 IX86_BUILTIN_PFRCPIT1,
27932 IX86_BUILTIN_PFRCPIT2,
27933 IX86_BUILTIN_PFRSQIT1,
27934 IX86_BUILTIN_PFRSQRT,
27935 IX86_BUILTIN_PFSUB,
27936 IX86_BUILTIN_PFSUBR,
27937 IX86_BUILTIN_PI2FD,
27938 IX86_BUILTIN_PMULHRW,
27940 /* 3DNow! Athlon Extensions */
27941 IX86_BUILTIN_PF2IW,
27942 IX86_BUILTIN_PFNACC,
27943 IX86_BUILTIN_PFPNACC,
27944 IX86_BUILTIN_PI2FW,
27945 IX86_BUILTIN_PSWAPDSI,
27946 IX86_BUILTIN_PSWAPDSF,
27948 /* SSE2 */
27949 IX86_BUILTIN_ADDPD,
27950 IX86_BUILTIN_ADDSD,
27951 IX86_BUILTIN_DIVPD,
27952 IX86_BUILTIN_DIVSD,
27953 IX86_BUILTIN_MULPD,
27954 IX86_BUILTIN_MULSD,
27955 IX86_BUILTIN_SUBPD,
27956 IX86_BUILTIN_SUBSD,
27958 IX86_BUILTIN_CMPEQPD,
27959 IX86_BUILTIN_CMPLTPD,
27960 IX86_BUILTIN_CMPLEPD,
27961 IX86_BUILTIN_CMPGTPD,
27962 IX86_BUILTIN_CMPGEPD,
27963 IX86_BUILTIN_CMPNEQPD,
27964 IX86_BUILTIN_CMPNLTPD,
27965 IX86_BUILTIN_CMPNLEPD,
27966 IX86_BUILTIN_CMPNGTPD,
27967 IX86_BUILTIN_CMPNGEPD,
27968 IX86_BUILTIN_CMPORDPD,
27969 IX86_BUILTIN_CMPUNORDPD,
27970 IX86_BUILTIN_CMPEQSD,
27971 IX86_BUILTIN_CMPLTSD,
27972 IX86_BUILTIN_CMPLESD,
27973 IX86_BUILTIN_CMPNEQSD,
27974 IX86_BUILTIN_CMPNLTSD,
27975 IX86_BUILTIN_CMPNLESD,
27976 IX86_BUILTIN_CMPORDSD,
27977 IX86_BUILTIN_CMPUNORDSD,
27979 IX86_BUILTIN_COMIEQSD,
27980 IX86_BUILTIN_COMILTSD,
27981 IX86_BUILTIN_COMILESD,
27982 IX86_BUILTIN_COMIGTSD,
27983 IX86_BUILTIN_COMIGESD,
27984 IX86_BUILTIN_COMINEQSD,
27985 IX86_BUILTIN_UCOMIEQSD,
27986 IX86_BUILTIN_UCOMILTSD,
27987 IX86_BUILTIN_UCOMILESD,
27988 IX86_BUILTIN_UCOMIGTSD,
27989 IX86_BUILTIN_UCOMIGESD,
27990 IX86_BUILTIN_UCOMINEQSD,
27992 IX86_BUILTIN_MAXPD,
27993 IX86_BUILTIN_MAXSD,
27994 IX86_BUILTIN_MINPD,
27995 IX86_BUILTIN_MINSD,
27997 IX86_BUILTIN_ANDPD,
27998 IX86_BUILTIN_ANDNPD,
27999 IX86_BUILTIN_ORPD,
28000 IX86_BUILTIN_XORPD,
28002 IX86_BUILTIN_SQRTPD,
28003 IX86_BUILTIN_SQRTSD,
28005 IX86_BUILTIN_UNPCKHPD,
28006 IX86_BUILTIN_UNPCKLPD,
28008 IX86_BUILTIN_SHUFPD,
28010 IX86_BUILTIN_LOADUPD,
28011 IX86_BUILTIN_STOREUPD,
28012 IX86_BUILTIN_MOVSD,
28014 IX86_BUILTIN_LOADHPD,
28015 IX86_BUILTIN_LOADLPD,
28017 IX86_BUILTIN_CVTDQ2PD,
28018 IX86_BUILTIN_CVTDQ2PS,
28020 IX86_BUILTIN_CVTPD2DQ,
28021 IX86_BUILTIN_CVTPD2PI,
28022 IX86_BUILTIN_CVTPD2PS,
28023 IX86_BUILTIN_CVTTPD2DQ,
28024 IX86_BUILTIN_CVTTPD2PI,
28026 IX86_BUILTIN_CVTPI2PD,
28027 IX86_BUILTIN_CVTSI2SD,
28028 IX86_BUILTIN_CVTSI642SD,
28030 IX86_BUILTIN_CVTSD2SI,
28031 IX86_BUILTIN_CVTSD2SI64,
28032 IX86_BUILTIN_CVTSD2SS,
28033 IX86_BUILTIN_CVTSS2SD,
28034 IX86_BUILTIN_CVTTSD2SI,
28035 IX86_BUILTIN_CVTTSD2SI64,
28037 IX86_BUILTIN_CVTPS2DQ,
28038 IX86_BUILTIN_CVTPS2PD,
28039 IX86_BUILTIN_CVTTPS2DQ,
28041 IX86_BUILTIN_MOVNTI,
28042 IX86_BUILTIN_MOVNTI64,
28043 IX86_BUILTIN_MOVNTPD,
28044 IX86_BUILTIN_MOVNTDQ,
28046 IX86_BUILTIN_MOVQ128,
28048 /* SSE2 MMX */
28049 IX86_BUILTIN_MASKMOVDQU,
28050 IX86_BUILTIN_MOVMSKPD,
28051 IX86_BUILTIN_PMOVMSKB128,
28053 IX86_BUILTIN_PACKSSWB128,
28054 IX86_BUILTIN_PACKSSDW128,
28055 IX86_BUILTIN_PACKUSWB128,
28057 IX86_BUILTIN_PADDB128,
28058 IX86_BUILTIN_PADDW128,
28059 IX86_BUILTIN_PADDD128,
28060 IX86_BUILTIN_PADDQ128,
28061 IX86_BUILTIN_PADDSB128,
28062 IX86_BUILTIN_PADDSW128,
28063 IX86_BUILTIN_PADDUSB128,
28064 IX86_BUILTIN_PADDUSW128,
28065 IX86_BUILTIN_PSUBB128,
28066 IX86_BUILTIN_PSUBW128,
28067 IX86_BUILTIN_PSUBD128,
28068 IX86_BUILTIN_PSUBQ128,
28069 IX86_BUILTIN_PSUBSB128,
28070 IX86_BUILTIN_PSUBSW128,
28071 IX86_BUILTIN_PSUBUSB128,
28072 IX86_BUILTIN_PSUBUSW128,
28074 IX86_BUILTIN_PAND128,
28075 IX86_BUILTIN_PANDN128,
28076 IX86_BUILTIN_POR128,
28077 IX86_BUILTIN_PXOR128,
28079 IX86_BUILTIN_PAVGB128,
28080 IX86_BUILTIN_PAVGW128,
28082 IX86_BUILTIN_PCMPEQB128,
28083 IX86_BUILTIN_PCMPEQW128,
28084 IX86_BUILTIN_PCMPEQD128,
28085 IX86_BUILTIN_PCMPGTB128,
28086 IX86_BUILTIN_PCMPGTW128,
28087 IX86_BUILTIN_PCMPGTD128,
28089 IX86_BUILTIN_PMADDWD128,
28091 IX86_BUILTIN_PMAXSW128,
28092 IX86_BUILTIN_PMAXUB128,
28093 IX86_BUILTIN_PMINSW128,
28094 IX86_BUILTIN_PMINUB128,
28096 IX86_BUILTIN_PMULUDQ,
28097 IX86_BUILTIN_PMULUDQ128,
28098 IX86_BUILTIN_PMULHUW128,
28099 IX86_BUILTIN_PMULHW128,
28100 IX86_BUILTIN_PMULLW128,
28102 IX86_BUILTIN_PSADBW128,
28103 IX86_BUILTIN_PSHUFHW,
28104 IX86_BUILTIN_PSHUFLW,
28105 IX86_BUILTIN_PSHUFD,
28107 IX86_BUILTIN_PSLLDQI128,
28108 IX86_BUILTIN_PSLLWI128,
28109 IX86_BUILTIN_PSLLDI128,
28110 IX86_BUILTIN_PSLLQI128,
28111 IX86_BUILTIN_PSRAWI128,
28112 IX86_BUILTIN_PSRADI128,
28113 IX86_BUILTIN_PSRLDQI128,
28114 IX86_BUILTIN_PSRLWI128,
28115 IX86_BUILTIN_PSRLDI128,
28116 IX86_BUILTIN_PSRLQI128,
28118 IX86_BUILTIN_PSLLDQ128,
28119 IX86_BUILTIN_PSLLW128,
28120 IX86_BUILTIN_PSLLD128,
28121 IX86_BUILTIN_PSLLQ128,
28122 IX86_BUILTIN_PSRAW128,
28123 IX86_BUILTIN_PSRAD128,
28124 IX86_BUILTIN_PSRLW128,
28125 IX86_BUILTIN_PSRLD128,
28126 IX86_BUILTIN_PSRLQ128,
28128 IX86_BUILTIN_PUNPCKHBW128,
28129 IX86_BUILTIN_PUNPCKHWD128,
28130 IX86_BUILTIN_PUNPCKHDQ128,
28131 IX86_BUILTIN_PUNPCKHQDQ128,
28132 IX86_BUILTIN_PUNPCKLBW128,
28133 IX86_BUILTIN_PUNPCKLWD128,
28134 IX86_BUILTIN_PUNPCKLDQ128,
28135 IX86_BUILTIN_PUNPCKLQDQ128,
28137 IX86_BUILTIN_CLFLUSH,
28138 IX86_BUILTIN_MFENCE,
28139 IX86_BUILTIN_LFENCE,
28140 IX86_BUILTIN_PAUSE,
28142 IX86_BUILTIN_FNSTENV,
28143 IX86_BUILTIN_FLDENV,
28144 IX86_BUILTIN_FNSTSW,
28145 IX86_BUILTIN_FNCLEX,
28147 IX86_BUILTIN_BSRSI,
28148 IX86_BUILTIN_BSRDI,
28149 IX86_BUILTIN_RDPMC,
28150 IX86_BUILTIN_RDTSC,
28151 IX86_BUILTIN_RDTSCP,
28152 IX86_BUILTIN_ROLQI,
28153 IX86_BUILTIN_ROLHI,
28154 IX86_BUILTIN_RORQI,
28155 IX86_BUILTIN_RORHI,
28157 /* SSE3. */
28158 IX86_BUILTIN_ADDSUBPS,
28159 IX86_BUILTIN_HADDPS,
28160 IX86_BUILTIN_HSUBPS,
28161 IX86_BUILTIN_MOVSHDUP,
28162 IX86_BUILTIN_MOVSLDUP,
28163 IX86_BUILTIN_ADDSUBPD,
28164 IX86_BUILTIN_HADDPD,
28165 IX86_BUILTIN_HSUBPD,
28166 IX86_BUILTIN_LDDQU,
28168 IX86_BUILTIN_MONITOR,
28169 IX86_BUILTIN_MWAIT,
28171 /* SSSE3. */
28172 IX86_BUILTIN_PHADDW,
28173 IX86_BUILTIN_PHADDD,
28174 IX86_BUILTIN_PHADDSW,
28175 IX86_BUILTIN_PHSUBW,
28176 IX86_BUILTIN_PHSUBD,
28177 IX86_BUILTIN_PHSUBSW,
28178 IX86_BUILTIN_PMADDUBSW,
28179 IX86_BUILTIN_PMULHRSW,
28180 IX86_BUILTIN_PSHUFB,
28181 IX86_BUILTIN_PSIGNB,
28182 IX86_BUILTIN_PSIGNW,
28183 IX86_BUILTIN_PSIGND,
28184 IX86_BUILTIN_PALIGNR,
28185 IX86_BUILTIN_PABSB,
28186 IX86_BUILTIN_PABSW,
28187 IX86_BUILTIN_PABSD,
28189 IX86_BUILTIN_PHADDW128,
28190 IX86_BUILTIN_PHADDD128,
28191 IX86_BUILTIN_PHADDSW128,
28192 IX86_BUILTIN_PHSUBW128,
28193 IX86_BUILTIN_PHSUBD128,
28194 IX86_BUILTIN_PHSUBSW128,
28195 IX86_BUILTIN_PMADDUBSW128,
28196 IX86_BUILTIN_PMULHRSW128,
28197 IX86_BUILTIN_PSHUFB128,
28198 IX86_BUILTIN_PSIGNB128,
28199 IX86_BUILTIN_PSIGNW128,
28200 IX86_BUILTIN_PSIGND128,
28201 IX86_BUILTIN_PALIGNR128,
28202 IX86_BUILTIN_PABSB128,
28203 IX86_BUILTIN_PABSW128,
28204 IX86_BUILTIN_PABSD128,
28206 /* AMDFAM10 - SSE4A New Instructions. */
28207 IX86_BUILTIN_MOVNTSD,
28208 IX86_BUILTIN_MOVNTSS,
28209 IX86_BUILTIN_EXTRQI,
28210 IX86_BUILTIN_EXTRQ,
28211 IX86_BUILTIN_INSERTQI,
28212 IX86_BUILTIN_INSERTQ,
28214 /* SSE4.1. */
28215 IX86_BUILTIN_BLENDPD,
28216 IX86_BUILTIN_BLENDPS,
28217 IX86_BUILTIN_BLENDVPD,
28218 IX86_BUILTIN_BLENDVPS,
28219 IX86_BUILTIN_PBLENDVB128,
28220 IX86_BUILTIN_PBLENDW128,
28222 IX86_BUILTIN_DPPD,
28223 IX86_BUILTIN_DPPS,
28225 IX86_BUILTIN_INSERTPS128,
28227 IX86_BUILTIN_MOVNTDQA,
28228 IX86_BUILTIN_MPSADBW128,
28229 IX86_BUILTIN_PACKUSDW128,
28230 IX86_BUILTIN_PCMPEQQ,
28231 IX86_BUILTIN_PHMINPOSUW128,
28233 IX86_BUILTIN_PMAXSB128,
28234 IX86_BUILTIN_PMAXSD128,
28235 IX86_BUILTIN_PMAXUD128,
28236 IX86_BUILTIN_PMAXUW128,
28238 IX86_BUILTIN_PMINSB128,
28239 IX86_BUILTIN_PMINSD128,
28240 IX86_BUILTIN_PMINUD128,
28241 IX86_BUILTIN_PMINUW128,
28243 IX86_BUILTIN_PMOVSXBW128,
28244 IX86_BUILTIN_PMOVSXBD128,
28245 IX86_BUILTIN_PMOVSXBQ128,
28246 IX86_BUILTIN_PMOVSXWD128,
28247 IX86_BUILTIN_PMOVSXWQ128,
28248 IX86_BUILTIN_PMOVSXDQ128,
28250 IX86_BUILTIN_PMOVZXBW128,
28251 IX86_BUILTIN_PMOVZXBD128,
28252 IX86_BUILTIN_PMOVZXBQ128,
28253 IX86_BUILTIN_PMOVZXWD128,
28254 IX86_BUILTIN_PMOVZXWQ128,
28255 IX86_BUILTIN_PMOVZXDQ128,
28257 IX86_BUILTIN_PMULDQ128,
28258 IX86_BUILTIN_PMULLD128,
28260 IX86_BUILTIN_ROUNDSD,
28261 IX86_BUILTIN_ROUNDSS,
28263 IX86_BUILTIN_ROUNDPD,
28264 IX86_BUILTIN_ROUNDPS,
28266 IX86_BUILTIN_FLOORPD,
28267 IX86_BUILTIN_CEILPD,
28268 IX86_BUILTIN_TRUNCPD,
28269 IX86_BUILTIN_RINTPD,
28270 IX86_BUILTIN_ROUNDPD_AZ,
28272 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28273 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28274 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28276 IX86_BUILTIN_FLOORPS,
28277 IX86_BUILTIN_CEILPS,
28278 IX86_BUILTIN_TRUNCPS,
28279 IX86_BUILTIN_RINTPS,
28280 IX86_BUILTIN_ROUNDPS_AZ,
28282 IX86_BUILTIN_FLOORPS_SFIX,
28283 IX86_BUILTIN_CEILPS_SFIX,
28284 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28286 IX86_BUILTIN_PTESTZ,
28287 IX86_BUILTIN_PTESTC,
28288 IX86_BUILTIN_PTESTNZC,
28290 IX86_BUILTIN_VEC_INIT_V2SI,
28291 IX86_BUILTIN_VEC_INIT_V4HI,
28292 IX86_BUILTIN_VEC_INIT_V8QI,
28293 IX86_BUILTIN_VEC_EXT_V2DF,
28294 IX86_BUILTIN_VEC_EXT_V2DI,
28295 IX86_BUILTIN_VEC_EXT_V4SF,
28296 IX86_BUILTIN_VEC_EXT_V4SI,
28297 IX86_BUILTIN_VEC_EXT_V8HI,
28298 IX86_BUILTIN_VEC_EXT_V2SI,
28299 IX86_BUILTIN_VEC_EXT_V4HI,
28300 IX86_BUILTIN_VEC_EXT_V16QI,
28301 IX86_BUILTIN_VEC_SET_V2DI,
28302 IX86_BUILTIN_VEC_SET_V4SF,
28303 IX86_BUILTIN_VEC_SET_V4SI,
28304 IX86_BUILTIN_VEC_SET_V8HI,
28305 IX86_BUILTIN_VEC_SET_V4HI,
28306 IX86_BUILTIN_VEC_SET_V16QI,
28308 IX86_BUILTIN_VEC_PACK_SFIX,
28309 IX86_BUILTIN_VEC_PACK_SFIX256,
28311 /* SSE4.2. */
28312 IX86_BUILTIN_CRC32QI,
28313 IX86_BUILTIN_CRC32HI,
28314 IX86_BUILTIN_CRC32SI,
28315 IX86_BUILTIN_CRC32DI,
28317 IX86_BUILTIN_PCMPESTRI128,
28318 IX86_BUILTIN_PCMPESTRM128,
28319 IX86_BUILTIN_PCMPESTRA128,
28320 IX86_BUILTIN_PCMPESTRC128,
28321 IX86_BUILTIN_PCMPESTRO128,
28322 IX86_BUILTIN_PCMPESTRS128,
28323 IX86_BUILTIN_PCMPESTRZ128,
28324 IX86_BUILTIN_PCMPISTRI128,
28325 IX86_BUILTIN_PCMPISTRM128,
28326 IX86_BUILTIN_PCMPISTRA128,
28327 IX86_BUILTIN_PCMPISTRC128,
28328 IX86_BUILTIN_PCMPISTRO128,
28329 IX86_BUILTIN_PCMPISTRS128,
28330 IX86_BUILTIN_PCMPISTRZ128,
28332 IX86_BUILTIN_PCMPGTQ,
28334 /* AES instructions */
28335 IX86_BUILTIN_AESENC128,
28336 IX86_BUILTIN_AESENCLAST128,
28337 IX86_BUILTIN_AESDEC128,
28338 IX86_BUILTIN_AESDECLAST128,
28339 IX86_BUILTIN_AESIMC128,
28340 IX86_BUILTIN_AESKEYGENASSIST128,
28342 /* PCLMUL instruction */
28343 IX86_BUILTIN_PCLMULQDQ128,
28345 /* AVX */
28346 IX86_BUILTIN_ADDPD256,
28347 IX86_BUILTIN_ADDPS256,
28348 IX86_BUILTIN_ADDSUBPD256,
28349 IX86_BUILTIN_ADDSUBPS256,
28350 IX86_BUILTIN_ANDPD256,
28351 IX86_BUILTIN_ANDPS256,
28352 IX86_BUILTIN_ANDNPD256,
28353 IX86_BUILTIN_ANDNPS256,
28354 IX86_BUILTIN_BLENDPD256,
28355 IX86_BUILTIN_BLENDPS256,
28356 IX86_BUILTIN_BLENDVPD256,
28357 IX86_BUILTIN_BLENDVPS256,
28358 IX86_BUILTIN_DIVPD256,
28359 IX86_BUILTIN_DIVPS256,
28360 IX86_BUILTIN_DPPS256,
28361 IX86_BUILTIN_HADDPD256,
28362 IX86_BUILTIN_HADDPS256,
28363 IX86_BUILTIN_HSUBPD256,
28364 IX86_BUILTIN_HSUBPS256,
28365 IX86_BUILTIN_MAXPD256,
28366 IX86_BUILTIN_MAXPS256,
28367 IX86_BUILTIN_MINPD256,
28368 IX86_BUILTIN_MINPS256,
28369 IX86_BUILTIN_MULPD256,
28370 IX86_BUILTIN_MULPS256,
28371 IX86_BUILTIN_ORPD256,
28372 IX86_BUILTIN_ORPS256,
28373 IX86_BUILTIN_SHUFPD256,
28374 IX86_BUILTIN_SHUFPS256,
28375 IX86_BUILTIN_SUBPD256,
28376 IX86_BUILTIN_SUBPS256,
28377 IX86_BUILTIN_XORPD256,
28378 IX86_BUILTIN_XORPS256,
28379 IX86_BUILTIN_CMPSD,
28380 IX86_BUILTIN_CMPSS,
28381 IX86_BUILTIN_CMPPD,
28382 IX86_BUILTIN_CMPPS,
28383 IX86_BUILTIN_CMPPD256,
28384 IX86_BUILTIN_CMPPS256,
28385 IX86_BUILTIN_CVTDQ2PD256,
28386 IX86_BUILTIN_CVTDQ2PS256,
28387 IX86_BUILTIN_CVTPD2PS256,
28388 IX86_BUILTIN_CVTPS2DQ256,
28389 IX86_BUILTIN_CVTPS2PD256,
28390 IX86_BUILTIN_CVTTPD2DQ256,
28391 IX86_BUILTIN_CVTPD2DQ256,
28392 IX86_BUILTIN_CVTTPS2DQ256,
28393 IX86_BUILTIN_EXTRACTF128PD256,
28394 IX86_BUILTIN_EXTRACTF128PS256,
28395 IX86_BUILTIN_EXTRACTF128SI256,
28396 IX86_BUILTIN_VZEROALL,
28397 IX86_BUILTIN_VZEROUPPER,
28398 IX86_BUILTIN_VPERMILVARPD,
28399 IX86_BUILTIN_VPERMILVARPS,
28400 IX86_BUILTIN_VPERMILVARPD256,
28401 IX86_BUILTIN_VPERMILVARPS256,
28402 IX86_BUILTIN_VPERMILPD,
28403 IX86_BUILTIN_VPERMILPS,
28404 IX86_BUILTIN_VPERMILPD256,
28405 IX86_BUILTIN_VPERMILPS256,
28406 IX86_BUILTIN_VPERMIL2PD,
28407 IX86_BUILTIN_VPERMIL2PS,
28408 IX86_BUILTIN_VPERMIL2PD256,
28409 IX86_BUILTIN_VPERMIL2PS256,
28410 IX86_BUILTIN_VPERM2F128PD256,
28411 IX86_BUILTIN_VPERM2F128PS256,
28412 IX86_BUILTIN_VPERM2F128SI256,
28413 IX86_BUILTIN_VBROADCASTSS,
28414 IX86_BUILTIN_VBROADCASTSD256,
28415 IX86_BUILTIN_VBROADCASTSS256,
28416 IX86_BUILTIN_VBROADCASTPD256,
28417 IX86_BUILTIN_VBROADCASTPS256,
28418 IX86_BUILTIN_VINSERTF128PD256,
28419 IX86_BUILTIN_VINSERTF128PS256,
28420 IX86_BUILTIN_VINSERTF128SI256,
28421 IX86_BUILTIN_LOADUPD256,
28422 IX86_BUILTIN_LOADUPS256,
28423 IX86_BUILTIN_STOREUPD256,
28424 IX86_BUILTIN_STOREUPS256,
28425 IX86_BUILTIN_LDDQU256,
28426 IX86_BUILTIN_MOVNTDQ256,
28427 IX86_BUILTIN_MOVNTPD256,
28428 IX86_BUILTIN_MOVNTPS256,
28429 IX86_BUILTIN_LOADDQU256,
28430 IX86_BUILTIN_STOREDQU256,
28431 IX86_BUILTIN_MASKLOADPD,
28432 IX86_BUILTIN_MASKLOADPS,
28433 IX86_BUILTIN_MASKSTOREPD,
28434 IX86_BUILTIN_MASKSTOREPS,
28435 IX86_BUILTIN_MASKLOADPD256,
28436 IX86_BUILTIN_MASKLOADPS256,
28437 IX86_BUILTIN_MASKSTOREPD256,
28438 IX86_BUILTIN_MASKSTOREPS256,
28439 IX86_BUILTIN_MOVSHDUP256,
28440 IX86_BUILTIN_MOVSLDUP256,
28441 IX86_BUILTIN_MOVDDUP256,
28443 IX86_BUILTIN_SQRTPD256,
28444 IX86_BUILTIN_SQRTPS256,
28445 IX86_BUILTIN_SQRTPS_NR256,
28446 IX86_BUILTIN_RSQRTPS256,
28447 IX86_BUILTIN_RSQRTPS_NR256,
28449 IX86_BUILTIN_RCPPS256,
28451 IX86_BUILTIN_ROUNDPD256,
28452 IX86_BUILTIN_ROUNDPS256,
28454 IX86_BUILTIN_FLOORPD256,
28455 IX86_BUILTIN_CEILPD256,
28456 IX86_BUILTIN_TRUNCPD256,
28457 IX86_BUILTIN_RINTPD256,
28458 IX86_BUILTIN_ROUNDPD_AZ256,
28460 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28461 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28462 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28464 IX86_BUILTIN_FLOORPS256,
28465 IX86_BUILTIN_CEILPS256,
28466 IX86_BUILTIN_TRUNCPS256,
28467 IX86_BUILTIN_RINTPS256,
28468 IX86_BUILTIN_ROUNDPS_AZ256,
28470 IX86_BUILTIN_FLOORPS_SFIX256,
28471 IX86_BUILTIN_CEILPS_SFIX256,
28472 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28474 IX86_BUILTIN_UNPCKHPD256,
28475 IX86_BUILTIN_UNPCKLPD256,
28476 IX86_BUILTIN_UNPCKHPS256,
28477 IX86_BUILTIN_UNPCKLPS256,
28479 IX86_BUILTIN_SI256_SI,
28480 IX86_BUILTIN_PS256_PS,
28481 IX86_BUILTIN_PD256_PD,
28482 IX86_BUILTIN_SI_SI256,
28483 IX86_BUILTIN_PS_PS256,
28484 IX86_BUILTIN_PD_PD256,
28486 IX86_BUILTIN_VTESTZPD,
28487 IX86_BUILTIN_VTESTCPD,
28488 IX86_BUILTIN_VTESTNZCPD,
28489 IX86_BUILTIN_VTESTZPS,
28490 IX86_BUILTIN_VTESTCPS,
28491 IX86_BUILTIN_VTESTNZCPS,
28492 IX86_BUILTIN_VTESTZPD256,
28493 IX86_BUILTIN_VTESTCPD256,
28494 IX86_BUILTIN_VTESTNZCPD256,
28495 IX86_BUILTIN_VTESTZPS256,
28496 IX86_BUILTIN_VTESTCPS256,
28497 IX86_BUILTIN_VTESTNZCPS256,
28498 IX86_BUILTIN_PTESTZ256,
28499 IX86_BUILTIN_PTESTC256,
28500 IX86_BUILTIN_PTESTNZC256,
28502 IX86_BUILTIN_MOVMSKPD256,
28503 IX86_BUILTIN_MOVMSKPS256,
28505 /* AVX2 */
28506 IX86_BUILTIN_MPSADBW256,
28507 IX86_BUILTIN_PABSB256,
28508 IX86_BUILTIN_PABSW256,
28509 IX86_BUILTIN_PABSD256,
28510 IX86_BUILTIN_PACKSSDW256,
28511 IX86_BUILTIN_PACKSSWB256,
28512 IX86_BUILTIN_PACKUSDW256,
28513 IX86_BUILTIN_PACKUSWB256,
28514 IX86_BUILTIN_PADDB256,
28515 IX86_BUILTIN_PADDW256,
28516 IX86_BUILTIN_PADDD256,
28517 IX86_BUILTIN_PADDQ256,
28518 IX86_BUILTIN_PADDSB256,
28519 IX86_BUILTIN_PADDSW256,
28520 IX86_BUILTIN_PADDUSB256,
28521 IX86_BUILTIN_PADDUSW256,
28522 IX86_BUILTIN_PALIGNR256,
28523 IX86_BUILTIN_AND256I,
28524 IX86_BUILTIN_ANDNOT256I,
28525 IX86_BUILTIN_PAVGB256,
28526 IX86_BUILTIN_PAVGW256,
28527 IX86_BUILTIN_PBLENDVB256,
28528 IX86_BUILTIN_PBLENDVW256,
28529 IX86_BUILTIN_PCMPEQB256,
28530 IX86_BUILTIN_PCMPEQW256,
28531 IX86_BUILTIN_PCMPEQD256,
28532 IX86_BUILTIN_PCMPEQQ256,
28533 IX86_BUILTIN_PCMPGTB256,
28534 IX86_BUILTIN_PCMPGTW256,
28535 IX86_BUILTIN_PCMPGTD256,
28536 IX86_BUILTIN_PCMPGTQ256,
28537 IX86_BUILTIN_PHADDW256,
28538 IX86_BUILTIN_PHADDD256,
28539 IX86_BUILTIN_PHADDSW256,
28540 IX86_BUILTIN_PHSUBW256,
28541 IX86_BUILTIN_PHSUBD256,
28542 IX86_BUILTIN_PHSUBSW256,
28543 IX86_BUILTIN_PMADDUBSW256,
28544 IX86_BUILTIN_PMADDWD256,
28545 IX86_BUILTIN_PMAXSB256,
28546 IX86_BUILTIN_PMAXSW256,
28547 IX86_BUILTIN_PMAXSD256,
28548 IX86_BUILTIN_PMAXUB256,
28549 IX86_BUILTIN_PMAXUW256,
28550 IX86_BUILTIN_PMAXUD256,
28551 IX86_BUILTIN_PMINSB256,
28552 IX86_BUILTIN_PMINSW256,
28553 IX86_BUILTIN_PMINSD256,
28554 IX86_BUILTIN_PMINUB256,
28555 IX86_BUILTIN_PMINUW256,
28556 IX86_BUILTIN_PMINUD256,
28557 IX86_BUILTIN_PMOVMSKB256,
28558 IX86_BUILTIN_PMOVSXBW256,
28559 IX86_BUILTIN_PMOVSXBD256,
28560 IX86_BUILTIN_PMOVSXBQ256,
28561 IX86_BUILTIN_PMOVSXWD256,
28562 IX86_BUILTIN_PMOVSXWQ256,
28563 IX86_BUILTIN_PMOVSXDQ256,
28564 IX86_BUILTIN_PMOVZXBW256,
28565 IX86_BUILTIN_PMOVZXBD256,
28566 IX86_BUILTIN_PMOVZXBQ256,
28567 IX86_BUILTIN_PMOVZXWD256,
28568 IX86_BUILTIN_PMOVZXWQ256,
28569 IX86_BUILTIN_PMOVZXDQ256,
28570 IX86_BUILTIN_PMULDQ256,
28571 IX86_BUILTIN_PMULHRSW256,
28572 IX86_BUILTIN_PMULHUW256,
28573 IX86_BUILTIN_PMULHW256,
28574 IX86_BUILTIN_PMULLW256,
28575 IX86_BUILTIN_PMULLD256,
28576 IX86_BUILTIN_PMULUDQ256,
28577 IX86_BUILTIN_POR256,
28578 IX86_BUILTIN_PSADBW256,
28579 IX86_BUILTIN_PSHUFB256,
28580 IX86_BUILTIN_PSHUFD256,
28581 IX86_BUILTIN_PSHUFHW256,
28582 IX86_BUILTIN_PSHUFLW256,
28583 IX86_BUILTIN_PSIGNB256,
28584 IX86_BUILTIN_PSIGNW256,
28585 IX86_BUILTIN_PSIGND256,
28586 IX86_BUILTIN_PSLLDQI256,
28587 IX86_BUILTIN_PSLLWI256,
28588 IX86_BUILTIN_PSLLW256,
28589 IX86_BUILTIN_PSLLDI256,
28590 IX86_BUILTIN_PSLLD256,
28591 IX86_BUILTIN_PSLLQI256,
28592 IX86_BUILTIN_PSLLQ256,
28593 IX86_BUILTIN_PSRAWI256,
28594 IX86_BUILTIN_PSRAW256,
28595 IX86_BUILTIN_PSRADI256,
28596 IX86_BUILTIN_PSRAD256,
28597 IX86_BUILTIN_PSRLDQI256,
28598 IX86_BUILTIN_PSRLWI256,
28599 IX86_BUILTIN_PSRLW256,
28600 IX86_BUILTIN_PSRLDI256,
28601 IX86_BUILTIN_PSRLD256,
28602 IX86_BUILTIN_PSRLQI256,
28603 IX86_BUILTIN_PSRLQ256,
28604 IX86_BUILTIN_PSUBB256,
28605 IX86_BUILTIN_PSUBW256,
28606 IX86_BUILTIN_PSUBD256,
28607 IX86_BUILTIN_PSUBQ256,
28608 IX86_BUILTIN_PSUBSB256,
28609 IX86_BUILTIN_PSUBSW256,
28610 IX86_BUILTIN_PSUBUSB256,
28611 IX86_BUILTIN_PSUBUSW256,
28612 IX86_BUILTIN_PUNPCKHBW256,
28613 IX86_BUILTIN_PUNPCKHWD256,
28614 IX86_BUILTIN_PUNPCKHDQ256,
28615 IX86_BUILTIN_PUNPCKHQDQ256,
28616 IX86_BUILTIN_PUNPCKLBW256,
28617 IX86_BUILTIN_PUNPCKLWD256,
28618 IX86_BUILTIN_PUNPCKLDQ256,
28619 IX86_BUILTIN_PUNPCKLQDQ256,
28620 IX86_BUILTIN_PXOR256,
28621 IX86_BUILTIN_MOVNTDQA256,
28622 IX86_BUILTIN_VBROADCASTSS_PS,
28623 IX86_BUILTIN_VBROADCASTSS_PS256,
28624 IX86_BUILTIN_VBROADCASTSD_PD256,
28625 IX86_BUILTIN_VBROADCASTSI256,
28626 IX86_BUILTIN_PBLENDD256,
28627 IX86_BUILTIN_PBLENDD128,
28628 IX86_BUILTIN_PBROADCASTB256,
28629 IX86_BUILTIN_PBROADCASTW256,
28630 IX86_BUILTIN_PBROADCASTD256,
28631 IX86_BUILTIN_PBROADCASTQ256,
28632 IX86_BUILTIN_PBROADCASTB128,
28633 IX86_BUILTIN_PBROADCASTW128,
28634 IX86_BUILTIN_PBROADCASTD128,
28635 IX86_BUILTIN_PBROADCASTQ128,
28636 IX86_BUILTIN_VPERMVARSI256,
28637 IX86_BUILTIN_VPERMDF256,
28638 IX86_BUILTIN_VPERMVARSF256,
28639 IX86_BUILTIN_VPERMDI256,
28640 IX86_BUILTIN_VPERMTI256,
28641 IX86_BUILTIN_VEXTRACT128I256,
28642 IX86_BUILTIN_VINSERT128I256,
28643 IX86_BUILTIN_MASKLOADD,
28644 IX86_BUILTIN_MASKLOADQ,
28645 IX86_BUILTIN_MASKLOADD256,
28646 IX86_BUILTIN_MASKLOADQ256,
28647 IX86_BUILTIN_MASKSTORED,
28648 IX86_BUILTIN_MASKSTOREQ,
28649 IX86_BUILTIN_MASKSTORED256,
28650 IX86_BUILTIN_MASKSTOREQ256,
28651 IX86_BUILTIN_PSLLVV4DI,
28652 IX86_BUILTIN_PSLLVV2DI,
28653 IX86_BUILTIN_PSLLVV8SI,
28654 IX86_BUILTIN_PSLLVV4SI,
28655 IX86_BUILTIN_PSRAVV8SI,
28656 IX86_BUILTIN_PSRAVV4SI,
28657 IX86_BUILTIN_PSRLVV4DI,
28658 IX86_BUILTIN_PSRLVV2DI,
28659 IX86_BUILTIN_PSRLVV8SI,
28660 IX86_BUILTIN_PSRLVV4SI,
28662 IX86_BUILTIN_GATHERSIV2DF,
28663 IX86_BUILTIN_GATHERSIV4DF,
28664 IX86_BUILTIN_GATHERDIV2DF,
28665 IX86_BUILTIN_GATHERDIV4DF,
28666 IX86_BUILTIN_GATHERSIV4SF,
28667 IX86_BUILTIN_GATHERSIV8SF,
28668 IX86_BUILTIN_GATHERDIV4SF,
28669 IX86_BUILTIN_GATHERDIV8SF,
28670 IX86_BUILTIN_GATHERSIV2DI,
28671 IX86_BUILTIN_GATHERSIV4DI,
28672 IX86_BUILTIN_GATHERDIV2DI,
28673 IX86_BUILTIN_GATHERDIV4DI,
28674 IX86_BUILTIN_GATHERSIV4SI,
28675 IX86_BUILTIN_GATHERSIV8SI,
28676 IX86_BUILTIN_GATHERDIV4SI,
28677 IX86_BUILTIN_GATHERDIV8SI,
28679 /* AVX512F */
28680 IX86_BUILTIN_SI512_SI256,
28681 IX86_BUILTIN_PD512_PD256,
28682 IX86_BUILTIN_PS512_PS256,
28683 IX86_BUILTIN_SI512_SI,
28684 IX86_BUILTIN_PD512_PD,
28685 IX86_BUILTIN_PS512_PS,
28686 IX86_BUILTIN_ADDPD512,
28687 IX86_BUILTIN_ADDPS512,
28688 IX86_BUILTIN_ADDSD_ROUND,
28689 IX86_BUILTIN_ADDSS_ROUND,
28690 IX86_BUILTIN_ALIGND512,
28691 IX86_BUILTIN_ALIGNQ512,
28692 IX86_BUILTIN_BLENDMD512,
28693 IX86_BUILTIN_BLENDMPD512,
28694 IX86_BUILTIN_BLENDMPS512,
28695 IX86_BUILTIN_BLENDMQ512,
28696 IX86_BUILTIN_BROADCASTF32X4_512,
28697 IX86_BUILTIN_BROADCASTF64X4_512,
28698 IX86_BUILTIN_BROADCASTI32X4_512,
28699 IX86_BUILTIN_BROADCASTI64X4_512,
28700 IX86_BUILTIN_BROADCASTSD512,
28701 IX86_BUILTIN_BROADCASTSS512,
28702 IX86_BUILTIN_CMPD512,
28703 IX86_BUILTIN_CMPPD512,
28704 IX86_BUILTIN_CMPPS512,
28705 IX86_BUILTIN_CMPQ512,
28706 IX86_BUILTIN_CMPSD_MASK,
28707 IX86_BUILTIN_CMPSS_MASK,
28708 IX86_BUILTIN_COMIDF,
28709 IX86_BUILTIN_COMISF,
28710 IX86_BUILTIN_COMPRESSPD512,
28711 IX86_BUILTIN_COMPRESSPDSTORE512,
28712 IX86_BUILTIN_COMPRESSPS512,
28713 IX86_BUILTIN_COMPRESSPSSTORE512,
28714 IX86_BUILTIN_CVTDQ2PD512,
28715 IX86_BUILTIN_CVTDQ2PS512,
28716 IX86_BUILTIN_CVTPD2DQ512,
28717 IX86_BUILTIN_CVTPD2PS512,
28718 IX86_BUILTIN_CVTPD2UDQ512,
28719 IX86_BUILTIN_CVTPH2PS512,
28720 IX86_BUILTIN_CVTPS2DQ512,
28721 IX86_BUILTIN_CVTPS2PD512,
28722 IX86_BUILTIN_CVTPS2PH512,
28723 IX86_BUILTIN_CVTPS2UDQ512,
28724 IX86_BUILTIN_CVTSD2SS_ROUND,
28725 IX86_BUILTIN_CVTSI2SD64,
28726 IX86_BUILTIN_CVTSI2SS32,
28727 IX86_BUILTIN_CVTSI2SS64,
28728 IX86_BUILTIN_CVTSS2SD_ROUND,
28729 IX86_BUILTIN_CVTTPD2DQ512,
28730 IX86_BUILTIN_CVTTPD2UDQ512,
28731 IX86_BUILTIN_CVTTPS2DQ512,
28732 IX86_BUILTIN_CVTTPS2UDQ512,
28733 IX86_BUILTIN_CVTUDQ2PD512,
28734 IX86_BUILTIN_CVTUDQ2PS512,
28735 IX86_BUILTIN_CVTUSI2SD32,
28736 IX86_BUILTIN_CVTUSI2SD64,
28737 IX86_BUILTIN_CVTUSI2SS32,
28738 IX86_BUILTIN_CVTUSI2SS64,
28739 IX86_BUILTIN_DIVPD512,
28740 IX86_BUILTIN_DIVPS512,
28741 IX86_BUILTIN_DIVSD_ROUND,
28742 IX86_BUILTIN_DIVSS_ROUND,
28743 IX86_BUILTIN_EXPANDPD512,
28744 IX86_BUILTIN_EXPANDPD512Z,
28745 IX86_BUILTIN_EXPANDPDLOAD512,
28746 IX86_BUILTIN_EXPANDPDLOAD512Z,
28747 IX86_BUILTIN_EXPANDPS512,
28748 IX86_BUILTIN_EXPANDPS512Z,
28749 IX86_BUILTIN_EXPANDPSLOAD512,
28750 IX86_BUILTIN_EXPANDPSLOAD512Z,
28751 IX86_BUILTIN_EXTRACTF32X4,
28752 IX86_BUILTIN_EXTRACTF64X4,
28753 IX86_BUILTIN_EXTRACTI32X4,
28754 IX86_BUILTIN_EXTRACTI64X4,
28755 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28756 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28757 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28758 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28759 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28760 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28761 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28762 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28763 IX86_BUILTIN_GETEXPPD512,
28764 IX86_BUILTIN_GETEXPPS512,
28765 IX86_BUILTIN_GETEXPSD128,
28766 IX86_BUILTIN_GETEXPSS128,
28767 IX86_BUILTIN_GETMANTPD512,
28768 IX86_BUILTIN_GETMANTPS512,
28769 IX86_BUILTIN_GETMANTSD128,
28770 IX86_BUILTIN_GETMANTSS128,
28771 IX86_BUILTIN_INSERTF32X4,
28772 IX86_BUILTIN_INSERTF64X4,
28773 IX86_BUILTIN_INSERTI32X4,
28774 IX86_BUILTIN_INSERTI64X4,
28775 IX86_BUILTIN_LOADAPD512,
28776 IX86_BUILTIN_LOADAPS512,
28777 IX86_BUILTIN_LOADDQUDI512,
28778 IX86_BUILTIN_LOADDQUSI512,
28779 IX86_BUILTIN_LOADUPD512,
28780 IX86_BUILTIN_LOADUPS512,
28781 IX86_BUILTIN_MAXPD512,
28782 IX86_BUILTIN_MAXPS512,
28783 IX86_BUILTIN_MAXSD_ROUND,
28784 IX86_BUILTIN_MAXSS_ROUND,
28785 IX86_BUILTIN_MINPD512,
28786 IX86_BUILTIN_MINPS512,
28787 IX86_BUILTIN_MINSD_ROUND,
28788 IX86_BUILTIN_MINSS_ROUND,
28789 IX86_BUILTIN_MOVAPD512,
28790 IX86_BUILTIN_MOVAPS512,
28791 IX86_BUILTIN_MOVDDUP512,
28792 IX86_BUILTIN_MOVDQA32LOAD512,
28793 IX86_BUILTIN_MOVDQA32STORE512,
28794 IX86_BUILTIN_MOVDQA32_512,
28795 IX86_BUILTIN_MOVDQA64LOAD512,
28796 IX86_BUILTIN_MOVDQA64STORE512,
28797 IX86_BUILTIN_MOVDQA64_512,
28798 IX86_BUILTIN_MOVNTDQ512,
28799 IX86_BUILTIN_MOVNTDQA512,
28800 IX86_BUILTIN_MOVNTPD512,
28801 IX86_BUILTIN_MOVNTPS512,
28802 IX86_BUILTIN_MOVSHDUP512,
28803 IX86_BUILTIN_MOVSLDUP512,
28804 IX86_BUILTIN_MULPD512,
28805 IX86_BUILTIN_MULPS512,
28806 IX86_BUILTIN_MULSD_ROUND,
28807 IX86_BUILTIN_MULSS_ROUND,
28808 IX86_BUILTIN_PABSD512,
28809 IX86_BUILTIN_PABSQ512,
28810 IX86_BUILTIN_PADDD512,
28811 IX86_BUILTIN_PADDQ512,
28812 IX86_BUILTIN_PANDD512,
28813 IX86_BUILTIN_PANDND512,
28814 IX86_BUILTIN_PANDNQ512,
28815 IX86_BUILTIN_PANDQ512,
28816 IX86_BUILTIN_PBROADCASTD512,
28817 IX86_BUILTIN_PBROADCASTD512_GPR,
28818 IX86_BUILTIN_PBROADCASTMB512,
28819 IX86_BUILTIN_PBROADCASTMW512,
28820 IX86_BUILTIN_PBROADCASTQ512,
28821 IX86_BUILTIN_PBROADCASTQ512_GPR,
28822 IX86_BUILTIN_PBROADCASTQ512_MEM,
28823 IX86_BUILTIN_PCMPEQD512_MASK,
28824 IX86_BUILTIN_PCMPEQQ512_MASK,
28825 IX86_BUILTIN_PCMPGTD512_MASK,
28826 IX86_BUILTIN_PCMPGTQ512_MASK,
28827 IX86_BUILTIN_PCOMPRESSD512,
28828 IX86_BUILTIN_PCOMPRESSDSTORE512,
28829 IX86_BUILTIN_PCOMPRESSQ512,
28830 IX86_BUILTIN_PCOMPRESSQSTORE512,
28831 IX86_BUILTIN_PEXPANDD512,
28832 IX86_BUILTIN_PEXPANDD512Z,
28833 IX86_BUILTIN_PEXPANDDLOAD512,
28834 IX86_BUILTIN_PEXPANDDLOAD512Z,
28835 IX86_BUILTIN_PEXPANDQ512,
28836 IX86_BUILTIN_PEXPANDQ512Z,
28837 IX86_BUILTIN_PEXPANDQLOAD512,
28838 IX86_BUILTIN_PEXPANDQLOAD512Z,
28839 IX86_BUILTIN_PMAXSD512,
28840 IX86_BUILTIN_PMAXSQ512,
28841 IX86_BUILTIN_PMAXUD512,
28842 IX86_BUILTIN_PMAXUQ512,
28843 IX86_BUILTIN_PMINSD512,
28844 IX86_BUILTIN_PMINSQ512,
28845 IX86_BUILTIN_PMINUD512,
28846 IX86_BUILTIN_PMINUQ512,
28847 IX86_BUILTIN_PMOVDB512,
28848 IX86_BUILTIN_PMOVDB512_MEM,
28849 IX86_BUILTIN_PMOVDW512,
28850 IX86_BUILTIN_PMOVDW512_MEM,
28851 IX86_BUILTIN_PMOVQB512,
28852 IX86_BUILTIN_PMOVQB512_MEM,
28853 IX86_BUILTIN_PMOVQD512,
28854 IX86_BUILTIN_PMOVQD512_MEM,
28855 IX86_BUILTIN_PMOVQW512,
28856 IX86_BUILTIN_PMOVQW512_MEM,
28857 IX86_BUILTIN_PMOVSDB512,
28858 IX86_BUILTIN_PMOVSDB512_MEM,
28859 IX86_BUILTIN_PMOVSDW512,
28860 IX86_BUILTIN_PMOVSDW512_MEM,
28861 IX86_BUILTIN_PMOVSQB512,
28862 IX86_BUILTIN_PMOVSQB512_MEM,
28863 IX86_BUILTIN_PMOVSQD512,
28864 IX86_BUILTIN_PMOVSQD512_MEM,
28865 IX86_BUILTIN_PMOVSQW512,
28866 IX86_BUILTIN_PMOVSQW512_MEM,
28867 IX86_BUILTIN_PMOVSXBD512,
28868 IX86_BUILTIN_PMOVSXBQ512,
28869 IX86_BUILTIN_PMOVSXDQ512,
28870 IX86_BUILTIN_PMOVSXWD512,
28871 IX86_BUILTIN_PMOVSXWQ512,
28872 IX86_BUILTIN_PMOVUSDB512,
28873 IX86_BUILTIN_PMOVUSDB512_MEM,
28874 IX86_BUILTIN_PMOVUSDW512,
28875 IX86_BUILTIN_PMOVUSDW512_MEM,
28876 IX86_BUILTIN_PMOVUSQB512,
28877 IX86_BUILTIN_PMOVUSQB512_MEM,
28878 IX86_BUILTIN_PMOVUSQD512,
28879 IX86_BUILTIN_PMOVUSQD512_MEM,
28880 IX86_BUILTIN_PMOVUSQW512,
28881 IX86_BUILTIN_PMOVUSQW512_MEM,
28882 IX86_BUILTIN_PMOVZXBD512,
28883 IX86_BUILTIN_PMOVZXBQ512,
28884 IX86_BUILTIN_PMOVZXDQ512,
28885 IX86_BUILTIN_PMOVZXWD512,
28886 IX86_BUILTIN_PMOVZXWQ512,
28887 IX86_BUILTIN_PMULDQ512,
28888 IX86_BUILTIN_PMULLD512,
28889 IX86_BUILTIN_PMULUDQ512,
28890 IX86_BUILTIN_PORD512,
28891 IX86_BUILTIN_PORQ512,
28892 IX86_BUILTIN_PROLD512,
28893 IX86_BUILTIN_PROLQ512,
28894 IX86_BUILTIN_PROLVD512,
28895 IX86_BUILTIN_PROLVQ512,
28896 IX86_BUILTIN_PRORD512,
28897 IX86_BUILTIN_PRORQ512,
28898 IX86_BUILTIN_PRORVD512,
28899 IX86_BUILTIN_PRORVQ512,
28900 IX86_BUILTIN_PSHUFD512,
28901 IX86_BUILTIN_PSLLD512,
28902 IX86_BUILTIN_PSLLDI512,
28903 IX86_BUILTIN_PSLLQ512,
28904 IX86_BUILTIN_PSLLQI512,
28905 IX86_BUILTIN_PSLLVV16SI,
28906 IX86_BUILTIN_PSLLVV8DI,
28907 IX86_BUILTIN_PSRAD512,
28908 IX86_BUILTIN_PSRADI512,
28909 IX86_BUILTIN_PSRAQ512,
28910 IX86_BUILTIN_PSRAQI512,
28911 IX86_BUILTIN_PSRAVV16SI,
28912 IX86_BUILTIN_PSRAVV8DI,
28913 IX86_BUILTIN_PSRLD512,
28914 IX86_BUILTIN_PSRLDI512,
28915 IX86_BUILTIN_PSRLQ512,
28916 IX86_BUILTIN_PSRLQI512,
28917 IX86_BUILTIN_PSRLVV16SI,
28918 IX86_BUILTIN_PSRLVV8DI,
28919 IX86_BUILTIN_PSUBD512,
28920 IX86_BUILTIN_PSUBQ512,
28921 IX86_BUILTIN_PTESTMD512,
28922 IX86_BUILTIN_PTESTMQ512,
28923 IX86_BUILTIN_PTESTNMD512,
28924 IX86_BUILTIN_PTESTNMQ512,
28925 IX86_BUILTIN_PUNPCKHDQ512,
28926 IX86_BUILTIN_PUNPCKHQDQ512,
28927 IX86_BUILTIN_PUNPCKLDQ512,
28928 IX86_BUILTIN_PUNPCKLQDQ512,
28929 IX86_BUILTIN_PXORD512,
28930 IX86_BUILTIN_PXORQ512,
28931 IX86_BUILTIN_RCP14PD512,
28932 IX86_BUILTIN_RCP14PS512,
28933 IX86_BUILTIN_RCP14SD,
28934 IX86_BUILTIN_RCP14SS,
28935 IX86_BUILTIN_RNDSCALEPD,
28936 IX86_BUILTIN_RNDSCALEPS,
28937 IX86_BUILTIN_RNDSCALESD,
28938 IX86_BUILTIN_RNDSCALESS,
28939 IX86_BUILTIN_RSQRT14PD512,
28940 IX86_BUILTIN_RSQRT14PS512,
28941 IX86_BUILTIN_RSQRT14SD,
28942 IX86_BUILTIN_RSQRT14SS,
28943 IX86_BUILTIN_SCALEFPD512,
28944 IX86_BUILTIN_SCALEFPS512,
28945 IX86_BUILTIN_SCALEFSD,
28946 IX86_BUILTIN_SCALEFSS,
28947 IX86_BUILTIN_SHUFPD512,
28948 IX86_BUILTIN_SHUFPS512,
28949 IX86_BUILTIN_SHUF_F32x4,
28950 IX86_BUILTIN_SHUF_F64x2,
28951 IX86_BUILTIN_SHUF_I32x4,
28952 IX86_BUILTIN_SHUF_I64x2,
28953 IX86_BUILTIN_SQRTPD512,
28954 IX86_BUILTIN_SQRTPD512_MASK,
28955 IX86_BUILTIN_SQRTPS512_MASK,
28956 IX86_BUILTIN_SQRTPS_NR512,
28957 IX86_BUILTIN_SQRTSD_ROUND,
28958 IX86_BUILTIN_SQRTSS_ROUND,
28959 IX86_BUILTIN_STOREAPD512,
28960 IX86_BUILTIN_STOREAPS512,
28961 IX86_BUILTIN_STOREDQUDI512,
28962 IX86_BUILTIN_STOREDQUSI512,
28963 IX86_BUILTIN_STOREUPD512,
28964 IX86_BUILTIN_STOREUPS512,
28965 IX86_BUILTIN_SUBPD512,
28966 IX86_BUILTIN_SUBPS512,
28967 IX86_BUILTIN_SUBSD_ROUND,
28968 IX86_BUILTIN_SUBSS_ROUND,
28969 IX86_BUILTIN_UCMPD512,
28970 IX86_BUILTIN_UCMPQ512,
28971 IX86_BUILTIN_UNPCKHPD512,
28972 IX86_BUILTIN_UNPCKHPS512,
28973 IX86_BUILTIN_UNPCKLPD512,
28974 IX86_BUILTIN_UNPCKLPS512,
28975 IX86_BUILTIN_VCVTSD2SI32,
28976 IX86_BUILTIN_VCVTSD2SI64,
28977 IX86_BUILTIN_VCVTSD2USI32,
28978 IX86_BUILTIN_VCVTSD2USI64,
28979 IX86_BUILTIN_VCVTSS2SI32,
28980 IX86_BUILTIN_VCVTSS2SI64,
28981 IX86_BUILTIN_VCVTSS2USI32,
28982 IX86_BUILTIN_VCVTSS2USI64,
28983 IX86_BUILTIN_VCVTTSD2SI32,
28984 IX86_BUILTIN_VCVTTSD2SI64,
28985 IX86_BUILTIN_VCVTTSD2USI32,
28986 IX86_BUILTIN_VCVTTSD2USI64,
28987 IX86_BUILTIN_VCVTTSS2SI32,
28988 IX86_BUILTIN_VCVTTSS2SI64,
28989 IX86_BUILTIN_VCVTTSS2USI32,
28990 IX86_BUILTIN_VCVTTSS2USI64,
28991 IX86_BUILTIN_VFMADDPD512_MASK,
28992 IX86_BUILTIN_VFMADDPD512_MASK3,
28993 IX86_BUILTIN_VFMADDPD512_MASKZ,
28994 IX86_BUILTIN_VFMADDPS512_MASK,
28995 IX86_BUILTIN_VFMADDPS512_MASK3,
28996 IX86_BUILTIN_VFMADDPS512_MASKZ,
28997 IX86_BUILTIN_VFMADDSD3_ROUND,
28998 IX86_BUILTIN_VFMADDSS3_ROUND,
28999 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29000 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29001 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29002 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29003 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29004 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29005 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29006 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29007 IX86_BUILTIN_VFMSUBPD512_MASK3,
29008 IX86_BUILTIN_VFMSUBPS512_MASK3,
29009 IX86_BUILTIN_VFMSUBSD3_MASK3,
29010 IX86_BUILTIN_VFMSUBSS3_MASK3,
29011 IX86_BUILTIN_VFNMADDPD512_MASK,
29012 IX86_BUILTIN_VFNMADDPS512_MASK,
29013 IX86_BUILTIN_VFNMSUBPD512_MASK,
29014 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29015 IX86_BUILTIN_VFNMSUBPS512_MASK,
29016 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29017 IX86_BUILTIN_VPCLZCNTD512,
29018 IX86_BUILTIN_VPCLZCNTQ512,
29019 IX86_BUILTIN_VPCONFLICTD512,
29020 IX86_BUILTIN_VPCONFLICTQ512,
29021 IX86_BUILTIN_VPERMDF512,
29022 IX86_BUILTIN_VPERMDI512,
29023 IX86_BUILTIN_VPERMI2VARD512,
29024 IX86_BUILTIN_VPERMI2VARPD512,
29025 IX86_BUILTIN_VPERMI2VARPS512,
29026 IX86_BUILTIN_VPERMI2VARQ512,
29027 IX86_BUILTIN_VPERMILPD512,
29028 IX86_BUILTIN_VPERMILPS512,
29029 IX86_BUILTIN_VPERMILVARPD512,
29030 IX86_BUILTIN_VPERMILVARPS512,
29031 IX86_BUILTIN_VPERMT2VARD512,
29032 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29033 IX86_BUILTIN_VPERMT2VARPD512,
29034 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29035 IX86_BUILTIN_VPERMT2VARPS512,
29036 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29037 IX86_BUILTIN_VPERMT2VARQ512,
29038 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29039 IX86_BUILTIN_VPERMVARDF512,
29040 IX86_BUILTIN_VPERMVARDI512,
29041 IX86_BUILTIN_VPERMVARSF512,
29042 IX86_BUILTIN_VPERMVARSI512,
29043 IX86_BUILTIN_VTERNLOGD512_MASK,
29044 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29045 IX86_BUILTIN_VTERNLOGQ512_MASK,
29046 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29048 /* Mask arithmetic operations */
29049 IX86_BUILTIN_KAND16,
29050 IX86_BUILTIN_KANDN16,
29051 IX86_BUILTIN_KNOT16,
29052 IX86_BUILTIN_KOR16,
29053 IX86_BUILTIN_KORTESTC16,
29054 IX86_BUILTIN_KORTESTZ16,
29055 IX86_BUILTIN_KUNPCKBW,
29056 IX86_BUILTIN_KXNOR16,
29057 IX86_BUILTIN_KXOR16,
29058 IX86_BUILTIN_KMOV16,
29060 /* AVX512VL. */
29061 IX86_BUILTIN_PMOVUSQD256_MEM,
29062 IX86_BUILTIN_PMOVUSQD128_MEM,
29063 IX86_BUILTIN_PMOVSQD256_MEM,
29064 IX86_BUILTIN_PMOVSQD128_MEM,
29065 IX86_BUILTIN_PMOVQD256_MEM,
29066 IX86_BUILTIN_PMOVQD128_MEM,
29067 IX86_BUILTIN_PMOVUSQW256_MEM,
29068 IX86_BUILTIN_PMOVUSQW128_MEM,
29069 IX86_BUILTIN_PMOVSQW256_MEM,
29070 IX86_BUILTIN_PMOVSQW128_MEM,
29071 IX86_BUILTIN_PMOVQW256_MEM,
29072 IX86_BUILTIN_PMOVQW128_MEM,
29073 IX86_BUILTIN_PMOVUSQB256_MEM,
29074 IX86_BUILTIN_PMOVUSQB128_MEM,
29075 IX86_BUILTIN_PMOVSQB256_MEM,
29076 IX86_BUILTIN_PMOVSQB128_MEM,
29077 IX86_BUILTIN_PMOVQB256_MEM,
29078 IX86_BUILTIN_PMOVQB128_MEM,
29079 IX86_BUILTIN_PMOVUSDW256_MEM,
29080 IX86_BUILTIN_PMOVUSDW128_MEM,
29081 IX86_BUILTIN_PMOVSDW256_MEM,
29082 IX86_BUILTIN_PMOVSDW128_MEM,
29083 IX86_BUILTIN_PMOVDW256_MEM,
29084 IX86_BUILTIN_PMOVDW128_MEM,
29085 IX86_BUILTIN_PMOVUSDB256_MEM,
29086 IX86_BUILTIN_PMOVUSDB128_MEM,
29087 IX86_BUILTIN_PMOVSDB256_MEM,
29088 IX86_BUILTIN_PMOVSDB128_MEM,
29089 IX86_BUILTIN_PMOVDB256_MEM,
29090 IX86_BUILTIN_PMOVDB128_MEM,
29091 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29092 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29093 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29094 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29095 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29096 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29097 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29098 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29099 IX86_BUILTIN_LOADAPD256_MASK,
29100 IX86_BUILTIN_LOADAPD128_MASK,
29101 IX86_BUILTIN_LOADAPS256_MASK,
29102 IX86_BUILTIN_LOADAPS128_MASK,
29103 IX86_BUILTIN_STOREAPD256_MASK,
29104 IX86_BUILTIN_STOREAPD128_MASK,
29105 IX86_BUILTIN_STOREAPS256_MASK,
29106 IX86_BUILTIN_STOREAPS128_MASK,
29107 IX86_BUILTIN_LOADUPD256_MASK,
29108 IX86_BUILTIN_LOADUPD128_MASK,
29109 IX86_BUILTIN_LOADUPS256_MASK,
29110 IX86_BUILTIN_LOADUPS128_MASK,
29111 IX86_BUILTIN_STOREUPD256_MASK,
29112 IX86_BUILTIN_STOREUPD128_MASK,
29113 IX86_BUILTIN_STOREUPS256_MASK,
29114 IX86_BUILTIN_STOREUPS128_MASK,
29115 IX86_BUILTIN_LOADDQUDI256_MASK,
29116 IX86_BUILTIN_LOADDQUDI128_MASK,
29117 IX86_BUILTIN_LOADDQUSI256_MASK,
29118 IX86_BUILTIN_LOADDQUSI128_MASK,
29119 IX86_BUILTIN_LOADDQUHI256_MASK,
29120 IX86_BUILTIN_LOADDQUHI128_MASK,
29121 IX86_BUILTIN_LOADDQUQI256_MASK,
29122 IX86_BUILTIN_LOADDQUQI128_MASK,
29123 IX86_BUILTIN_STOREDQUDI256_MASK,
29124 IX86_BUILTIN_STOREDQUDI128_MASK,
29125 IX86_BUILTIN_STOREDQUSI256_MASK,
29126 IX86_BUILTIN_STOREDQUSI128_MASK,
29127 IX86_BUILTIN_STOREDQUHI256_MASK,
29128 IX86_BUILTIN_STOREDQUHI128_MASK,
29129 IX86_BUILTIN_STOREDQUQI256_MASK,
29130 IX86_BUILTIN_STOREDQUQI128_MASK,
29131 IX86_BUILTIN_COMPRESSPDSTORE256,
29132 IX86_BUILTIN_COMPRESSPDSTORE128,
29133 IX86_BUILTIN_COMPRESSPSSTORE256,
29134 IX86_BUILTIN_COMPRESSPSSTORE128,
29135 IX86_BUILTIN_PCOMPRESSQSTORE256,
29136 IX86_BUILTIN_PCOMPRESSQSTORE128,
29137 IX86_BUILTIN_PCOMPRESSDSTORE256,
29138 IX86_BUILTIN_PCOMPRESSDSTORE128,
29139 IX86_BUILTIN_EXPANDPDLOAD256,
29140 IX86_BUILTIN_EXPANDPDLOAD128,
29141 IX86_BUILTIN_EXPANDPSLOAD256,
29142 IX86_BUILTIN_EXPANDPSLOAD128,
29143 IX86_BUILTIN_PEXPANDQLOAD256,
29144 IX86_BUILTIN_PEXPANDQLOAD128,
29145 IX86_BUILTIN_PEXPANDDLOAD256,
29146 IX86_BUILTIN_PEXPANDDLOAD128,
29147 IX86_BUILTIN_EXPANDPDLOAD256Z,
29148 IX86_BUILTIN_EXPANDPDLOAD128Z,
29149 IX86_BUILTIN_EXPANDPSLOAD256Z,
29150 IX86_BUILTIN_EXPANDPSLOAD128Z,
29151 IX86_BUILTIN_PEXPANDQLOAD256Z,
29152 IX86_BUILTIN_PEXPANDQLOAD128Z,
29153 IX86_BUILTIN_PEXPANDDLOAD256Z,
29154 IX86_BUILTIN_PEXPANDDLOAD128Z,
29155 IX86_BUILTIN_PALIGNR256_MASK,
29156 IX86_BUILTIN_PALIGNR128_MASK,
29157 IX86_BUILTIN_MOVDQA64_256_MASK,
29158 IX86_BUILTIN_MOVDQA64_128_MASK,
29159 IX86_BUILTIN_MOVDQA32_256_MASK,
29160 IX86_BUILTIN_MOVDQA32_128_MASK,
29161 IX86_BUILTIN_MOVAPD256_MASK,
29162 IX86_BUILTIN_MOVAPD128_MASK,
29163 IX86_BUILTIN_MOVAPS256_MASK,
29164 IX86_BUILTIN_MOVAPS128_MASK,
29165 IX86_BUILTIN_MOVDQUHI256_MASK,
29166 IX86_BUILTIN_MOVDQUHI128_MASK,
29167 IX86_BUILTIN_MOVDQUQI256_MASK,
29168 IX86_BUILTIN_MOVDQUQI128_MASK,
29169 IX86_BUILTIN_MINPS128_MASK,
29170 IX86_BUILTIN_MAXPS128_MASK,
29171 IX86_BUILTIN_MINPD128_MASK,
29172 IX86_BUILTIN_MAXPD128_MASK,
29173 IX86_BUILTIN_MAXPD256_MASK,
29174 IX86_BUILTIN_MAXPS256_MASK,
29175 IX86_BUILTIN_MINPD256_MASK,
29176 IX86_BUILTIN_MINPS256_MASK,
29177 IX86_BUILTIN_MULPS128_MASK,
29178 IX86_BUILTIN_DIVPS128_MASK,
29179 IX86_BUILTIN_MULPD128_MASK,
29180 IX86_BUILTIN_DIVPD128_MASK,
29181 IX86_BUILTIN_DIVPD256_MASK,
29182 IX86_BUILTIN_DIVPS256_MASK,
29183 IX86_BUILTIN_MULPD256_MASK,
29184 IX86_BUILTIN_MULPS256_MASK,
29185 IX86_BUILTIN_ADDPD128_MASK,
29186 IX86_BUILTIN_ADDPD256_MASK,
29187 IX86_BUILTIN_ADDPS128_MASK,
29188 IX86_BUILTIN_ADDPS256_MASK,
29189 IX86_BUILTIN_SUBPD128_MASK,
29190 IX86_BUILTIN_SUBPD256_MASK,
29191 IX86_BUILTIN_SUBPS128_MASK,
29192 IX86_BUILTIN_SUBPS256_MASK,
29193 IX86_BUILTIN_XORPD256_MASK,
29194 IX86_BUILTIN_XORPD128_MASK,
29195 IX86_BUILTIN_XORPS256_MASK,
29196 IX86_BUILTIN_XORPS128_MASK,
29197 IX86_BUILTIN_ORPD256_MASK,
29198 IX86_BUILTIN_ORPD128_MASK,
29199 IX86_BUILTIN_ORPS256_MASK,
29200 IX86_BUILTIN_ORPS128_MASK,
29201 IX86_BUILTIN_BROADCASTF32x2_256,
29202 IX86_BUILTIN_BROADCASTI32x2_256,
29203 IX86_BUILTIN_BROADCASTI32x2_128,
29204 IX86_BUILTIN_BROADCASTF64X2_256,
29205 IX86_BUILTIN_BROADCASTI64X2_256,
29206 IX86_BUILTIN_BROADCASTF32X4_256,
29207 IX86_BUILTIN_BROADCASTI32X4_256,
29208 IX86_BUILTIN_EXTRACTF32X4_256,
29209 IX86_BUILTIN_EXTRACTI32X4_256,
29210 IX86_BUILTIN_DBPSADBW256,
29211 IX86_BUILTIN_DBPSADBW128,
29212 IX86_BUILTIN_CVTTPD2QQ256,
29213 IX86_BUILTIN_CVTTPD2QQ128,
29214 IX86_BUILTIN_CVTTPD2UQQ256,
29215 IX86_BUILTIN_CVTTPD2UQQ128,
29216 IX86_BUILTIN_CVTPD2QQ256,
29217 IX86_BUILTIN_CVTPD2QQ128,
29218 IX86_BUILTIN_CVTPD2UQQ256,
29219 IX86_BUILTIN_CVTPD2UQQ128,
29220 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29221 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29222 IX86_BUILTIN_CVTTPS2QQ256,
29223 IX86_BUILTIN_CVTTPS2QQ128,
29224 IX86_BUILTIN_CVTTPS2UQQ256,
29225 IX86_BUILTIN_CVTTPS2UQQ128,
29226 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29227 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29228 IX86_BUILTIN_CVTTPS2UDQ256,
29229 IX86_BUILTIN_CVTTPS2UDQ128,
29230 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29231 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29232 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29233 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29234 IX86_BUILTIN_CVTPD2DQ256_MASK,
29235 IX86_BUILTIN_CVTPD2DQ128_MASK,
29236 IX86_BUILTIN_CVTDQ2PD256_MASK,
29237 IX86_BUILTIN_CVTDQ2PD128_MASK,
29238 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29239 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29240 IX86_BUILTIN_CVTDQ2PS256_MASK,
29241 IX86_BUILTIN_CVTDQ2PS128_MASK,
29242 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29243 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29244 IX86_BUILTIN_CVTPS2PD256_MASK,
29245 IX86_BUILTIN_CVTPS2PD128_MASK,
29246 IX86_BUILTIN_PBROADCASTB256_MASK,
29247 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29248 IX86_BUILTIN_PBROADCASTB128_MASK,
29249 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29250 IX86_BUILTIN_PBROADCASTW256_MASK,
29251 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29252 IX86_BUILTIN_PBROADCASTW128_MASK,
29253 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29254 IX86_BUILTIN_PBROADCASTD256_MASK,
29255 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29256 IX86_BUILTIN_PBROADCASTD128_MASK,
29257 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29258 IX86_BUILTIN_PBROADCASTQ256_MASK,
29259 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29260 IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
29261 IX86_BUILTIN_PBROADCASTQ128_MASK,
29262 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29263 IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
29264 IX86_BUILTIN_BROADCASTSS256,
29265 IX86_BUILTIN_BROADCASTSS128,
29266 IX86_BUILTIN_BROADCASTSD256,
29267 IX86_BUILTIN_EXTRACTF64X2_256,
29268 IX86_BUILTIN_EXTRACTI64X2_256,
29269 IX86_BUILTIN_INSERTF32X4_256,
29270 IX86_BUILTIN_INSERTI32X4_256,
29271 IX86_BUILTIN_PMOVSXBW256_MASK,
29272 IX86_BUILTIN_PMOVSXBW128_MASK,
29273 IX86_BUILTIN_PMOVSXBD256_MASK,
29274 IX86_BUILTIN_PMOVSXBD128_MASK,
29275 IX86_BUILTIN_PMOVSXBQ256_MASK,
29276 IX86_BUILTIN_PMOVSXBQ128_MASK,
29277 IX86_BUILTIN_PMOVSXWD256_MASK,
29278 IX86_BUILTIN_PMOVSXWD128_MASK,
29279 IX86_BUILTIN_PMOVSXWQ256_MASK,
29280 IX86_BUILTIN_PMOVSXWQ128_MASK,
29281 IX86_BUILTIN_PMOVSXDQ256_MASK,
29282 IX86_BUILTIN_PMOVSXDQ128_MASK,
29283 IX86_BUILTIN_PMOVZXBW256_MASK,
29284 IX86_BUILTIN_PMOVZXBW128_MASK,
29285 IX86_BUILTIN_PMOVZXBD256_MASK,
29286 IX86_BUILTIN_PMOVZXBD128_MASK,
29287 IX86_BUILTIN_PMOVZXBQ256_MASK,
29288 IX86_BUILTIN_PMOVZXBQ128_MASK,
29289 IX86_BUILTIN_PMOVZXWD256_MASK,
29290 IX86_BUILTIN_PMOVZXWD128_MASK,
29291 IX86_BUILTIN_PMOVZXWQ256_MASK,
29292 IX86_BUILTIN_PMOVZXWQ128_MASK,
29293 IX86_BUILTIN_PMOVZXDQ256_MASK,
29294 IX86_BUILTIN_PMOVZXDQ128_MASK,
29295 IX86_BUILTIN_REDUCEPD256_MASK,
29296 IX86_BUILTIN_REDUCEPD128_MASK,
29297 IX86_BUILTIN_REDUCEPS256_MASK,
29298 IX86_BUILTIN_REDUCEPS128_MASK,
29299 IX86_BUILTIN_REDUCESD_MASK,
29300 IX86_BUILTIN_REDUCESS_MASK,
29301 IX86_BUILTIN_VPERMVARHI256_MASK,
29302 IX86_BUILTIN_VPERMVARHI128_MASK,
29303 IX86_BUILTIN_VPERMT2VARHI256,
29304 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29305 IX86_BUILTIN_VPERMT2VARHI128,
29306 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29307 IX86_BUILTIN_VPERMI2VARHI256,
29308 IX86_BUILTIN_VPERMI2VARHI128,
29309 IX86_BUILTIN_RCP14PD256,
29310 IX86_BUILTIN_RCP14PD128,
29311 IX86_BUILTIN_RCP14PS256,
29312 IX86_BUILTIN_RCP14PS128,
29313 IX86_BUILTIN_RSQRT14PD256_MASK,
29314 IX86_BUILTIN_RSQRT14PD128_MASK,
29315 IX86_BUILTIN_RSQRT14PS256_MASK,
29316 IX86_BUILTIN_RSQRT14PS128_MASK,
29317 IX86_BUILTIN_SQRTPD256_MASK,
29318 IX86_BUILTIN_SQRTPD128_MASK,
29319 IX86_BUILTIN_SQRTPS256_MASK,
29320 IX86_BUILTIN_SQRTPS128_MASK,
29321 IX86_BUILTIN_PADDB128_MASK,
29322 IX86_BUILTIN_PADDW128_MASK,
29323 IX86_BUILTIN_PADDD128_MASK,
29324 IX86_BUILTIN_PADDQ128_MASK,
29325 IX86_BUILTIN_PSUBB128_MASK,
29326 IX86_BUILTIN_PSUBW128_MASK,
29327 IX86_BUILTIN_PSUBD128_MASK,
29328 IX86_BUILTIN_PSUBQ128_MASK,
29329 IX86_BUILTIN_PADDSB128_MASK,
29330 IX86_BUILTIN_PADDSW128_MASK,
29331 IX86_BUILTIN_PSUBSB128_MASK,
29332 IX86_BUILTIN_PSUBSW128_MASK,
29333 IX86_BUILTIN_PADDUSB128_MASK,
29334 IX86_BUILTIN_PADDUSW128_MASK,
29335 IX86_BUILTIN_PSUBUSB128_MASK,
29336 IX86_BUILTIN_PSUBUSW128_MASK,
29337 IX86_BUILTIN_PADDB256_MASK,
29338 IX86_BUILTIN_PADDW256_MASK,
29339 IX86_BUILTIN_PADDD256_MASK,
29340 IX86_BUILTIN_PADDQ256_MASK,
29341 IX86_BUILTIN_PADDSB256_MASK,
29342 IX86_BUILTIN_PADDSW256_MASK,
29343 IX86_BUILTIN_PADDUSB256_MASK,
29344 IX86_BUILTIN_PADDUSW256_MASK,
29345 IX86_BUILTIN_PSUBB256_MASK,
29346 IX86_BUILTIN_PSUBW256_MASK,
29347 IX86_BUILTIN_PSUBD256_MASK,
29348 IX86_BUILTIN_PSUBQ256_MASK,
29349 IX86_BUILTIN_PSUBSB256_MASK,
29350 IX86_BUILTIN_PSUBSW256_MASK,
29351 IX86_BUILTIN_PSUBUSB256_MASK,
29352 IX86_BUILTIN_PSUBUSW256_MASK,
29353 IX86_BUILTIN_SHUF_F64x2_256,
29354 IX86_BUILTIN_SHUF_I64x2_256,
29355 IX86_BUILTIN_SHUF_I32x4_256,
29356 IX86_BUILTIN_SHUF_F32x4_256,
29357 IX86_BUILTIN_PMOVWB128,
29358 IX86_BUILTIN_PMOVWB256,
29359 IX86_BUILTIN_PMOVSWB128,
29360 IX86_BUILTIN_PMOVSWB256,
29361 IX86_BUILTIN_PMOVUSWB128,
29362 IX86_BUILTIN_PMOVUSWB256,
29363 IX86_BUILTIN_PMOVDB128,
29364 IX86_BUILTIN_PMOVDB256,
29365 IX86_BUILTIN_PMOVSDB128,
29366 IX86_BUILTIN_PMOVSDB256,
29367 IX86_BUILTIN_PMOVUSDB128,
29368 IX86_BUILTIN_PMOVUSDB256,
29369 IX86_BUILTIN_PMOVDW128,
29370 IX86_BUILTIN_PMOVDW256,
29371 IX86_BUILTIN_PMOVSDW128,
29372 IX86_BUILTIN_PMOVSDW256,
29373 IX86_BUILTIN_PMOVUSDW128,
29374 IX86_BUILTIN_PMOVUSDW256,
29375 IX86_BUILTIN_PMOVQB128,
29376 IX86_BUILTIN_PMOVQB256,
29377 IX86_BUILTIN_PMOVSQB128,
29378 IX86_BUILTIN_PMOVSQB256,
29379 IX86_BUILTIN_PMOVUSQB128,
29380 IX86_BUILTIN_PMOVUSQB256,
29381 IX86_BUILTIN_PMOVQW128,
29382 IX86_BUILTIN_PMOVQW256,
29383 IX86_BUILTIN_PMOVSQW128,
29384 IX86_BUILTIN_PMOVSQW256,
29385 IX86_BUILTIN_PMOVUSQW128,
29386 IX86_BUILTIN_PMOVUSQW256,
29387 IX86_BUILTIN_PMOVQD128,
29388 IX86_BUILTIN_PMOVQD256,
29389 IX86_BUILTIN_PMOVSQD128,
29390 IX86_BUILTIN_PMOVSQD256,
29391 IX86_BUILTIN_PMOVUSQD128,
29392 IX86_BUILTIN_PMOVUSQD256,
29393 IX86_BUILTIN_RANGEPD256,
29394 IX86_BUILTIN_RANGEPD128,
29395 IX86_BUILTIN_RANGEPS256,
29396 IX86_BUILTIN_RANGEPS128,
29397 IX86_BUILTIN_GETEXPPS256,
29398 IX86_BUILTIN_GETEXPPD256,
29399 IX86_BUILTIN_GETEXPPS128,
29400 IX86_BUILTIN_GETEXPPD128,
29401 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29402 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29403 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29404 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29405 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29406 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29407 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29408 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29409 IX86_BUILTIN_PABSQ256,
29410 IX86_BUILTIN_PABSQ128,
29411 IX86_BUILTIN_PABSD256_MASK,
29412 IX86_BUILTIN_PABSD128_MASK,
29413 IX86_BUILTIN_PMULHRSW256_MASK,
29414 IX86_BUILTIN_PMULHRSW128_MASK,
29415 IX86_BUILTIN_PMULHUW128_MASK,
29416 IX86_BUILTIN_PMULHUW256_MASK,
29417 IX86_BUILTIN_PMULHW256_MASK,
29418 IX86_BUILTIN_PMULHW128_MASK,
29419 IX86_BUILTIN_PMULLW256_MASK,
29420 IX86_BUILTIN_PMULLW128_MASK,
29421 IX86_BUILTIN_PMULLQ256,
29422 IX86_BUILTIN_PMULLQ128,
29423 IX86_BUILTIN_ANDPD256_MASK,
29424 IX86_BUILTIN_ANDPD128_MASK,
29425 IX86_BUILTIN_ANDPS256_MASK,
29426 IX86_BUILTIN_ANDPS128_MASK,
29427 IX86_BUILTIN_ANDNPD256_MASK,
29428 IX86_BUILTIN_ANDNPD128_MASK,
29429 IX86_BUILTIN_ANDNPS256_MASK,
29430 IX86_BUILTIN_ANDNPS128_MASK,
29431 IX86_BUILTIN_PSLLWI128_MASK,
29432 IX86_BUILTIN_PSLLDI128_MASK,
29433 IX86_BUILTIN_PSLLQI128_MASK,
29434 IX86_BUILTIN_PSLLW128_MASK,
29435 IX86_BUILTIN_PSLLD128_MASK,
29436 IX86_BUILTIN_PSLLQ128_MASK,
29437       IX86_BUILTIN_PSLLWI256_MASK,
29438 IX86_BUILTIN_PSLLW256_MASK,
29439 IX86_BUILTIN_PSLLDI256_MASK,
29440 IX86_BUILTIN_PSLLD256_MASK,
29441 IX86_BUILTIN_PSLLQI256_MASK,
29442 IX86_BUILTIN_PSLLQ256_MASK,
29443 IX86_BUILTIN_PSRADI128_MASK,
29444 IX86_BUILTIN_PSRAD128_MASK,
29445 IX86_BUILTIN_PSRADI256_MASK,
29446 IX86_BUILTIN_PSRAD256_MASK,
29447 IX86_BUILTIN_PSRAQI128_MASK,
29448 IX86_BUILTIN_PSRAQ128_MASK,
29449 IX86_BUILTIN_PSRAQI256_MASK,
29450 IX86_BUILTIN_PSRAQ256_MASK,
29451 IX86_BUILTIN_PANDD256,
29452 IX86_BUILTIN_PANDD128,
29453 IX86_BUILTIN_PSRLDI128_MASK,
29454 IX86_BUILTIN_PSRLD128_MASK,
29455 IX86_BUILTIN_PSRLDI256_MASK,
29456 IX86_BUILTIN_PSRLD256_MASK,
29457 IX86_BUILTIN_PSRLQI128_MASK,
29458 IX86_BUILTIN_PSRLQ128_MASK,
29459 IX86_BUILTIN_PSRLQI256_MASK,
29460 IX86_BUILTIN_PSRLQ256_MASK,
29461 IX86_BUILTIN_PANDQ256,
29462 IX86_BUILTIN_PANDQ128,
29463 IX86_BUILTIN_PANDND256,
29464 IX86_BUILTIN_PANDND128,
29465 IX86_BUILTIN_PANDNQ256,
29466 IX86_BUILTIN_PANDNQ128,
29467 IX86_BUILTIN_PORD256,
29468 IX86_BUILTIN_PORD128,
29469 IX86_BUILTIN_PORQ256,
29470 IX86_BUILTIN_PORQ128,
29471 IX86_BUILTIN_PXORD256,
29472 IX86_BUILTIN_PXORD128,
29473 IX86_BUILTIN_PXORQ256,
29474 IX86_BUILTIN_PXORQ128,
29475 IX86_BUILTIN_PACKSSWB256_MASK,
29476 IX86_BUILTIN_PACKSSWB128_MASK,
29477 IX86_BUILTIN_PACKUSWB256_MASK,
29478 IX86_BUILTIN_PACKUSWB128_MASK,
29479 IX86_BUILTIN_RNDSCALEPS256,
29480 IX86_BUILTIN_RNDSCALEPD256,
29481 IX86_BUILTIN_RNDSCALEPS128,
29482 IX86_BUILTIN_RNDSCALEPD128,
29483 IX86_BUILTIN_VTERNLOGQ256_MASK,
29484 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29485 IX86_BUILTIN_VTERNLOGD256_MASK,
29486 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29487 IX86_BUILTIN_VTERNLOGQ128_MASK,
29488 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29489 IX86_BUILTIN_VTERNLOGD128_MASK,
29490 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29491 IX86_BUILTIN_SCALEFPD256,
29492 IX86_BUILTIN_SCALEFPS256,
29493 IX86_BUILTIN_SCALEFPD128,
29494 IX86_BUILTIN_SCALEFPS128,
29495 IX86_BUILTIN_VFMADDPD256_MASK,
29496 IX86_BUILTIN_VFMADDPD256_MASK3,
29497 IX86_BUILTIN_VFMADDPD256_MASKZ,
29498 IX86_BUILTIN_VFMADDPD128_MASK,
29499 IX86_BUILTIN_VFMADDPD128_MASK3,
29500 IX86_BUILTIN_VFMADDPD128_MASKZ,
29501 IX86_BUILTIN_VFMADDPS256_MASK,
29502 IX86_BUILTIN_VFMADDPS256_MASK3,
29503 IX86_BUILTIN_VFMADDPS256_MASKZ,
29504 IX86_BUILTIN_VFMADDPS128_MASK,
29505 IX86_BUILTIN_VFMADDPS128_MASK3,
29506 IX86_BUILTIN_VFMADDPS128_MASKZ,
29507 IX86_BUILTIN_VFMSUBPD256_MASK3,
29508 IX86_BUILTIN_VFMSUBPD128_MASK3,
29509 IX86_BUILTIN_VFMSUBPS256_MASK3,
29510 IX86_BUILTIN_VFMSUBPS128_MASK3,
29511 IX86_BUILTIN_VFNMADDPD256_MASK,
29512 IX86_BUILTIN_VFNMADDPD128_MASK,
29513 IX86_BUILTIN_VFNMADDPS256_MASK,
29514 IX86_BUILTIN_VFNMADDPS128_MASK,
29515 IX86_BUILTIN_VFNMSUBPD256_MASK,
29516 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29517 IX86_BUILTIN_VFNMSUBPD128_MASK,
29518 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29519 IX86_BUILTIN_VFNMSUBPS256_MASK,
29520 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29521 IX86_BUILTIN_VFNMSUBPS128_MASK,
29522 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29523 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29524 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29525 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29526 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29527 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29528 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29529 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29530 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29531 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29532 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29533 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29534 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29535 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29536 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29537 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29538 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29539 IX86_BUILTIN_INSERTF64X2_256,
29540 IX86_BUILTIN_INSERTI64X2_256,
29541 IX86_BUILTIN_PSRAVV16HI,
29542 IX86_BUILTIN_PSRAVV8HI,
29543 IX86_BUILTIN_PMADDUBSW256_MASK,
29544 IX86_BUILTIN_PMADDUBSW128_MASK,
29545 IX86_BUILTIN_PMADDWD256_MASK,
29546 IX86_BUILTIN_PMADDWD128_MASK,
29547 IX86_BUILTIN_PSRLVV16HI,
29548 IX86_BUILTIN_PSRLVV8HI,
29549 IX86_BUILTIN_CVTPS2DQ256_MASK,
29550 IX86_BUILTIN_CVTPS2DQ128_MASK,
29551 IX86_BUILTIN_CVTPS2UDQ256,
29552 IX86_BUILTIN_CVTPS2UDQ128,
29553 IX86_BUILTIN_CVTPS2QQ256,
29554 IX86_BUILTIN_CVTPS2QQ128,
29555 IX86_BUILTIN_CVTPS2UQQ256,
29556 IX86_BUILTIN_CVTPS2UQQ128,
29557 IX86_BUILTIN_GETMANTPS256,
29558 IX86_BUILTIN_GETMANTPS128,
29559 IX86_BUILTIN_GETMANTPD256,
29560 IX86_BUILTIN_GETMANTPD128,
29561 IX86_BUILTIN_MOVDDUP256_MASK,
29562 IX86_BUILTIN_MOVDDUP128_MASK,
29563 IX86_BUILTIN_MOVSHDUP256_MASK,
29564 IX86_BUILTIN_MOVSHDUP128_MASK,
29565 IX86_BUILTIN_MOVSLDUP256_MASK,
29566 IX86_BUILTIN_MOVSLDUP128_MASK,
29567 IX86_BUILTIN_CVTQQ2PS256,
29568 IX86_BUILTIN_CVTQQ2PS128,
29569 IX86_BUILTIN_CVTUQQ2PS256,
29570 IX86_BUILTIN_CVTUQQ2PS128,
29571 IX86_BUILTIN_CVTQQ2PD256,
29572 IX86_BUILTIN_CVTQQ2PD128,
29573 IX86_BUILTIN_CVTUQQ2PD256,
29574 IX86_BUILTIN_CVTUQQ2PD128,
29575 IX86_BUILTIN_VPERMT2VARQ256,
29576 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29577 IX86_BUILTIN_VPERMT2VARD256,
29578 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29579 IX86_BUILTIN_VPERMI2VARQ256,
29580 IX86_BUILTIN_VPERMI2VARD256,
29581 IX86_BUILTIN_VPERMT2VARPD256,
29582 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29583 IX86_BUILTIN_VPERMT2VARPS256,
29584 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29585 IX86_BUILTIN_VPERMI2VARPD256,
29586 IX86_BUILTIN_VPERMI2VARPS256,
29587 IX86_BUILTIN_VPERMT2VARQ128,
29588 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29589 IX86_BUILTIN_VPERMT2VARD128,
29590 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29591 IX86_BUILTIN_VPERMI2VARQ128,
29592 IX86_BUILTIN_VPERMI2VARD128,
29593 IX86_BUILTIN_VPERMT2VARPD128,
29594 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29595 IX86_BUILTIN_VPERMT2VARPS128,
29596 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29597 IX86_BUILTIN_VPERMI2VARPD128,
29598 IX86_BUILTIN_VPERMI2VARPS128,
29599 IX86_BUILTIN_PSHUFB256_MASK,
29600 IX86_BUILTIN_PSHUFB128_MASK,
29601 IX86_BUILTIN_PSHUFHW256_MASK,
29602 IX86_BUILTIN_PSHUFHW128_MASK,
29603 IX86_BUILTIN_PSHUFLW256_MASK,
29604 IX86_BUILTIN_PSHUFLW128_MASK,
29605 IX86_BUILTIN_PSHUFD256_MASK,
29606 IX86_BUILTIN_PSHUFD128_MASK,
29607 IX86_BUILTIN_SHUFPD256_MASK,
29608 IX86_BUILTIN_SHUFPD128_MASK,
29609 IX86_BUILTIN_SHUFPS256_MASK,
29610 IX86_BUILTIN_SHUFPS128_MASK,
29611 IX86_BUILTIN_PROLVQ256,
29612 IX86_BUILTIN_PROLVQ128,
29613 IX86_BUILTIN_PROLQ256,
29614 IX86_BUILTIN_PROLQ128,
29615 IX86_BUILTIN_PRORVQ256,
29616 IX86_BUILTIN_PRORVQ128,
29617 IX86_BUILTIN_PRORQ256,
29618 IX86_BUILTIN_PRORQ128,
29619 IX86_BUILTIN_PSRAVQ128,
29620 IX86_BUILTIN_PSRAVQ256,
29621 IX86_BUILTIN_PSLLVV4DI_MASK,
29622 IX86_BUILTIN_PSLLVV2DI_MASK,
29623 IX86_BUILTIN_PSLLVV8SI_MASK,
29624 IX86_BUILTIN_PSLLVV4SI_MASK,
29625 IX86_BUILTIN_PSRAVV8SI_MASK,
29626 IX86_BUILTIN_PSRAVV4SI_MASK,
29627 IX86_BUILTIN_PSRLVV4DI_MASK,
29628 IX86_BUILTIN_PSRLVV2DI_MASK,
29629 IX86_BUILTIN_PSRLVV8SI_MASK,
29630 IX86_BUILTIN_PSRLVV4SI_MASK,
29631 IX86_BUILTIN_PSRAWI256_MASK,
29632 IX86_BUILTIN_PSRAW256_MASK,
29633 IX86_BUILTIN_PSRAWI128_MASK,
29634 IX86_BUILTIN_PSRAW128_MASK,
29635 IX86_BUILTIN_PSRLWI256_MASK,
29636 IX86_BUILTIN_PSRLW256_MASK,
29637 IX86_BUILTIN_PSRLWI128_MASK,
29638 IX86_BUILTIN_PSRLW128_MASK,
29639 IX86_BUILTIN_PRORVD256,
29640 IX86_BUILTIN_PROLVD256,
29641 IX86_BUILTIN_PRORD256,
29642 IX86_BUILTIN_PROLD256,
29643 IX86_BUILTIN_PRORVD128,
29644 IX86_BUILTIN_PROLVD128,
29645 IX86_BUILTIN_PRORD128,
29646 IX86_BUILTIN_PROLD128,
29647 IX86_BUILTIN_FPCLASSPD256,
29648 IX86_BUILTIN_FPCLASSPD128,
29649 IX86_BUILTIN_FPCLASSSD,
29650 IX86_BUILTIN_FPCLASSPS256,
29651 IX86_BUILTIN_FPCLASSPS128,
29652 IX86_BUILTIN_FPCLASSSS,
29653 IX86_BUILTIN_CVTB2MASK128,
29654 IX86_BUILTIN_CVTB2MASK256,
29655 IX86_BUILTIN_CVTW2MASK128,
29656 IX86_BUILTIN_CVTW2MASK256,
29657 IX86_BUILTIN_CVTD2MASK128,
29658 IX86_BUILTIN_CVTD2MASK256,
29659 IX86_BUILTIN_CVTQ2MASK128,
29660 IX86_BUILTIN_CVTQ2MASK256,
29661 IX86_BUILTIN_CVTMASK2B128,
29662 IX86_BUILTIN_CVTMASK2B256,
29663 IX86_BUILTIN_CVTMASK2W128,
29664 IX86_BUILTIN_CVTMASK2W256,
29665 IX86_BUILTIN_CVTMASK2D128,
29666 IX86_BUILTIN_CVTMASK2D256,
29667 IX86_BUILTIN_CVTMASK2Q128,
29668 IX86_BUILTIN_CVTMASK2Q256,
29669 IX86_BUILTIN_PCMPEQB128_MASK,
29670 IX86_BUILTIN_PCMPEQB256_MASK,
29671 IX86_BUILTIN_PCMPEQW128_MASK,
29672 IX86_BUILTIN_PCMPEQW256_MASK,
29673 IX86_BUILTIN_PCMPEQD128_MASK,
29674 IX86_BUILTIN_PCMPEQD256_MASK,
29675 IX86_BUILTIN_PCMPEQQ128_MASK,
29676 IX86_BUILTIN_PCMPEQQ256_MASK,
29677 IX86_BUILTIN_PCMPGTB128_MASK,
29678 IX86_BUILTIN_PCMPGTB256_MASK,
29679 IX86_BUILTIN_PCMPGTW128_MASK,
29680 IX86_BUILTIN_PCMPGTW256_MASK,
29681 IX86_BUILTIN_PCMPGTD128_MASK,
29682 IX86_BUILTIN_PCMPGTD256_MASK,
29683 IX86_BUILTIN_PCMPGTQ128_MASK,
29684 IX86_BUILTIN_PCMPGTQ256_MASK,
29685 IX86_BUILTIN_PTESTMB128,
29686 IX86_BUILTIN_PTESTMB256,
29687 IX86_BUILTIN_PTESTMW128,
29688 IX86_BUILTIN_PTESTMW256,
29689 IX86_BUILTIN_PTESTMD128,
29690 IX86_BUILTIN_PTESTMD256,
29691 IX86_BUILTIN_PTESTMQ128,
29692 IX86_BUILTIN_PTESTMQ256,
29693 IX86_BUILTIN_PTESTNMB128,
29694 IX86_BUILTIN_PTESTNMB256,
29695 IX86_BUILTIN_PTESTNMW128,
29696 IX86_BUILTIN_PTESTNMW256,
29697 IX86_BUILTIN_PTESTNMD128,
29698 IX86_BUILTIN_PTESTNMD256,
29699 IX86_BUILTIN_PTESTNMQ128,
29700 IX86_BUILTIN_PTESTNMQ256,
29701 IX86_BUILTIN_PBROADCASTMB128,
29702 IX86_BUILTIN_PBROADCASTMB256,
29703 IX86_BUILTIN_PBROADCASTMW128,
29704 IX86_BUILTIN_PBROADCASTMW256,
29705 IX86_BUILTIN_COMPRESSPD256,
29706 IX86_BUILTIN_COMPRESSPD128,
29707 IX86_BUILTIN_COMPRESSPS256,
29708 IX86_BUILTIN_COMPRESSPS128,
29709 IX86_BUILTIN_PCOMPRESSQ256,
29710 IX86_BUILTIN_PCOMPRESSQ128,
29711 IX86_BUILTIN_PCOMPRESSD256,
29712 IX86_BUILTIN_PCOMPRESSD128,
29713 IX86_BUILTIN_EXPANDPD256,
29714 IX86_BUILTIN_EXPANDPD128,
29715 IX86_BUILTIN_EXPANDPS256,
29716 IX86_BUILTIN_EXPANDPS128,
29717 IX86_BUILTIN_PEXPANDQ256,
29718 IX86_BUILTIN_PEXPANDQ128,
29719 IX86_BUILTIN_PEXPANDD256,
29720 IX86_BUILTIN_PEXPANDD128,
29721 IX86_BUILTIN_EXPANDPD256Z,
29722 IX86_BUILTIN_EXPANDPD128Z,
29723 IX86_BUILTIN_EXPANDPS256Z,
29724 IX86_BUILTIN_EXPANDPS128Z,
29725 IX86_BUILTIN_PEXPANDQ256Z,
29726 IX86_BUILTIN_PEXPANDQ128Z,
29727 IX86_BUILTIN_PEXPANDD256Z,
29728 IX86_BUILTIN_PEXPANDD128Z,
29729 IX86_BUILTIN_PMAXSD256_MASK,
29730 IX86_BUILTIN_PMINSD256_MASK,
29731 IX86_BUILTIN_PMAXUD256_MASK,
29732 IX86_BUILTIN_PMINUD256_MASK,
29733 IX86_BUILTIN_PMAXSD128_MASK,
29734 IX86_BUILTIN_PMINSD128_MASK,
29735 IX86_BUILTIN_PMAXUD128_MASK,
29736 IX86_BUILTIN_PMINUD128_MASK,
29737 IX86_BUILTIN_PMAXSQ256_MASK,
29738 IX86_BUILTIN_PMINSQ256_MASK,
29739 IX86_BUILTIN_PMAXUQ256_MASK,
29740 IX86_BUILTIN_PMINUQ256_MASK,
29741 IX86_BUILTIN_PMAXSQ128_MASK,
29742 IX86_BUILTIN_PMINSQ128_MASK,
29743 IX86_BUILTIN_PMAXUQ128_MASK,
29744 IX86_BUILTIN_PMINUQ128_MASK,
29745 IX86_BUILTIN_PMINSB256_MASK,
29746 IX86_BUILTIN_PMINUB256_MASK,
29747 IX86_BUILTIN_PMAXSB256_MASK,
29748 IX86_BUILTIN_PMAXUB256_MASK,
29749 IX86_BUILTIN_PMINSB128_MASK,
29750 IX86_BUILTIN_PMINUB128_MASK,
29751 IX86_BUILTIN_PMAXSB128_MASK,
29752 IX86_BUILTIN_PMAXUB128_MASK,
29753 IX86_BUILTIN_PMINSW256_MASK,
29754 IX86_BUILTIN_PMINUW256_MASK,
29755 IX86_BUILTIN_PMAXSW256_MASK,
29756 IX86_BUILTIN_PMAXUW256_MASK,
29757 IX86_BUILTIN_PMINSW128_MASK,
29758 IX86_BUILTIN_PMINUW128_MASK,
29759 IX86_BUILTIN_PMAXSW128_MASK,
29760 IX86_BUILTIN_PMAXUW128_MASK,
29761 IX86_BUILTIN_VPCONFLICTQ256,
29762 IX86_BUILTIN_VPCONFLICTD256,
29763 IX86_BUILTIN_VPCLZCNTQ256,
29764 IX86_BUILTIN_VPCLZCNTD256,
29765 IX86_BUILTIN_UNPCKHPD256_MASK,
29766 IX86_BUILTIN_UNPCKHPD128_MASK,
29767 IX86_BUILTIN_UNPCKHPS256_MASK,
29768 IX86_BUILTIN_UNPCKHPS128_MASK,
29769 IX86_BUILTIN_UNPCKLPD256_MASK,
29770 IX86_BUILTIN_UNPCKLPD128_MASK,
29771 IX86_BUILTIN_UNPCKLPS256_MASK,
29772 IX86_BUILTIN_VPCONFLICTQ128,
29773 IX86_BUILTIN_VPCONFLICTD128,
29774 IX86_BUILTIN_VPCLZCNTQ128,
29775 IX86_BUILTIN_VPCLZCNTD128,
29776 IX86_BUILTIN_UNPCKLPS128_MASK,
29777 IX86_BUILTIN_ALIGND256,
29778 IX86_BUILTIN_ALIGNQ256,
29779 IX86_BUILTIN_ALIGND128,
29780 IX86_BUILTIN_ALIGNQ128,
29781 IX86_BUILTIN_CVTPS2PH256_MASK,
29782 IX86_BUILTIN_CVTPS2PH_MASK,
29783 IX86_BUILTIN_CVTPH2PS_MASK,
29784 IX86_BUILTIN_CVTPH2PS256_MASK,
29785 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29786 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29787 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29788 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29789 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29790 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29791 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29792 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29793 IX86_BUILTIN_PUNPCKHBW128_MASK,
29794 IX86_BUILTIN_PUNPCKHBW256_MASK,
29795 IX86_BUILTIN_PUNPCKHWD128_MASK,
29796 IX86_BUILTIN_PUNPCKHWD256_MASK,
29797 IX86_BUILTIN_PUNPCKLBW128_MASK,
29798 IX86_BUILTIN_PUNPCKLBW256_MASK,
29799 IX86_BUILTIN_PUNPCKLWD128_MASK,
29800 IX86_BUILTIN_PUNPCKLWD256_MASK,
29801 IX86_BUILTIN_PSLLVV16HI,
29802 IX86_BUILTIN_PSLLVV8HI,
29803 IX86_BUILTIN_PACKSSDW256_MASK,
29804 IX86_BUILTIN_PACKSSDW128_MASK,
29805 IX86_BUILTIN_PACKUSDW256_MASK,
29806 IX86_BUILTIN_PACKUSDW128_MASK,
29807 IX86_BUILTIN_PAVGB256_MASK,
29808 IX86_BUILTIN_PAVGW256_MASK,
29809 IX86_BUILTIN_PAVGB128_MASK,
29810 IX86_BUILTIN_PAVGW128_MASK,
29811 IX86_BUILTIN_VPERMVARSF256_MASK,
29812 IX86_BUILTIN_VPERMVARDF256_MASK,
29813 IX86_BUILTIN_VPERMDF256_MASK,
29814 IX86_BUILTIN_PABSB256_MASK,
29815 IX86_BUILTIN_PABSB128_MASK,
29816 IX86_BUILTIN_PABSW256_MASK,
29817 IX86_BUILTIN_PABSW128_MASK,
29818 IX86_BUILTIN_VPERMILVARPD_MASK,
29819 IX86_BUILTIN_VPERMILVARPS_MASK,
29820 IX86_BUILTIN_VPERMILVARPD256_MASK,
29821 IX86_BUILTIN_VPERMILVARPS256_MASK,
29822 IX86_BUILTIN_VPERMILPD_MASK,
29823 IX86_BUILTIN_VPERMILPS_MASK,
29824 IX86_BUILTIN_VPERMILPD256_MASK,
29825 IX86_BUILTIN_VPERMILPS256_MASK,
29826 IX86_BUILTIN_BLENDMQ256,
29827 IX86_BUILTIN_BLENDMD256,
29828 IX86_BUILTIN_BLENDMPD256,
29829 IX86_BUILTIN_BLENDMPS256,
29830 IX86_BUILTIN_BLENDMQ128,
29831 IX86_BUILTIN_BLENDMD128,
29832 IX86_BUILTIN_BLENDMPD128,
29833 IX86_BUILTIN_BLENDMPS128,
29834 IX86_BUILTIN_BLENDMW256,
29835 IX86_BUILTIN_BLENDMB256,
29836 IX86_BUILTIN_BLENDMW128,
29837 IX86_BUILTIN_BLENDMB128,
29838 IX86_BUILTIN_PMULLD256_MASK,
29839 IX86_BUILTIN_PMULLD128_MASK,
29840 IX86_BUILTIN_PMULUDQ256_MASK,
29841 IX86_BUILTIN_PMULDQ256_MASK,
29842 IX86_BUILTIN_PMULDQ128_MASK,
29843 IX86_BUILTIN_PMULUDQ128_MASK,
29844 IX86_BUILTIN_CVTPD2PS256_MASK,
29845 IX86_BUILTIN_CVTPD2PS_MASK,
29846 IX86_BUILTIN_VPERMVARSI256_MASK,
29847 IX86_BUILTIN_VPERMVARDI256_MASK,
29848 IX86_BUILTIN_VPERMDI256_MASK,
29849 IX86_BUILTIN_CMPQ256,
29850 IX86_BUILTIN_CMPD256,
29851 IX86_BUILTIN_UCMPQ256,
29852 IX86_BUILTIN_UCMPD256,
29853 IX86_BUILTIN_CMPB256,
29854 IX86_BUILTIN_CMPW256,
29855 IX86_BUILTIN_UCMPB256,
29856 IX86_BUILTIN_UCMPW256,
29857 IX86_BUILTIN_CMPPD256_MASK,
29858 IX86_BUILTIN_CMPPS256_MASK,
29859 IX86_BUILTIN_CMPQ128,
29860 IX86_BUILTIN_CMPD128,
29861 IX86_BUILTIN_UCMPQ128,
29862 IX86_BUILTIN_UCMPD128,
29863 IX86_BUILTIN_CMPB128,
29864 IX86_BUILTIN_CMPW128,
29865 IX86_BUILTIN_UCMPB128,
29866 IX86_BUILTIN_UCMPW128,
29867 IX86_BUILTIN_CMPPD128_MASK,
29868 IX86_BUILTIN_CMPPS128_MASK,
29870 IX86_BUILTIN_GATHER3SIV8SF,
29871 IX86_BUILTIN_GATHER3SIV4SF,
29872 IX86_BUILTIN_GATHER3SIV4DF,
29873 IX86_BUILTIN_GATHER3SIV2DF,
29874 IX86_BUILTIN_GATHER3DIV8SF,
29875 IX86_BUILTIN_GATHER3DIV4SF,
29876 IX86_BUILTIN_GATHER3DIV4DF,
29877 IX86_BUILTIN_GATHER3DIV2DF,
29878 IX86_BUILTIN_GATHER3SIV8SI,
29879 IX86_BUILTIN_GATHER3SIV4SI,
29880 IX86_BUILTIN_GATHER3SIV4DI,
29881 IX86_BUILTIN_GATHER3SIV2DI,
29882 IX86_BUILTIN_GATHER3DIV8SI,
29883 IX86_BUILTIN_GATHER3DIV4SI,
29884 IX86_BUILTIN_GATHER3DIV4DI,
29885 IX86_BUILTIN_GATHER3DIV2DI,
29886 IX86_BUILTIN_SCATTERSIV8SF,
29887 IX86_BUILTIN_SCATTERSIV4SF,
29888 IX86_BUILTIN_SCATTERSIV4DF,
29889 IX86_BUILTIN_SCATTERSIV2DF,
29890 IX86_BUILTIN_SCATTERDIV8SF,
29891 IX86_BUILTIN_SCATTERDIV4SF,
29892 IX86_BUILTIN_SCATTERDIV4DF,
29893 IX86_BUILTIN_SCATTERDIV2DF,
29894 IX86_BUILTIN_SCATTERSIV8SI,
29895 IX86_BUILTIN_SCATTERSIV4SI,
29896 IX86_BUILTIN_SCATTERSIV4DI,
29897 IX86_BUILTIN_SCATTERSIV2DI,
29898 IX86_BUILTIN_SCATTERDIV8SI,
29899 IX86_BUILTIN_SCATTERDIV4SI,
29900 IX86_BUILTIN_SCATTERDIV4DI,
29901 IX86_BUILTIN_SCATTERDIV2DI,
29903 /* AVX512DQ. */
29904 IX86_BUILTIN_RANGESD128,
29905 IX86_BUILTIN_RANGESS128,
29906 IX86_BUILTIN_KUNPCKWD,
29907 IX86_BUILTIN_KUNPCKDQ,
29908 IX86_BUILTIN_BROADCASTF32x2_512,
29909 IX86_BUILTIN_BROADCASTI32x2_512,
29910 IX86_BUILTIN_BROADCASTF64X2_512,
29911 IX86_BUILTIN_BROADCASTI64X2_512,
29912 IX86_BUILTIN_BROADCASTF32X8_512,
29913 IX86_BUILTIN_BROADCASTI32X8_512,
29914 IX86_BUILTIN_EXTRACTF64X2_512,
29915 IX86_BUILTIN_EXTRACTF32X8,
29916 IX86_BUILTIN_EXTRACTI64X2_512,
29917 IX86_BUILTIN_EXTRACTI32X8,
29918 IX86_BUILTIN_REDUCEPD512_MASK,
29919 IX86_BUILTIN_REDUCEPS512_MASK,
29920 IX86_BUILTIN_PMULLQ512,
29921 IX86_BUILTIN_XORPD512,
29922 IX86_BUILTIN_XORPS512,
29923 IX86_BUILTIN_ORPD512,
29924 IX86_BUILTIN_ORPS512,
29925 IX86_BUILTIN_ANDPD512,
29926 IX86_BUILTIN_ANDPS512,
29927 IX86_BUILTIN_ANDNPD512,
29928 IX86_BUILTIN_ANDNPS512,
29929 IX86_BUILTIN_INSERTF32X8,
29930 IX86_BUILTIN_INSERTI32X8,
29931 IX86_BUILTIN_INSERTF64X2_512,
29932 IX86_BUILTIN_INSERTI64X2_512,
29933 IX86_BUILTIN_FPCLASSPD512,
29934 IX86_BUILTIN_FPCLASSPS512,
29935 IX86_BUILTIN_CVTD2MASK512,
29936 IX86_BUILTIN_CVTQ2MASK512,
29937 IX86_BUILTIN_CVTMASK2D512,
29938 IX86_BUILTIN_CVTMASK2Q512,
29939 IX86_BUILTIN_CVTPD2QQ512,
29940 IX86_BUILTIN_CVTPS2QQ512,
29941 IX86_BUILTIN_CVTPD2UQQ512,
29942 IX86_BUILTIN_CVTPS2UQQ512,
29943 IX86_BUILTIN_CVTQQ2PS512,
29944 IX86_BUILTIN_CVTUQQ2PS512,
29945 IX86_BUILTIN_CVTQQ2PD512,
29946 IX86_BUILTIN_CVTUQQ2PD512,
29947 IX86_BUILTIN_CVTTPS2QQ512,
29948 IX86_BUILTIN_CVTTPS2UQQ512,
29949 IX86_BUILTIN_CVTTPD2QQ512,
29950 IX86_BUILTIN_CVTTPD2UQQ512,
29951 IX86_BUILTIN_RANGEPS512,
29952 IX86_BUILTIN_RANGEPD512,
29954 /* AVX512BW. */
29955 IX86_BUILTIN_PACKUSDW512,
29956 IX86_BUILTIN_PACKSSDW512,
29957 IX86_BUILTIN_LOADDQUHI512_MASK,
29958 IX86_BUILTIN_LOADDQUQI512_MASK,
29959 IX86_BUILTIN_PSLLDQ512,
29960 IX86_BUILTIN_PSRLDQ512,
29961 IX86_BUILTIN_STOREDQUHI512_MASK,
29962 IX86_BUILTIN_STOREDQUQI512_MASK,
29963 IX86_BUILTIN_PALIGNR512,
29964 IX86_BUILTIN_PALIGNR512_MASK,
29965 IX86_BUILTIN_MOVDQUHI512_MASK,
29966 IX86_BUILTIN_MOVDQUQI512_MASK,
29967 IX86_BUILTIN_PSADBW512,
29968 IX86_BUILTIN_DBPSADBW512,
29969 IX86_BUILTIN_PBROADCASTB512,
29970 IX86_BUILTIN_PBROADCASTB512_GPR,
29971 IX86_BUILTIN_PBROADCASTW512,
29972 IX86_BUILTIN_PBROADCASTW512_GPR,
29973 IX86_BUILTIN_PMOVSXBW512_MASK,
29974 IX86_BUILTIN_PMOVZXBW512_MASK,
29975 IX86_BUILTIN_VPERMVARHI512_MASK,
29976 IX86_BUILTIN_VPERMT2VARHI512,
29977 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
29978 IX86_BUILTIN_VPERMI2VARHI512,
29979 IX86_BUILTIN_PAVGB512,
29980 IX86_BUILTIN_PAVGW512,
29981 IX86_BUILTIN_PADDB512,
29982 IX86_BUILTIN_PSUBB512,
29983 IX86_BUILTIN_PSUBSB512,
29984 IX86_BUILTIN_PADDSB512,
29985 IX86_BUILTIN_PSUBUSB512,
29986 IX86_BUILTIN_PADDUSB512,
29987 IX86_BUILTIN_PSUBW512,
29988 IX86_BUILTIN_PADDW512,
29989 IX86_BUILTIN_PSUBSW512,
29990 IX86_BUILTIN_PADDSW512,
29991 IX86_BUILTIN_PSUBUSW512,
29992 IX86_BUILTIN_PADDUSW512,
29993 IX86_BUILTIN_PMAXUW512,
29994 IX86_BUILTIN_PMAXSW512,
29995 IX86_BUILTIN_PMINUW512,
29996 IX86_BUILTIN_PMINSW512,
29997 IX86_BUILTIN_PMAXUB512,
29998 IX86_BUILTIN_PMAXSB512,
29999 IX86_BUILTIN_PMINUB512,
30000 IX86_BUILTIN_PMINSB512,
30001 IX86_BUILTIN_PMOVWB512,
30002 IX86_BUILTIN_PMOVSWB512,
30003 IX86_BUILTIN_PMOVUSWB512,
30004 IX86_BUILTIN_PMULHRSW512_MASK,
30005 IX86_BUILTIN_PMULHUW512_MASK,
30006 IX86_BUILTIN_PMULHW512_MASK,
30007 IX86_BUILTIN_PMULLW512_MASK,
30008 IX86_BUILTIN_PSLLWI512_MASK,
30009 IX86_BUILTIN_PSLLW512_MASK,
30010 IX86_BUILTIN_PACKSSWB512,
30011 IX86_BUILTIN_PACKUSWB512,
30012 IX86_BUILTIN_PSRAVV32HI,
30013 IX86_BUILTIN_PMADDUBSW512_MASK,
30014 IX86_BUILTIN_PMADDWD512_MASK,
30015 IX86_BUILTIN_PSRLVV32HI,
30016 IX86_BUILTIN_PUNPCKHBW512,
30017 IX86_BUILTIN_PUNPCKHWD512,
30018 IX86_BUILTIN_PUNPCKLBW512,
30019 IX86_BUILTIN_PUNPCKLWD512,
30020 IX86_BUILTIN_PSHUFB512,
30021 IX86_BUILTIN_PSHUFHW512,
30022 IX86_BUILTIN_PSHUFLW512,
30023 IX86_BUILTIN_PSRAWI512,
30024 IX86_BUILTIN_PSRAW512,
30025 IX86_BUILTIN_PSRLWI512,
30026 IX86_BUILTIN_PSRLW512,
30027 IX86_BUILTIN_CVTB2MASK512,
30028 IX86_BUILTIN_CVTW2MASK512,
30029 IX86_BUILTIN_CVTMASK2B512,
30030 IX86_BUILTIN_CVTMASK2W512,
30031 IX86_BUILTIN_PCMPEQB512_MASK,
30032 IX86_BUILTIN_PCMPEQW512_MASK,
30033 IX86_BUILTIN_PCMPGTB512_MASK,
30034 IX86_BUILTIN_PCMPGTW512_MASK,
30035 IX86_BUILTIN_PTESTMB512,
30036 IX86_BUILTIN_PTESTMW512,
30037 IX86_BUILTIN_PTESTNMB512,
30038 IX86_BUILTIN_PTESTNMW512,
30039 IX86_BUILTIN_PSLLVV32HI,
30040 IX86_BUILTIN_PABSB512,
30041 IX86_BUILTIN_PABSW512,
30042 IX86_BUILTIN_BLENDMW512,
30043 IX86_BUILTIN_BLENDMB512,
30044 IX86_BUILTIN_CMPB512,
30045 IX86_BUILTIN_CMPW512,
30046 IX86_BUILTIN_UCMPB512,
30047 IX86_BUILTIN_UCMPW512,
30049 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30050 where all operands are 32-byte or 64-byte wide respectively. */
30051 IX86_BUILTIN_GATHERALTSIV4DF,
30052 IX86_BUILTIN_GATHERALTDIV8SF,
30053 IX86_BUILTIN_GATHERALTSIV4DI,
30054 IX86_BUILTIN_GATHERALTDIV8SI,
30055 IX86_BUILTIN_GATHER3ALTDIV16SF,
30056 IX86_BUILTIN_GATHER3ALTDIV16SI,
30057 IX86_BUILTIN_GATHER3ALTSIV4DF,
30058 IX86_BUILTIN_GATHER3ALTDIV8SF,
30059 IX86_BUILTIN_GATHER3ALTSIV4DI,
30060 IX86_BUILTIN_GATHER3ALTDIV8SI,
30061 IX86_BUILTIN_GATHER3ALTSIV8DF,
30062 IX86_BUILTIN_GATHER3ALTSIV8DI,
30063 IX86_BUILTIN_GATHER3DIV16SF,
30064 IX86_BUILTIN_GATHER3DIV16SI,
30065 IX86_BUILTIN_GATHER3DIV8DF,
30066 IX86_BUILTIN_GATHER3DIV8DI,
30067 IX86_BUILTIN_GATHER3SIV16SF,
30068 IX86_BUILTIN_GATHER3SIV16SI,
30069 IX86_BUILTIN_GATHER3SIV8DF,
30070 IX86_BUILTIN_GATHER3SIV8DI,
30071 IX86_BUILTIN_SCATTERDIV16SF,
30072 IX86_BUILTIN_SCATTERDIV16SI,
30073 IX86_BUILTIN_SCATTERDIV8DF,
30074 IX86_BUILTIN_SCATTERDIV8DI,
30075 IX86_BUILTIN_SCATTERSIV16SF,
30076 IX86_BUILTIN_SCATTERSIV16SI,
30077 IX86_BUILTIN_SCATTERSIV8DF,
30078 IX86_BUILTIN_SCATTERSIV8DI,
30080 /* AVX512PF */
30081 IX86_BUILTIN_GATHERPFQPD,
30082 IX86_BUILTIN_GATHERPFDPS,
30083 IX86_BUILTIN_GATHERPFDPD,
30084 IX86_BUILTIN_GATHERPFQPS,
30085 IX86_BUILTIN_SCATTERPFDPD,
30086 IX86_BUILTIN_SCATTERPFDPS,
30087 IX86_BUILTIN_SCATTERPFQPD,
30088 IX86_BUILTIN_SCATTERPFQPS,
30090 /* AVX-512ER */
30091 IX86_BUILTIN_EXP2PD_MASK,
30092 IX86_BUILTIN_EXP2PS_MASK,
30093 IX86_BUILTIN_EXP2PS,
30094 IX86_BUILTIN_RCP28PD,
30095 IX86_BUILTIN_RCP28PS,
30096 IX86_BUILTIN_RCP28SD,
30097 IX86_BUILTIN_RCP28SS,
30098 IX86_BUILTIN_RSQRT28PD,
30099 IX86_BUILTIN_RSQRT28PS,
30100 IX86_BUILTIN_RSQRT28SD,
30101 IX86_BUILTIN_RSQRT28SS,
30103 /* AVX-512IFMA */
30104 IX86_BUILTIN_VPMADD52LUQ512,
30105 IX86_BUILTIN_VPMADD52HUQ512,
30106 IX86_BUILTIN_VPMADD52LUQ256,
30107 IX86_BUILTIN_VPMADD52HUQ256,
30108 IX86_BUILTIN_VPMADD52LUQ128,
30109 IX86_BUILTIN_VPMADD52HUQ128,
30110 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30111 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30112 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30113 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30114 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30115 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30117 /* AVX-512VBMI */
30118 IX86_BUILTIN_VPMULTISHIFTQB512,
30119 IX86_BUILTIN_VPMULTISHIFTQB256,
30120 IX86_BUILTIN_VPMULTISHIFTQB128,
30121 IX86_BUILTIN_VPERMVARQI512_MASK,
30122 IX86_BUILTIN_VPERMT2VARQI512,
30123 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30124 IX86_BUILTIN_VPERMI2VARQI512,
30125 IX86_BUILTIN_VPERMVARQI256_MASK,
30126 IX86_BUILTIN_VPERMVARQI128_MASK,
30127 IX86_BUILTIN_VPERMT2VARQI256,
30128 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30129 IX86_BUILTIN_VPERMT2VARQI128,
30130 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30131 IX86_BUILTIN_VPERMI2VARQI256,
30132 IX86_BUILTIN_VPERMI2VARQI128,
30134 /* SHA builtins. */
30135 IX86_BUILTIN_SHA1MSG1,
30136 IX86_BUILTIN_SHA1MSG2,
30137 IX86_BUILTIN_SHA1NEXTE,
30138 IX86_BUILTIN_SHA1RNDS4,
30139 IX86_BUILTIN_SHA256MSG1,
30140 IX86_BUILTIN_SHA256MSG2,
30141 IX86_BUILTIN_SHA256RNDS2,
30143 /* CLWB instructions. */
30144 IX86_BUILTIN_CLWB,
30146 /* PCOMMIT instructions. */
30147 IX86_BUILTIN_PCOMMIT,
30149 /* CLFLUSHOPT instructions. */
30150 IX86_BUILTIN_CLFLUSHOPT,
30152 /* TFmode support builtins. */
30153 IX86_BUILTIN_INFQ,
30154 IX86_BUILTIN_HUGE_VALQ,
30155 IX86_BUILTIN_FABSQ,
30156 IX86_BUILTIN_COPYSIGNQ,
30158 /* Vectorizer support builtins. */
30159 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30160 IX86_BUILTIN_CPYSGNPS,
30161 IX86_BUILTIN_CPYSGNPD,
30162 IX86_BUILTIN_CPYSGNPS256,
30163 IX86_BUILTIN_CPYSGNPS512,
30164 IX86_BUILTIN_CPYSGNPD256,
30165 IX86_BUILTIN_CPYSGNPD512,
30166 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30167 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30170 /* FMA4 instructions. */
30171 IX86_BUILTIN_VFMADDSS,
30172 IX86_BUILTIN_VFMADDSD,
30173 IX86_BUILTIN_VFMADDPS,
30174 IX86_BUILTIN_VFMADDPD,
30175 IX86_BUILTIN_VFMADDPS256,
30176 IX86_BUILTIN_VFMADDPD256,
30177 IX86_BUILTIN_VFMADDSUBPS,
30178 IX86_BUILTIN_VFMADDSUBPD,
30179 IX86_BUILTIN_VFMADDSUBPS256,
30180 IX86_BUILTIN_VFMADDSUBPD256,
30182 /* FMA3 instructions. */
30183 IX86_BUILTIN_VFMADDSS3,
30184 IX86_BUILTIN_VFMADDSD3,
30186 /* XOP instructions. */
30187 IX86_BUILTIN_VPCMOV,
30188 IX86_BUILTIN_VPCMOV_V2DI,
30189 IX86_BUILTIN_VPCMOV_V4SI,
30190 IX86_BUILTIN_VPCMOV_V8HI,
30191 IX86_BUILTIN_VPCMOV_V16QI,
30192 IX86_BUILTIN_VPCMOV_V4SF,
30193 IX86_BUILTIN_VPCMOV_V2DF,
30194 IX86_BUILTIN_VPCMOV256,
30195 IX86_BUILTIN_VPCMOV_V4DI256,
30196 IX86_BUILTIN_VPCMOV_V8SI256,
30197 IX86_BUILTIN_VPCMOV_V16HI256,
30198 IX86_BUILTIN_VPCMOV_V32QI256,
30199 IX86_BUILTIN_VPCMOV_V8SF256,
30200 IX86_BUILTIN_VPCMOV_V4DF256,
30202 IX86_BUILTIN_VPPERM,
30204 IX86_BUILTIN_VPMACSSWW,
30205 IX86_BUILTIN_VPMACSWW,
30206 IX86_BUILTIN_VPMACSSWD,
30207 IX86_BUILTIN_VPMACSWD,
30208 IX86_BUILTIN_VPMACSSDD,
30209 IX86_BUILTIN_VPMACSDD,
30210 IX86_BUILTIN_VPMACSSDQL,
30211 IX86_BUILTIN_VPMACSSDQH,
30212 IX86_BUILTIN_VPMACSDQL,
30213 IX86_BUILTIN_VPMACSDQH,
30214 IX86_BUILTIN_VPMADCSSWD,
30215 IX86_BUILTIN_VPMADCSWD,
30217 IX86_BUILTIN_VPHADDBW,
30218 IX86_BUILTIN_VPHADDBD,
30219 IX86_BUILTIN_VPHADDBQ,
30220 IX86_BUILTIN_VPHADDWD,
30221 IX86_BUILTIN_VPHADDWQ,
30222 IX86_BUILTIN_VPHADDDQ,
30223 IX86_BUILTIN_VPHADDUBW,
30224 IX86_BUILTIN_VPHADDUBD,
30225 IX86_BUILTIN_VPHADDUBQ,
30226 IX86_BUILTIN_VPHADDUWD,
30227 IX86_BUILTIN_VPHADDUWQ,
30228 IX86_BUILTIN_VPHADDUDQ,
30229 IX86_BUILTIN_VPHSUBBW,
30230 IX86_BUILTIN_VPHSUBWD,
30231 IX86_BUILTIN_VPHSUBDQ,
30233 IX86_BUILTIN_VPROTB,
30234 IX86_BUILTIN_VPROTW,
30235 IX86_BUILTIN_VPROTD,
30236 IX86_BUILTIN_VPROTQ,
30237 IX86_BUILTIN_VPROTB_IMM,
30238 IX86_BUILTIN_VPROTW_IMM,
30239 IX86_BUILTIN_VPROTD_IMM,
30240 IX86_BUILTIN_VPROTQ_IMM,
30242 IX86_BUILTIN_VPSHLB,
30243 IX86_BUILTIN_VPSHLW,
30244 IX86_BUILTIN_VPSHLD,
30245 IX86_BUILTIN_VPSHLQ,
30246 IX86_BUILTIN_VPSHAB,
30247 IX86_BUILTIN_VPSHAW,
30248 IX86_BUILTIN_VPSHAD,
30249 IX86_BUILTIN_VPSHAQ,
30251 IX86_BUILTIN_VFRCZSS,
30252 IX86_BUILTIN_VFRCZSD,
30253 IX86_BUILTIN_VFRCZPS,
30254 IX86_BUILTIN_VFRCZPD,
30255 IX86_BUILTIN_VFRCZPS256,
30256 IX86_BUILTIN_VFRCZPD256,
30258 IX86_BUILTIN_VPCOMEQUB,
30259 IX86_BUILTIN_VPCOMNEUB,
30260 IX86_BUILTIN_VPCOMLTUB,
30261 IX86_BUILTIN_VPCOMLEUB,
30262 IX86_BUILTIN_VPCOMGTUB,
30263 IX86_BUILTIN_VPCOMGEUB,
30264 IX86_BUILTIN_VPCOMFALSEUB,
30265 IX86_BUILTIN_VPCOMTRUEUB,
30267 IX86_BUILTIN_VPCOMEQUW,
30268 IX86_BUILTIN_VPCOMNEUW,
30269 IX86_BUILTIN_VPCOMLTUW,
30270 IX86_BUILTIN_VPCOMLEUW,
30271 IX86_BUILTIN_VPCOMGTUW,
30272 IX86_BUILTIN_VPCOMGEUW,
30273 IX86_BUILTIN_VPCOMFALSEUW,
30274 IX86_BUILTIN_VPCOMTRUEUW,
30276 IX86_BUILTIN_VPCOMEQUD,
30277 IX86_BUILTIN_VPCOMNEUD,
30278 IX86_BUILTIN_VPCOMLTUD,
30279 IX86_BUILTIN_VPCOMLEUD,
30280 IX86_BUILTIN_VPCOMGTUD,
30281 IX86_BUILTIN_VPCOMGEUD,
30282 IX86_BUILTIN_VPCOMFALSEUD,
30283 IX86_BUILTIN_VPCOMTRUEUD,
30285 IX86_BUILTIN_VPCOMEQUQ,
30286 IX86_BUILTIN_VPCOMNEUQ,
30287 IX86_BUILTIN_VPCOMLTUQ,
30288 IX86_BUILTIN_VPCOMLEUQ,
30289 IX86_BUILTIN_VPCOMGTUQ,
30290 IX86_BUILTIN_VPCOMGEUQ,
30291 IX86_BUILTIN_VPCOMFALSEUQ,
30292 IX86_BUILTIN_VPCOMTRUEUQ,
30294 IX86_BUILTIN_VPCOMEQB,
30295 IX86_BUILTIN_VPCOMNEB,
30296 IX86_BUILTIN_VPCOMLTB,
30297 IX86_BUILTIN_VPCOMLEB,
30298 IX86_BUILTIN_VPCOMGTB,
30299 IX86_BUILTIN_VPCOMGEB,
30300 IX86_BUILTIN_VPCOMFALSEB,
30301 IX86_BUILTIN_VPCOMTRUEB,
30303 IX86_BUILTIN_VPCOMEQW,
30304 IX86_BUILTIN_VPCOMNEW,
30305 IX86_BUILTIN_VPCOMLTW,
30306 IX86_BUILTIN_VPCOMLEW,
30307 IX86_BUILTIN_VPCOMGTW,
30308 IX86_BUILTIN_VPCOMGEW,
30309 IX86_BUILTIN_VPCOMFALSEW,
30310 IX86_BUILTIN_VPCOMTRUEW,
30312 IX86_BUILTIN_VPCOMEQD,
30313 IX86_BUILTIN_VPCOMNED,
30314 IX86_BUILTIN_VPCOMLTD,
30315 IX86_BUILTIN_VPCOMLED,
30316 IX86_BUILTIN_VPCOMGTD,
30317 IX86_BUILTIN_VPCOMGED,
30318 IX86_BUILTIN_VPCOMFALSED,
30319 IX86_BUILTIN_VPCOMTRUED,
30321 IX86_BUILTIN_VPCOMEQQ,
30322 IX86_BUILTIN_VPCOMNEQ,
30323 IX86_BUILTIN_VPCOMLTQ,
30324 IX86_BUILTIN_VPCOMLEQ,
30325 IX86_BUILTIN_VPCOMGTQ,
30326 IX86_BUILTIN_VPCOMGEQ,
30327 IX86_BUILTIN_VPCOMFALSEQ,
30328 IX86_BUILTIN_VPCOMTRUEQ,
30330 /* LWP instructions. */
30331 IX86_BUILTIN_LLWPCB,
30332 IX86_BUILTIN_SLWPCB,
30333 IX86_BUILTIN_LWPVAL32,
30334 IX86_BUILTIN_LWPVAL64,
30335 IX86_BUILTIN_LWPINS32,
30336 IX86_BUILTIN_LWPINS64,
30338 IX86_BUILTIN_CLZS,
30340 /* RTM */
30341 IX86_BUILTIN_XBEGIN,
30342 IX86_BUILTIN_XEND,
30343 IX86_BUILTIN_XABORT,
30344 IX86_BUILTIN_XTEST,
30346 /* MPX */
30347 IX86_BUILTIN_BNDMK,
30348 IX86_BUILTIN_BNDSTX,
30349 IX86_BUILTIN_BNDLDX,
30350 IX86_BUILTIN_BNDCL,
30351 IX86_BUILTIN_BNDCU,
30352 IX86_BUILTIN_BNDRET,
30353 IX86_BUILTIN_BNDNARROW,
30354 IX86_BUILTIN_BNDINT,
30355 IX86_BUILTIN_SIZEOF,
30356 IX86_BUILTIN_BNDLOWER,
30357 IX86_BUILTIN_BNDUPPER,
30359 /* BMI instructions. */
30360 IX86_BUILTIN_BEXTR32,
30361 IX86_BUILTIN_BEXTR64,
30362 IX86_BUILTIN_CTZS,
30364 /* TBM instructions. */
30365 IX86_BUILTIN_BEXTRI32,
30366 IX86_BUILTIN_BEXTRI64,
30368 /* BMI2 instructions. */
30369 IX86_BUILTIN_BZHI32,
30370 IX86_BUILTIN_BZHI64,
30371 IX86_BUILTIN_PDEP32,
30372 IX86_BUILTIN_PDEP64,
30373 IX86_BUILTIN_PEXT32,
30374 IX86_BUILTIN_PEXT64,
30376 /* ADX instructions. */
30377 IX86_BUILTIN_ADDCARRYX32,
30378 IX86_BUILTIN_ADDCARRYX64,
30380 /* SBB instructions. */
30381 IX86_BUILTIN_SBB32,
30382 IX86_BUILTIN_SBB64,
30384 /* FSGSBASE instructions. */
30385 IX86_BUILTIN_RDFSBASE32,
30386 IX86_BUILTIN_RDFSBASE64,
30387 IX86_BUILTIN_RDGSBASE32,
30388 IX86_BUILTIN_RDGSBASE64,
30389 IX86_BUILTIN_WRFSBASE32,
30390 IX86_BUILTIN_WRFSBASE64,
30391 IX86_BUILTIN_WRGSBASE32,
30392 IX86_BUILTIN_WRGSBASE64,
30394 /* RDRND instructions. */
30395 IX86_BUILTIN_RDRAND16_STEP,
30396 IX86_BUILTIN_RDRAND32_STEP,
30397 IX86_BUILTIN_RDRAND64_STEP,
30399 /* RDSEED instructions. */
30400 IX86_BUILTIN_RDSEED16_STEP,
30401 IX86_BUILTIN_RDSEED32_STEP,
30402 IX86_BUILTIN_RDSEED64_STEP,
30404 /* F16C instructions. */
30405 IX86_BUILTIN_CVTPH2PS,
30406 IX86_BUILTIN_CVTPH2PS256,
30407 IX86_BUILTIN_CVTPS2PH,
30408 IX86_BUILTIN_CVTPS2PH256,
30410 /* CFString built-in for darwin */
30411 IX86_BUILTIN_CFSTRING,
30413 /* Builtins to get CPU type and supported features. */
30414 IX86_BUILTIN_CPU_INIT,
30415 IX86_BUILTIN_CPU_IS,
30416 IX86_BUILTIN_CPU_SUPPORTS,
30418 /* Read/write FLAGS register built-ins. */
30419 IX86_BUILTIN_READ_FLAGS,
30420 IX86_BUILTIN_WRITE_FLAGS,
30422 IX86_BUILTIN_MAX
30423 };
30425 /* Table for the ix86 builtin decls. */
30426 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30428 /* Table of all of the builtin functions that are possible with different ISAs
30429 but are waiting to be built until a function is declared to use that
30430 ISA. */
30431 struct builtin_isa {
30432 const char *name; /* function name */
30433 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30434 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30435 bool const_p; /* true if the declaration is constant */
30436 bool leaf_p; /* true if the declaration has leaf attribute */
30437 bool nothrow_p; /* true if the declaration has nothrow attribute */
30437 bool set_and_not_built_p;
30438 };
30441 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30444 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30445 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30446 function decl in the ix86_builtins array. Returns the function decl or
30447 NULL_TREE, if the builtin was not added.
30449 If the front end has a special hook for builtin functions, delay adding
30450 builtin functions that aren't in the current ISA until the ISA is changed
30451 with function specific optimization. Doing so can save about 300K for the
30452 default compiler. When the builtin is expanded, check at that time whether
30453 it is valid.
30455 If the front end doesn't have a special hook, record all builtins, even if
30456 they aren't in the current ISA, in case the user uses
30457 function specific options for a different ISA, so that we don't get scope
30458 errors if a builtin is added in the middle of a function scope. */
30460 static inline tree
30461 def_builtin (HOST_WIDE_INT mask, const char *name,
30462 enum ix86_builtin_func_type tcode,
30463 enum ix86_builtins code)
30464 {
30465 tree decl = NULL_TREE;
30467 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30468 {
30469 ix86_builtins_isa[(int) code].isa = mask;
30471 mask &= ~OPTION_MASK_ISA_64BIT;
30472 if (mask == 0
30473 || (mask & ix86_isa_flags) != 0
30474 || (lang_hooks.builtin_function
30475 == lang_hooks.builtin_function_ext_scope))
30477 {
30478 tree type = ix86_get_builtin_func_type (tcode);
30479 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30480 NULL, NULL_TREE);
30481 ix86_builtins[(int) code] = decl;
30482 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30483 }
30484 else
30485 {
30486 ix86_builtins[(int) code] = NULL_TREE;
30487 ix86_builtins_isa[(int) code].tcode = tcode;
30488 ix86_builtins_isa[(int) code].name = name;
30489 ix86_builtins_isa[(int) code].leaf_p = false;
30490 ix86_builtins_isa[(int) code].nothrow_p = false;
30491 ix86_builtins_isa[(int) code].const_p = false;
30492 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30493 }
30494 }
30496 return decl;
30497 }
30499 /* Like def_builtin, but also marks the function decl "const". */
30501 static inline tree
30502 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30503 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30504 {
30505 tree decl = def_builtin (mask, name, tcode, code);
30506 if (decl)
30507 TREE_READONLY (decl) = 1;
30508 else
30509 ix86_builtins_isa[(int) code].const_p = true;
30511 return decl;
30512 }
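/* Illustrative sketch, not part of the original source: a typical
   registration made during builtin initialization would look roughly like

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                        INT_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);

   where the builtin name and the IX86_BUILTIN_EXAMPLE enumerator are
   hypothetical placeholders.  If SSE2 is not enabled at that point (and the
   front end does not register builtins with extended scope), no decl is
   built yet; only the name, type code and ISA mask are recorded in
   ix86_builtins_isa so that ix86_add_new_builtins can materialize the decl
   later.  */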
30514 /* Add any new builtin functions for a given ISA that may not have been
30515 declared. This saves a bit of space compared to adding all of the
30516 declarations to the tree, even if we didn't use them. */
30518 static void
30519 ix86_add_new_builtins (HOST_WIDE_INT isa)
30520 {
30521 int i;
30523 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30524 {
30525 if ((ix86_builtins_isa[i].isa & isa) != 0
30526 && ix86_builtins_isa[i].set_and_not_built_p)
30527 {
30528 tree decl, type;
30530 /* Don't define the builtin again. */
30531 ix86_builtins_isa[i].set_and_not_built_p = false;
30533 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30534 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30535 type, i, BUILT_IN_MD, NULL,
30536 NULL_TREE);
30538 ix86_builtins[i] = decl;
30539 if (ix86_builtins_isa[i].const_p)
30540 TREE_READONLY (decl) = 1;
30541 if (ix86_builtins_isa[i].leaf_p)
30542 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30543 NULL_TREE);
30544 if (ix86_builtins_isa[i].nothrow_p)
30545 TREE_NOTHROW (decl) = 1;
30546 }
30547 }
30548 }
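/* Note (a paraphrase of the code above; call sites are not shown here):
   given a set of ISA flags, every recorded entry whose mask intersects them
   and that is still marked set_and_not_built_p gets its decl created at
   this point, inheriting the recorded const/leaf/nothrow properties.  This
   is presumably reached when function-specific target options enable
   additional ISAs after the initial builtin setup.  */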
30550 /* Bits for builtin_description.flag. */
30552 /* Set when we don't support the comparison natively, and should
30553 swap_comparison in order to support it. */
30554 #define BUILTIN_DESC_SWAP_OPERANDS 1
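/* In other words (paraphrase, not original text): an entry that sets this
   flag asks the expander to exchange its two input operands before emitting
   the insn, so a comparison the hardware only provides in one direction can
   also serve for the mirrored one.  */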
30556 struct builtin_description
30557 {
30558 const HOST_WIDE_INT mask;
30559 const enum insn_code icode;
30560 const char *const name;
30561 const enum ix86_builtins code;
30562 const enum rtx_code comparison;
30563 const int flag;
30564 };
30566 static const struct builtin_description bdesc_comi[] =
30567 {
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30571 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30575 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30592 };
30594 static const struct builtin_description bdesc_pcmpestr[] =
30595 {
30596 /* SSE4.2 */
30597 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30598 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30599 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30600 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30601 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30602 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30603 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30604 };
30606 static const struct builtin_description bdesc_pcmpistr[] =
30607 {
30608 /* SSE4.2 */
30609 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30610 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30611 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30612 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30613 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30614 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30615 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30616 };
30618 /* Special builtins with variable number of arguments. */
30619 static const struct builtin_description bdesc_special_args[] =
30620 {
30621 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30622 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30623 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30625 /* 80387 (for use internally for atomic compound assignment). */
30626 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30627 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30628 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30629 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30631 /* MMX */
30632 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30634 /* 3DNow! */
30635 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30637 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30638 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30639 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30640 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30641 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30642 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30643 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30644 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30645 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30647 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30648 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30649 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30650 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30651 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30652 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30653 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30654 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30656 /* SSE */
30657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30661 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30662 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30663 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30664 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30666 /* SSE or 3DNow!A */
30667 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30668 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30670 /* SSE2 */
30671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30678 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30685 /* SSE3 */
30686 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30688 /* SSE4.1 */
30689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30691 /* SSE4A */
30692 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30693 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30695 /* AVX */
30696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30699 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30700 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30701 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30726 /* AVX2 */
30727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30737 /* AVX512F */
30738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30786 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30787 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30788 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30789 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30790 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30791 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30793 /* FSGSBASE */
30794 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30795 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30796 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30797 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30798 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30799 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30800 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30801 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30803 /* RTM */
30804 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30805 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30806 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30808 /* AVX512BW */
30809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30814 /* AVX512VL */
30815 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30816 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30817 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30818 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30852 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30853 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30854 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30910 /* PCOMMIT. */
30911 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30912 };
30914 /* Builtins with variable number of arguments. */
30915 static const struct builtin_description bdesc_args[] =
30916 {
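/* Each entry below supplies, in order: the ISA option mask guarding the
   builtin, the insn_code of the pattern that expands it, the
   __builtin_ia32_* name (0 for the few entries registered by hand elsewhere
   in this file), the IX86_BUILTIN_* enumerator, an rtx comparison code
   (UNKNOWN when the pattern needs none), and the ix86_builtin_func_type
   cast to int; see struct builtin_description earlier in this file.
   A negated mask such as ~OPTION_MASK_ISA_64BIT effectively marks a builtin
   that is not tied to one particular ISA extension and, with the 64BIT bit
   clear, is also usable outside 64-bit mode; def_builtin has the exact
   rules.  */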
30917 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30918 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30919 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30920 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30921 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30922 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30923 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30925 /* MMX */
30926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30927 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30939 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30958 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
30970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
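/* In the shift entries above, the _COUNT suffix on the function type
   separates the forms whose last argument is a scalar SImode count
   (psllwi and friends) from the forms that take the count in another MMX
   register (psllw and friends); both expand through the same
   ashl/lshr/ashr patterns.  */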
30989 /* 3DNow! */
30990 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30991 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30992 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30993 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30995 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30996 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30997 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30998 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30999 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31000 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31001 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31002 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31003 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31004 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31005 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31006 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31007 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31008 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31009 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31011 /* 3DNow!A */
31012 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31013 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31014 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31015 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31016 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31017 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31019 /* SSE */
31020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31022 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31024 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31028 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31031 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31035 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31036 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31037 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
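/* For the CMPPS/CMPSS entries below, the rtx code in the fifth field selects
   the comparison that sse_maskcmpv4sf3/sse_vmmaskcmpv4sf3 emit.  The
   "greater" forms reuse the "less" codes together with the _SWAP function
   types, which swap the two operands before expansion, and the negated forms
   use the unordered codes (UNGE for not-less-than, UNGT for
   not-less-or-equal) so that comparisons involving NaNs come out true,
   matching the hardware predicates.  */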
31044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31048 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31065 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31066 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31070 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31072 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31073 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31075 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31080 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31081 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31085 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31087 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31093 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
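/* The two entries above carry no __builtin_ia32_* name; the table walk in
   ix86_init_mmx_sse_builtins skips nameless entries, and FABSQ/COPYSIGNQ
   are instead registered by hand (as __builtin_fabsq and
   __builtin_copysignq) elsewhere in this file, with the table supplying only
   the icode and function type used at expansion time.  */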
31096 /* SSE MMX or 3DNow!A */
31097 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31098 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31099 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31101 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31102 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31103 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31104 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31106 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31107 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31109 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31111 /* SSE2 */
31112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31118 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31130 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31131 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31135 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31137 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31138 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31139 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31140 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31168 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31172 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31174 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31177 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31180 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31181 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31183 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31189 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31243 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31248 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31281 /* SSE2 MMX */
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31285 /* SSE3 */
31286 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31287 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31289 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31290 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31291 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31292 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31293 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31294 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31296 /* SSSE3 */
31297 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31298 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31299 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31300 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31301 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31302 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31304 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31305 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31306 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31307 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31308 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31309 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31310 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31311 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31312 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31313 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31314 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31315 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31316 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31317 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31318 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31319 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31320 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31321 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31322 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31323 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31324 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31325 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31326 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31327 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31329 /* SSSE3. */
31330 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31331 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31333 /* SSE4.1 */
31334 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31335 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31336 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31338 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31339 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31340 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31341 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31342 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31343 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31345 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31346 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31366 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31367 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31372 /* SSE4.1 */
31373 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31374 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31375 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31376 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
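  /* In the floor/ceil/trunc/rint rows below the `comparison' column is
     overloaded: it carries the ROUND_FLOOR/ROUND_CEIL/ROUND_TRUNC/ROUND_MXCSR
     constant, which the rounding expander presumably passes through as the
     ROUNDPD/ROUNDPS immediate, so e.g. __builtin_ia32_floorpd expands to a
     roundpd with the "round down" control.  */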
31378 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31379 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31380 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31381 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31383 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31384 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31386 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31387 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31389 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31390 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31391 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31392 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31394 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31395 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31397 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31398 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31400 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31401 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31402 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
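  /* In the ptest rows above, the rtx code in the `comparison' column selects
     which flag of PTEST the builtin reads: EQ tests ZF (ptestz), LTU tests CF
     (ptestc), and GTU requires both ZF and CF clear (ptestnzc).  The *_PTEST
     prototypes mark these as flag-reading builtins, returning a setcc on the
     PTEST result rather than a vector value.  */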
31404 /* SSE4.2 */
31405 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31406 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31407 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31408 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31409 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
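  /* A minimal usage sketch of the CRC32 rows above (signatures follow the
     UINT_FTYPE_* columns; the DImode form additionally requires 64-bit mode,
     and `byte'/`word' below stand for arbitrary input values):

       unsigned int crc = 0xffffffffu;
       crc = __builtin_ia32_crc32qi (crc, byte);
       crc = __builtin_ia32_crc32si (crc, word);

     These back the _mm_crc32_u8/u16/u32/u64 intrinsics in smmintrin.h.  */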
31411 /* SSE4A */
31412 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31413 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31414 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31415 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31417 /* AES */
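  /* The AES and PCLMUL rows use 0 for the name: the user-visible builtins are
     presumably declared elsewhere under their real AES/PCLMUL ISA guards, and
     these entries only map the IX86_BUILTIN_* codes to their insn patterns
     for expansion.  */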
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31426 /* PCLMUL */
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31429 /* AVX */
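  /* As elsewhere in this table, each row is: ISA option mask, insn code of
     the expansion pattern, builtin name, IX86_BUILTIN_* code, rtx comparison
     (or a sub-code such as a rounding constant), and the prototype index
     cast to int.  */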
31430 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31431 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31434 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31435 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31438 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31444 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31445 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31446 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31447 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31448 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31449 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31450 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31451 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31452 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31453 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31454 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31455 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31473 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
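  /* The cmp rows above take the comparison predicate as the trailing
     immediate.  Being VEX encoded they accept the full 0..31 predicate set
     (the legacy SSE encodings only define predicates 0..7), which is
     presumably why even the 128-bit cmppd/cmpps/cmpsd/cmpss immediate forms
     are gated on AVX here.  */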
31475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31478 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31479 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31481 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31483 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31485 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31495 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31501 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31503 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31568   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31570 /* AVX2 */
31571 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31572 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31573 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31574 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31579 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31580 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31581 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31582 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31583 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
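  /* The _CONVERT suffix on the palignr256 prototype appears to mean the
     builtin is declared with V4DI operands while the pattern
     (avx2_palignrv2ti) works in V2TImode, so the expander has to convert
     between the two representations.  The same convention shows up for
     pslldqi256/psrldqi256 further down.  */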
31588 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31589 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31590 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31591 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31596 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31597 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31607 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31610 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31611 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31612 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31613 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31614 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31615 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31616 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31617 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31618 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31619 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31620 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31621 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
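  /* Among the shift rows below, one ashl/ashr/lshr pattern serves two
     builtins: the ..._SI_COUNT / ..._INT_COUNT prototypes take the shift
     count as a scalar (the psllwi/pslldi/psllqi forms), while the
     ..._V8HI_COUNT / ..._V4SI_COUNT / ..._V2DI_COUNT prototypes take it in
     the low quadword of an XMM register (psllw/pslld/psllq); the expander
     presumably adapts the count operand to whichever form the pattern
     expects.  */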
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31718 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31720 /* BMI */
31721 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31722 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
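  /* For reference, the second operand of the bextr builtins packs the BEXTR
     control: start bit in bits [7:0] and field length in bits [15:8], so
     __builtin_ia32_bextr_u32 (x, (8 << 8) | 4) extracts 8 bits starting at
     bit 4.  The TBM bextri variants below take the same packing as an
     immediate.  */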
31723 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31725 /* TBM */
31726 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31727 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31729 /* F16C */
31730 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31731 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31732 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31733 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
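  /* In the vcvtps2ph rows above the trailing INT is the rounding-control
     immediate of VCVTPS2PH; the ph2ps direction takes no immediate.  */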
31735 /* BMI2 */
31736 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31737 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31738 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31739 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31740 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31741 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
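  /* PDEP deposits the low bits of its first operand at the positions selected
     by the mask in its second operand, and PEXT is the inverse gather.  A
     worked example with the 32-bit forms:

       __builtin_ia32_pdep_si (0x5, 0x1a) == 0x12   (0b101 into mask 0b11010)
       __builtin_ia32_pext_si (0x12, 0x1a) == 0x5

     so pext undoes pdep for the bits covered by the mask.  */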
31743 /* AVX512F */
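  /* Naming convention in the AVX512F rows: builtins ending in "_mask" take
     two extra trailing operands, a merge (pass-through) source and a write
     mask whose mode matches the lane count (HImode for 16-element, QImode
     for 8-element vectors); the "_maskz" forms are the zero-masking
     counterparts.  */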
31744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31799 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31800 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31802 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31803 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31911 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31912 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31913 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31914 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
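  /* Illustrative sketch only, not part of the descriptor table: the INT
     operand of the vpternlog builtins above is an 8-bit truth table indexed
     by the corresponding bits of the three vector operands, so imm 0x96
     gives a bitwise three-way XOR and imm 0xe8 a bitwise majority function.
     Assuming the __v16si and __mmask16 typedefs from the AVX-512 headers,
     a direct call would look roughly like:

	__v16si
	xor3 (__v16si a, __v16si b, __v16si c)
	{
	  return __builtin_ia32_pternlogd512_mask (a, b, c, 0x96,
						   (__mmask16) -1);
	}
  */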
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31946 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
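  /* Each entry above and below supplies, in order: the ISA option mask that
     must be enabled, the insn code of the pattern implementing the builtin,
     the builtin's user-visible name, its IX86_BUILTIN_* code, an rtx
     comparison or rounding sub-code (UNKNOWN when unused, or e.g. ROUND_FLOOR
     cast to an rtx code), and the ix86 function-type code describing its
     prototype.  */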
31951 /* Mask arithmetic operations */
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
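  /* Illustrative sketch only: the HI-mode mask builtins above act on 16-bit
     mask values (__mmask16, i.e. unsigned short) and are normally reached
     through the _mm512_kand, _mm512_kor, ... intrinsics rather than called
     directly.  A direct call would look roughly like:

	unsigned short
	combine_masks (unsigned short a, unsigned short b)
	{
	  return __builtin_ia32_kandhi (a, b);	(prototype HI_FTYPE_HI_HI)
	}
  */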
31963 /* SHA */
31964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
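  /* Illustrative sketch only: the SHA entries above are normally used through
     the _mm_sha1msg1_epu32, _mm_sha1rnds4_epu32, _mm_sha256rnds2_epu32, ...
     intrinsics.  Assuming the __v4si typedef from the intrinsics headers, a
     direct call to the SHA1 rounds builtin would look roughly like:

	__v4si
	sha1_rounds (__v4si abcd_e, __v4si wk)
	{
	  return __builtin_ia32_sha1rnds4 (abcd_e, wk, 0);
	}

     where the last operand selects the SHA1 round-function group and must be
     an immediate in the range 0-3.  */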
31972 /* AVX512VL. */
31973 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
31984 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31985 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
31986 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32011 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32012 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32013 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32014 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32015 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32016 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32017 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32018 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32019 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32020 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32021 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32022 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32023 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32028 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32030 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32031 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32032 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32033 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32034 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32035 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32036 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32037 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32040 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32041 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32042 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32043 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32067 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32068 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32069 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32070 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32071 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32077 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32078 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32080 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32081 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32085 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32086 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32089 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32090 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32101 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32102 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32113 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32114 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32116 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32117 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32118 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32119 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32120 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32121 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32122   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32123 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32124   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
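/* Masked RCP14, RSQRT14 and square-root builtins.  */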
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
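/* Masked integer add/subtract builtins, with and without saturation.  */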
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32143 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32144 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32147 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32154 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32155 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32156 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32160 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32167 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32168 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
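/* 128-bit lane shuffle builtins (SHUFF64x2, SHUFI64x2, SHUFI32x4, SHUFF32x4).  */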
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
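/* Masked down-converting PMOV truncation builtins: plain, signed-saturating and unsigned-saturating forms.  */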
32175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32176 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
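/* VRANGE, VGETEXP and VFIXUPIMM builtins.  */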
32211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32212 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32213 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32214 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
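/* Masked absolute value builtins.  */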
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
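/* Masked integer multiply builtins (high parts, low parts and VPMULLQ).  */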
32231   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32234   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32235   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32236   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32237   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32238 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32239 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32240 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
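/* AVX512DQ masked floating-point AND/ANDNOT builtins.  */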
32241 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32242 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32243 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32244 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32245 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32246 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32247 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32248 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
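/* Masked shift and bitwise logic builtins.  */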
32249 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32252 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32255   { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
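/* Masked pack-with-saturation and VRNDSCALE builtins.  */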
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32294 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32295 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32296 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
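/* VPTERNLOG and VSCALEF builtins.  */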
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
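/* Masked FMA builtins: fmadd, fmsub, fnmadd, fnmsub, fmaddsub and fmsubadd forms.  */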
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
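/* 128-bit lane inserts, AVX512BW variable word shifts and multiply-add builtins.  */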
32357 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32358 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
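/* Masked float-to-integer conversion builtins.  */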
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32374 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
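/* VGETMANT and duplicate-move (movddup/movshdup/movsldup) builtins.  */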
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
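/* AVX512DQ masked 64-bit integer to floating-point conversion builtins.  */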
32385 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32386 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32389 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32390 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32391 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
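/* Two-source variable permutation builtins for dword, qword, float and double elements.  */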
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
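/* Masked shuffle builtins (pshufb, pshufhw, pshuflw, pshufd, shufpd, shufps).  */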
32417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32450 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32451 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32452 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32453 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32454 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32455 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32466 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32467 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32468 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32469 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32470 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32475 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32476 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32477 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32478 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32483 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32484 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32485 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32486 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32519 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32520 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32521 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32522 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32578 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32579 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32580 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32581 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32582 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32590 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32591 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32592 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32593 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32615 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32616 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32617 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32623 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32624 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32625 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32626 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32671 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32672 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
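  /* Reading aid (a sketch, not a table entry): each row above fills a
     struct builtin_description, declared earlier in this file, in the order
       { ISA option mask, insn pattern (CODE_FOR_*), builtin name,
	 IX86_BUILTIN_* code, rtx comparison code (UNKNOWN when unused),
	 (int) function type }.
     The *_FTYPE_* value spells the return type followed by the argument
     types, so the __builtin_ia32_cmpps128_mask row above pairs
     avx512vl_cmpv4sf3_mask with QI_FTYPE_V4SF_V4SF_INT_QI: two V4SF
     sources, an immediate predicate and a QImode write mask, producing a
     QImode mask.  Value-returning *_mask builtins instead take the
     pass-through destination and the write mask as trailing operands.  */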
32688 /* AVX512DQ. */
32689 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32690 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32691 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32692 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32693 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32694 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32695 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32696 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32697 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32698 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32699 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32700 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32701 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32702 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32703 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32704 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32705 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32706 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32707 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32708 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32709 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32710 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32711 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32712 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32713 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32714 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32715 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32716 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32717 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32718 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32719 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
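  /* Usage sketch (an illustration, not a table entry): with -mavx512dq the
     masked 64-bit multiply intrinsic from avx512dqintrin.h reaches the
     __builtin_ia32_pmullq512_mask row above
     (V8DI_FTYPE_V8DI_V8DI_V8DI_QI), roughly as

	 __m512i
	 masked_mul (__m512i src, __mmask8 k, __m512i a, __m512i b)
	 {
	   return _mm512_mask_mullo_epi64 (src, k, a, b);
	 }

     which expands to __builtin_ia32_pmullq512_mask (a, b, src, k); the
     unmasked _mm512_mullo_epi64 form passes an all-ones mask instead.  */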
32721 /* AVX512BW. */
32722 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32723 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32724 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32725 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32726 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32727 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32728 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32729 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32730 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32731 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32732 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32733 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32734 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32735 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32740 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32741 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32743 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32744 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32745 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32746 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32747 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32748 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32749 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32750 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32751 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32752 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32753 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32754 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32755 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32756 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32757 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32758 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32759 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32760 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32761 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32762 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32763 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32764 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32765 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
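/* Editorial note: in the AVX512BW entries above, the trailing _SI / _DI in the
   prototype names corresponds to the width of the writemask operand -- a
   32-bit mask (__mmask32 at the intrinsic level) for 32 HImode elements and a
   64-bit mask (__mmask64) for 64 QImode elements.  */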
32814 /* AVX512IFMA */
32815 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32816 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32817 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32818 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32819 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32820 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32821 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32822 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32823 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32824 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32825 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32826 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32828 /* AVX512VBMI */
32829 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32830 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32831 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32832 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32833 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32834 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32835 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32836 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32837 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32838 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32839 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32840 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32841 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32842 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32843 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32844 };
32846 /* Builtins with rounding support. */
32847 static const struct builtin_description bdesc_round_args[] =
32848 {
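/* Editorial note: each entry follows the struct builtin_description layout:
   { ISA option mask, insn code, builtin name, ix86_builtins enum,
   rtx comparison code (UNKNOWN when unused), prototype enum cast to int }.
   For these "_round" builtins the final INT in the prototype is the extra
   rounding/SAE immediate operand.  */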
32849 /* AVX512F */
32850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32869 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32871 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32878 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32880 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32930 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32932 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32934 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32936 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32938 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32940 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32942 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32944 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32970 /* AVX512ER */
32971 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32972 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32973 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32974 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32975 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32976 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32977 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32978 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32979 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32980 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32982 /* AVX512DQ. */
32983 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32984 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32985 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32986 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32987 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32988 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32989 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32990 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32991 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32992 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32993 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32994 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32995 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32996 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32997 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
32998 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
32999 };
33001 /* Builtins for MPX. */
33002 static const struct builtin_description bdesc_mpx[] =
33003 {
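/* Editorial note: the MPX builtins carry a zero insn code because they are
   not tied to a single named insn pattern; they appear to be expanded
   specially in ix86_expand_builtin instead.  */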
33004 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33005 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33006 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33007 };
33009 /* Const builtins for MPX. */
33010 static const struct builtin_description bdesc_mpx_const[] =
33011 {
33012 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33013 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33014 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33015 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33016 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33017 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33018 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33019 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33020 };
33022 /* FMA4 and XOP. */
33023 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33024 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33025 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33026 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33027 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33028 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33029 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33030 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33031 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33032 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33033 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33034 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33035 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33036 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33037 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33038 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33039 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33040 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33041 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33042 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33043 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33044 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33045 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33046 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33047 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33048 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33049 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33050 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33051 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33052 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33053 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33054 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33055 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33056 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33057 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33058 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33059 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33060 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33061 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33062 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33063 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33064 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33065 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33066 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33067 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33068 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33069 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33070 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33071 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33072 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33073 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33074 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
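/* Editorial note: the MULTI_ARG_* macros above are shorthand aliases for the
   corresponding ix86_builtin_func_type enumerators, kept short so the FMA4
   and XOP table below stays readable.  */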
33076 static const struct builtin_description bdesc_multi_arg[] =
33077 {
33078 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33079 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33080 UNKNOWN, (int)MULTI_ARG_3_SF },
33081 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33082 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33083 UNKNOWN, (int)MULTI_ARG_3_DF },
33085 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33086 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33087 UNKNOWN, (int)MULTI_ARG_3_SF },
33088 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33089 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33090 UNKNOWN, (int)MULTI_ARG_3_DF },
33092 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33093 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33094 UNKNOWN, (int)MULTI_ARG_3_SF },
33095 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33096 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33097 UNKNOWN, (int)MULTI_ARG_3_DF },
33098 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33099 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33100 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33101 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33102 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33103 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33105 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33106 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33107 UNKNOWN, (int)MULTI_ARG_3_SF },
33108 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33109 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33110 UNKNOWN, (int)MULTI_ARG_3_DF },
33111 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33112 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33113 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33114 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33115 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33116 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33278 /* TM vector builtins. */
33280 /* Reuse the existing x86-specific `struct builtin_description' because
33281 we're lazy. Add casts to make them fit. */
33282 static const struct builtin_description bdesc_tm[] =
33284 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33285 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33286 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33287 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33288 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33289 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33290 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33292 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33293 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33294 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33295 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33296 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33297 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33298 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33300 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33301 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33302 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33303 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33304 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33305 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33306 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33308 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33309 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33310 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33313 /* TM callbacks. */
33315 /* Return the builtin decl needed to load a vector of TYPE. */
33317 static tree
33318 ix86_builtin_tm_load (tree type)
33320 if (TREE_CODE (type) == VECTOR_TYPE)
33322 switch (tree_to_uhwi (TYPE_SIZE (type)))
33324 case 64:
33325 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33326 case 128:
33327 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33328 case 256:
33329 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33332 return NULL_TREE;
33335 /* Return the builtin decl needed to store a vector of TYPE. */
33337 static tree
33338 ix86_builtin_tm_store (tree type)
33340 if (TREE_CODE (type) == VECTOR_TYPE)
33342 switch (tree_to_uhwi (TYPE_SIZE (type)))
33344 case 64:
33345 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33346 case 128:
33347 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33348 case 256:
33349 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33352 return NULL_TREE;
33355 /* Initialize the transactional memory vector load/store builtins. */
33357 static void
33358 ix86_init_tm_builtins (void)
33360 enum ix86_builtin_func_type ftype;
33361 const struct builtin_description *d;
33362 size_t i;
33363 tree decl;
33364 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33365 tree attrs_log, attrs_type_log;
33367 if (!flag_tm)
33368 return;
33370 /* If there are no builtins defined, we must be compiling in a
33371 language without trans-mem support. */
33372 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33373 return;
33375 /* Use whatever attributes a normal TM load has. */
33376 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33377 attrs_load = DECL_ATTRIBUTES (decl);
33378 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33379 /* Use whatever attributes a normal TM store has. */
33380 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33381 attrs_store = DECL_ATTRIBUTES (decl);
33382 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33383 /* Use whatever attributes a normal TM log has. */
33384 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33385 attrs_log = DECL_ATTRIBUTES (decl);
33386 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33388 for (i = 0, d = bdesc_tm;
33389 i < ARRAY_SIZE (bdesc_tm);
33390 i++, d++)
33392 if ((d->mask & ix86_isa_flags) != 0
33393 || (lang_hooks.builtin_function
33394 == lang_hooks.builtin_function_ext_scope))
33396 tree type, attrs, attrs_type;
33397 enum built_in_function code = (enum built_in_function) d->code;
33399 ftype = (enum ix86_builtin_func_type) d->flag;
33400 type = ix86_get_builtin_func_type (ftype);
33402 if (BUILTIN_TM_LOAD_P (code))
33404 attrs = attrs_load;
33405 attrs_type = attrs_type_load;
33407 else if (BUILTIN_TM_STORE_P (code))
33409 attrs = attrs_store;
33410 attrs_type = attrs_type_store;
33412 else
33414 attrs = attrs_log;
33415 attrs_type = attrs_type_log;
33417 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33418 /* The builtin without the prefix for
33419 calling it directly. */
33420 d->name + strlen ("__builtin_"),
33421 attrs);
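	   /* For instance, the bdesc_tm entry named "__builtin__ITM_WM64"
	      above is thereby also registered under the direct library
	      name "_ITM_WM64".  */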
33422 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33423 set the TYPE_ATTRIBUTES. */
33424 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33426 set_builtin_decl (code, decl, false);
33431 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33432 not in the current target ISA, to allow the user to compile particular
33433 modules with target-specific options that differ from the command-line
33434 options. */
33435 static void
33436 ix86_init_mmx_sse_builtins (void)
33438 const struct builtin_description * d;
33439 enum ix86_builtin_func_type ftype;
33440 size_t i;
33442 /* Add all special builtins with variable number of operands. */
33443 for (i = 0, d = bdesc_special_args;
33444 i < ARRAY_SIZE (bdesc_special_args);
33445 i++, d++)
33447 if (d->name == 0)
33448 continue;
33450 ftype = (enum ix86_builtin_func_type) d->flag;
33451 def_builtin (d->mask, d->name, ftype, d->code);
33454 /* Add all builtins with variable number of operands. */
33455 for (i = 0, d = bdesc_args;
33456 i < ARRAY_SIZE (bdesc_args);
33457 i++, d++)
33459 if (d->name == 0)
33460 continue;
33462 ftype = (enum ix86_builtin_func_type) d->flag;
33463 def_builtin_const (d->mask, d->name, ftype, d->code);
33466 /* Add all builtins with rounding. */
33467 for (i = 0, d = bdesc_round_args;
33468 i < ARRAY_SIZE (bdesc_round_args);
33469 i++, d++)
33471 if (d->name == 0)
33472 continue;
33474 ftype = (enum ix86_builtin_func_type) d->flag;
33475 def_builtin_const (d->mask, d->name, ftype, d->code);
33478 /* pcmpestr[im] insns. */
33479 for (i = 0, d = bdesc_pcmpestr;
33480 i < ARRAY_SIZE (bdesc_pcmpestr);
33481 i++, d++)
33483 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33484 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33485 else
33486 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33487 def_builtin_const (d->mask, d->name, ftype, d->code);
33490 /* pcmpistr[im] insns. */
33491 for (i = 0, d = bdesc_pcmpistr;
33492 i < ARRAY_SIZE (bdesc_pcmpistr);
33493 i++, d++)
33495 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33496 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33497 else
33498 ftype = INT_FTYPE_V16QI_V16QI_INT;
33499 def_builtin_const (d->mask, d->name, ftype, d->code);
33502 /* comi/ucomi insns. */
33503 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33505 if (d->mask == OPTION_MASK_ISA_SSE2)
33506 ftype = INT_FTYPE_V2DF_V2DF;
33507 else
33508 ftype = INT_FTYPE_V4SF_V4SF;
33509 def_builtin_const (d->mask, d->name, ftype, d->code);
33512 /* SSE */
33513 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33514 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33515 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33516 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33518 /* SSE or 3DNow!A */
33519 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33520 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33521 IX86_BUILTIN_MASKMOVQ);
33523 /* SSE2 */
33524 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33525 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33527 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33528 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33529 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33530 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33532 /* SSE3. */
33533 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33534 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33535 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33536 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33538 /* AES */
33539 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33540 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33541 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33542 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33543 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33544 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33545 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33546 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33547 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33548 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33549 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33550 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33552 /* PCLMUL */
33553 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33554 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33556 /* RDRND */
33557 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33558 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33559 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33560 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33561 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33562 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33563 IX86_BUILTIN_RDRAND64_STEP);
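  /* Illustrative usage sketch (hypothetical caller, not part of this file):
     each step builtin returns nonzero when the hardware delivered a value,
     e.g.
	 unsigned int r;
	 if (__builtin_ia32_rdrand32_step (&r))
	   use_random (r);
     <immintrin.h> exposes these as _rdrand16_step/_rdrand32_step/
     _rdrand64_step.  */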
33565 /* AVX2 */
33566 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33567 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33568 IX86_BUILTIN_GATHERSIV2DF);
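  /* Operand-order note for the gather builtins in this block (sketch with
     hypothetical operands): each takes (src, base, index, mask, scale),
     e.g.
	 v = __builtin_ia32_gathersiv2df (src, base, idx, mask, 8);
     the _mm*_i32gather_* / _mm*_i64gather_* intrinsics wrap them.  */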
33570 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33571 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33572 IX86_BUILTIN_GATHERSIV4DF);
33574 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33575 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33576 IX86_BUILTIN_GATHERDIV2DF);
33578 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33579 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33580 IX86_BUILTIN_GATHERDIV4DF);
33582 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33583 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33584 IX86_BUILTIN_GATHERSIV4SF);
33586 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33587 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33588 IX86_BUILTIN_GATHERSIV8SF);
33590 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33591 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33592 IX86_BUILTIN_GATHERDIV4SF);
33594 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33595 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33596 IX86_BUILTIN_GATHERDIV8SF);
33598 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33599 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33600 IX86_BUILTIN_GATHERSIV2DI);
33602 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33603 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33604 IX86_BUILTIN_GATHERSIV4DI);
33606 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33607 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33608 IX86_BUILTIN_GATHERDIV2DI);
33610 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33611 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33612 IX86_BUILTIN_GATHERDIV4DI);
33614 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33615 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33616 IX86_BUILTIN_GATHERSIV4SI);
33618 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33619 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33620 IX86_BUILTIN_GATHERSIV8SI);
33622 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33623 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33624 IX86_BUILTIN_GATHERDIV4SI);
33626 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33627 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33628 IX86_BUILTIN_GATHERDIV8SI);
33630 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33631 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33632 IX86_BUILTIN_GATHERALTSIV4DF);
33634 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33635 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33636 IX86_BUILTIN_GATHERALTDIV8SF);
33638 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33639 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33640 IX86_BUILTIN_GATHERALTSIV4DI);
33642 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33643 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33644 IX86_BUILTIN_GATHERALTDIV8SI);
33646 /* AVX512F */
33647 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33648 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33649 IX86_BUILTIN_GATHER3SIV16SF);
33651 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33652 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33653 IX86_BUILTIN_GATHER3SIV8DF);
33655 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33656 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33657 IX86_BUILTIN_GATHER3DIV16SF);
33659 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33660 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33661 IX86_BUILTIN_GATHER3DIV8DF);
33663 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33664 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33665 IX86_BUILTIN_GATHER3SIV16SI);
33667 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33668 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33669 IX86_BUILTIN_GATHER3SIV8DI);
33671 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33672 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33673 IX86_BUILTIN_GATHER3DIV16SI);
33675 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33676 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33677 IX86_BUILTIN_GATHER3DIV8DI);
33679 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33680 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33681 IX86_BUILTIN_GATHER3ALTSIV8DF);
33683 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33684 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33685 IX86_BUILTIN_GATHER3ALTDIV16SF);
33687 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33688 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33689 IX86_BUILTIN_GATHER3ALTSIV8DI);
33691 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33692 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33693 IX86_BUILTIN_GATHER3ALTDIV16SI);
33695 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33696 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33697 IX86_BUILTIN_SCATTERSIV16SF);
33699 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33700 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33701 IX86_BUILTIN_SCATTERSIV8DF);
33703 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33704 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33705 IX86_BUILTIN_SCATTERDIV16SF);
33707 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33708 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33709 IX86_BUILTIN_SCATTERDIV8DF);
33711 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33712 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33713 IX86_BUILTIN_SCATTERSIV16SI);
33715 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33716 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33717 IX86_BUILTIN_SCATTERSIV8DI);
33719 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33720 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33721 IX86_BUILTIN_SCATTERDIV16SI);
33723 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33724 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33725 IX86_BUILTIN_SCATTERDIV8DI);
33727 /* AVX512VL */
33728 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33729 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33730 IX86_BUILTIN_GATHER3SIV2DF);
33732 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33733 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33734 IX86_BUILTIN_GATHER3SIV4DF);
33736 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33737 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33738 IX86_BUILTIN_GATHER3DIV2DF);
33740 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33741 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33742 IX86_BUILTIN_GATHER3DIV4DF);
33744 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33745 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33746 IX86_BUILTIN_GATHER3SIV4SF);
33748 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33749 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33750 IX86_BUILTIN_GATHER3SIV8SF);
33752 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33753 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33754 IX86_BUILTIN_GATHER3DIV4SF);
33756 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33757 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33758 IX86_BUILTIN_GATHER3DIV8SF);
33760 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33761 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33762 IX86_BUILTIN_GATHER3SIV2DI);
33764 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33765 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33766 IX86_BUILTIN_GATHER3SIV4DI);
33768 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33769 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33770 IX86_BUILTIN_GATHER3DIV2DI);
33772 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33773 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33774 IX86_BUILTIN_GATHER3DIV4DI);
33776 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33777 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33778 IX86_BUILTIN_GATHER3SIV4SI);
33780 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33781 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33782 IX86_BUILTIN_GATHER3SIV8SI);
33784 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33785 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33786 IX86_BUILTIN_GATHER3DIV4SI);
33788 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33789 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33790 IX86_BUILTIN_GATHER3DIV8SI);
33792 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33793 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33794 IX86_BUILTIN_GATHER3ALTSIV4DF);
33796 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33797 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33798 IX86_BUILTIN_GATHER3ALTDIV8SF);
33800 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33801 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33802 IX86_BUILTIN_GATHER3ALTSIV4DI);
33804 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33805 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33806 IX86_BUILTIN_GATHER3ALTDIV8SI);
33808 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33809 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33810 IX86_BUILTIN_SCATTERSIV8SF);
33812 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33813 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33814 IX86_BUILTIN_SCATTERSIV4SF);
33816 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33817 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33818 IX86_BUILTIN_SCATTERSIV4DF);
33820 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33821 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33822 IX86_BUILTIN_SCATTERSIV2DF);
33824 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33825 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33826 IX86_BUILTIN_SCATTERDIV8SF);
33828 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33829 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33830 IX86_BUILTIN_SCATTERDIV4SF);
33832 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33833 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33834 IX86_BUILTIN_SCATTERDIV4DF);
33836 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33837 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33838 IX86_BUILTIN_SCATTERDIV2DF);
33840 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33841 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33842 IX86_BUILTIN_SCATTERSIV8SI);
33844 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33845 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33846 IX86_BUILTIN_SCATTERSIV4SI);
33848 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33849 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33850 IX86_BUILTIN_SCATTERSIV4DI);
33852 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33853 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33854 IX86_BUILTIN_SCATTERSIV2DI);
33856 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33857 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33858 IX86_BUILTIN_SCATTERDIV8SI);
33860 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33861 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33862 IX86_BUILTIN_SCATTERDIV4SI);
33864 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33865 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33866 IX86_BUILTIN_SCATTERDIV4DI);
33868 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33869 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33870 IX86_BUILTIN_SCATTERDIV2DI);
33872 /* AVX512PF */
33873 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33874 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33875 IX86_BUILTIN_GATHERPFDPD);
33876 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33877 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33878 IX86_BUILTIN_GATHERPFDPS);
33879 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33880 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33881 IX86_BUILTIN_GATHERPFQPD);
33882 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33883 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33884 IX86_BUILTIN_GATHERPFQPS);
33885 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33886 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33887 IX86_BUILTIN_SCATTERPFDPD);
33888 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33889 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33890 IX86_BUILTIN_SCATTERPFDPS);
33891 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33892 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33893 IX86_BUILTIN_SCATTERPFQPD);
33894 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33895 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33896 IX86_BUILTIN_SCATTERPFQPS);
33898 /* SHA */
33899 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33900 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33901 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33902 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33903 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33904 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33905 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33906 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33907 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33908 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33909 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33910 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33911 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33912 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33914 /* RTM. */
33915 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33916 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33918 /* MMX access to the vec_init patterns. */
33919 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33920 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33922 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33923 V4HI_FTYPE_HI_HI_HI_HI,
33924 IX86_BUILTIN_VEC_INIT_V4HI);
33926 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33927 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33928 IX86_BUILTIN_VEC_INIT_V8QI);
33930 /* Access to the vec_extract patterns. */
33931 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33932 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33933 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33934 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33935 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33936 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33937 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33938 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33939 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33940 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33942 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33943 "__builtin_ia32_vec_ext_v4hi",
33944 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33946 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33947 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33949 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33950 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33952 /* Access to the vec_set patterns. */
33953 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33954 "__builtin_ia32_vec_set_v2di",
33955 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33957 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33958 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33960 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33961 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33963 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33964 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33966 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33967 "__builtin_ia32_vec_set_v4hi",
33968 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33970 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33971 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
33973 /* RDSEED */
33974 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33975 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33976 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33977 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33978 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33979 "__builtin_ia32_rdseed_di_step",
33980 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
33982 /* ADCX */
33983 def_builtin (0, "__builtin_ia32_addcarryx_u32",
33984 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
33985 def_builtin (OPTION_MASK_ISA_64BIT,
33986 "__builtin_ia32_addcarryx_u64",
33987 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33988 IX86_BUILTIN_ADDCARRYX64);
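  /* Illustrative usage sketch (hypothetical names): the carry-in comes
     first, the sum is stored through the pointer, and the carry-out is
     returned, e.g.
	 unsigned int sum;
	 unsigned char c = __builtin_ia32_addcarryx_u32 (0, a, b, &sum);
     matching the _addcarryx_u32 intrinsic wrapper.  */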
33990 /* SBB */
33991 def_builtin (0, "__builtin_ia32_sbb_u32",
33992 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
33993 def_builtin (OPTION_MASK_ISA_64BIT,
33994 "__builtin_ia32_sbb_u64",
33995 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33996 IX86_BUILTIN_SBB64);
33998 /* Read/write FLAGS. */
33999 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34000 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34001 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34002 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34003 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34004 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34005 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34006 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
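  /* Illustrative usage sketch (hypothetical caller): these read and write
     the flags register directly, e.g.
	 unsigned int flags = __builtin_ia32_readeflags_u32 ();
	 __builtin_ia32_writeeflags_u32 (flags);
     with _u64 variants for 64-bit targets.  */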
34008 /* CLFLUSHOPT. */
34009 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34010 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34012 /* CLWB. */
34013 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34014 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
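  /* Illustrative usage sketch (hypothetical pointer p): both builtins take
     the address of the cache line to flush or write back, e.g.
	 __builtin_ia32_clflushopt (p);
	 __builtin_ia32_clwb (p);  */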
34016 /* Add FMA4 multi-arg instructions. */
34017 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34019 if (d->name == 0)
34020 continue;
34022 ftype = (enum ix86_builtin_func_type) d->flag;
34023 def_builtin_const (d->mask, d->name, ftype, d->code);
34027 static void
34028 ix86_init_mpx_builtins ()
34030 const struct builtin_description * d;
34031 enum ix86_builtin_func_type ftype;
34032 tree decl;
34033 size_t i;
34035 for (i = 0, d = bdesc_mpx;
34036 i < ARRAY_SIZE (bdesc_mpx);
34037 i++, d++)
34039 if (d->name == 0)
34040 continue;
34042 ftype = (enum ix86_builtin_func_type) d->flag;
34043 decl = def_builtin (d->mask, d->name, ftype, d->code);
34045 /* Without the leaf and nothrow flags on MPX builtins,
34046 abnormal edges may follow their calls when setjmp
34047 is present in the function. Since there may be many
34048 MPX builtin calls, this creates lots of useless
34049 edges and enormous PHI nodes. To avoid this we mark
34050 MPX builtins as leaf and nothrow. */
34051 if (decl)
34053 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34054 NULL_TREE);
34055 TREE_NOTHROW (decl) = 1;
34057 else
34059 ix86_builtins_isa[(int)d->code].leaf_p = true;
34060 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34064 for (i = 0, d = bdesc_mpx_const;
34065 i < ARRAY_SIZE (bdesc_mpx_const);
34066 i++, d++)
34068 if (d->name == 0)
34069 continue;
34071 ftype = (enum ix86_builtin_func_type) d->flag;
34072 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34074 if (decl)
34076 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34077 NULL_TREE);
34078 TREE_NOTHROW (decl) = 1;
34080 else
34082 ix86_builtins_isa[(int)d->code].leaf_p = true;
34083 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34088 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34089 to return a pointer to VERSION_DECL if the outcome of the expression
34090 formed by PREDICATE_CHAIN is true. This function will be called during
34091 version dispatch to decide which function version to execute. It returns
34092 the basic block at the end, to which more conditions can be added. */
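/* Sketch of the GIMPLE this emits into NEW_BB (temporary names are
   illustrative):

	cond = predicate_decl (predicate_arg);	  repeated per chain entry
	and_var = MIN_EXPR <cond, and_var>;
	if (and_var > 0)
	  { result = (void *) &version_decl; return result; }  */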
34094 static basic_block
34095 add_condition_to_bb (tree function_decl, tree version_decl,
34096 tree predicate_chain, basic_block new_bb)
34098 gimple return_stmt;
34099 tree convert_expr, result_var;
34100 gimple convert_stmt;
34101 gimple call_cond_stmt;
34102 gimple if_else_stmt;
34104 basic_block bb1, bb2, bb3;
34105 edge e12, e23;
34107 tree cond_var, and_expr_var = NULL_TREE;
34108 gimple_seq gseq;
34110 tree predicate_decl, predicate_arg;
34112 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34114 gcc_assert (new_bb != NULL);
34115 gseq = bb_seq (new_bb);
34118 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34119 build_fold_addr_expr (version_decl));
34120 result_var = create_tmp_var (ptr_type_node);
34121 convert_stmt = gimple_build_assign (result_var, convert_expr);
34122 return_stmt = gimple_build_return (result_var);
34124 if (predicate_chain == NULL_TREE)
34126 gimple_seq_add_stmt (&gseq, convert_stmt);
34127 gimple_seq_add_stmt (&gseq, return_stmt);
34128 set_bb_seq (new_bb, gseq);
34129 gimple_set_bb (convert_stmt, new_bb);
34130 gimple_set_bb (return_stmt, new_bb);
34131 pop_cfun ();
34132 return new_bb;
34135 while (predicate_chain != NULL)
34137 cond_var = create_tmp_var (integer_type_node);
34138 predicate_decl = TREE_PURPOSE (predicate_chain);
34139 predicate_arg = TREE_VALUE (predicate_chain);
34140 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34141 gimple_call_set_lhs (call_cond_stmt, cond_var);
34143 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34144 gimple_set_bb (call_cond_stmt, new_bb);
34145 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34147 predicate_chain = TREE_CHAIN (predicate_chain);
34149 if (and_expr_var == NULL)
34150 and_expr_var = cond_var;
34151 else
34153 gimple assign_stmt;
34154 /* Use MIN_EXPR to check whether any predicate result is zero:
34155 and_expr_var = min_expr <cond_var, and_expr_var>. */
34156 assign_stmt = gimple_build_assign (and_expr_var,
34157 build2 (MIN_EXPR, integer_type_node,
34158 cond_var, and_expr_var));
34160 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34161 gimple_set_bb (assign_stmt, new_bb);
34162 gimple_seq_add_stmt (&gseq, assign_stmt);
34166 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34167 integer_zero_node,
34168 NULL_TREE, NULL_TREE);
34169 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34170 gimple_set_bb (if_else_stmt, new_bb);
34171 gimple_seq_add_stmt (&gseq, if_else_stmt);
34173 gimple_seq_add_stmt (&gseq, convert_stmt);
34174 gimple_seq_add_stmt (&gseq, return_stmt);
34175 set_bb_seq (new_bb, gseq);
34177 bb1 = new_bb;
34178 e12 = split_block (bb1, if_else_stmt);
34179 bb2 = e12->dest;
34180 e12->flags &= ~EDGE_FALLTHRU;
34181 e12->flags |= EDGE_TRUE_VALUE;
34183 e23 = split_block (bb2, return_stmt);
34185 gimple_set_bb (convert_stmt, bb2);
34186 gimple_set_bb (return_stmt, bb2);
34188 bb3 = e23->dest;
34189 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34191 remove_edge (e23);
34192 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34194 pop_cfun ();
34196 return bb3;
34199 /* This parses the attribute arguments to target in DECL and determines
34200 the right builtin to use to match the platform specification.
34201 It returns the priority value for this version decl. If PREDICATE_LIST
34202 is not NULL, it stores the list of cpu features that need to be checked
34203 before dispatching this function. */
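/* For example (illustrative attribute string), a version declared with
	__attribute__ ((target ("arch=core2,ssse3")))
   produces a predicate chain testing __builtin_cpu_is ("core2") and
   __builtin_cpu_supports ("ssse3"), and returns the priority of the
   highest-ranked entry.  */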
34205 static unsigned int
34206 get_builtin_code_for_version (tree decl, tree *predicate_list)
34208 tree attrs;
34209 struct cl_target_option cur_target;
34210 tree target_node;
34211 struct cl_target_option *new_target;
34212 const char *arg_str = NULL;
34213 const char *attrs_str = NULL;
34214 char *tok_str = NULL;
34215 char *token;
34217 /* Priority of i386 features; a greater value means higher priority. This is
34218 used to decide the order in which function dispatch must happen. For
34219 instance, a version specialized for SSE4.2 should be checked for dispatch
34220 before a version for SSE3, as SSE4.2 implies SSE3. */
34221 enum feature_priority
34223 P_ZERO = 0,
34224 P_MMX,
34225 P_SSE,
34226 P_SSE2,
34227 P_SSE3,
34228 P_SSSE3,
34229 P_PROC_SSSE3,
34230 P_SSE4_A,
34231 P_PROC_SSE4_A,
34232 P_SSE4_1,
34233 P_SSE4_2,
34234 P_PROC_SSE4_2,
34235 P_POPCNT,
34236 P_AVX,
34237 P_PROC_AVX,
34238 P_FMA4,
34239 P_XOP,
34240 P_PROC_XOP,
34241 P_FMA,
34242 P_PROC_FMA,
34243 P_AVX2,
34244 P_PROC_AVX2,
34245 P_AVX512F
34248 enum feature_priority priority = P_ZERO;
34250 /* These are the target attribute strings for which a dispatcher is
34251 available, from fold_builtin_cpu. */
34253 static struct _feature_list
34255 const char *const name;
34256 const enum feature_priority priority;
34258 const feature_list[] =
34260 {"mmx", P_MMX},
34261 {"sse", P_SSE},
34262 {"sse2", P_SSE2},
34263 {"sse3", P_SSE3},
34264 {"sse4a", P_SSE4_A},
34265 {"ssse3", P_SSSE3},
34266 {"sse4.1", P_SSE4_1},
34267 {"sse4.2", P_SSE4_2},
34268 {"popcnt", P_POPCNT},
34269 {"avx", P_AVX},
34270 {"fma4", P_FMA4},
34271 {"xop", P_XOP},
34272 {"fma", P_FMA},
34273 {"avx2", P_AVX2},
34274 {"avx512f", P_AVX512F}
34278 static unsigned int NUM_FEATURES
34279 = sizeof (feature_list) / sizeof (struct _feature_list);
34281 unsigned int i;
34283 tree predicate_chain = NULL_TREE;
34284 tree predicate_decl, predicate_arg;
34286 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34287 gcc_assert (attrs != NULL);
34289 attrs = TREE_VALUE (TREE_VALUE (attrs));
34291 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34292 attrs_str = TREE_STRING_POINTER (attrs);
34294 /* Return priority zero for default function. */
34295 if (strcmp (attrs_str, "default") == 0)
34296 return 0;
34298 /* Handle arch= if specified. For priority, set it to be 1 more than
34299 the best instruction set the processor can handle. For instance, if
34300 there is a version for atom and a version for ssse3 (the highest ISA
34301 priority for atom), the atom version must be checked for dispatch
34302 before the ssse3 version. */
34303 if (strstr (attrs_str, "arch=") != NULL)
34305 cl_target_option_save (&cur_target, &global_options);
34306 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34307 &global_options_set);
34309 gcc_assert (target_node);
34310 new_target = TREE_TARGET_OPTION (target_node);
34311 gcc_assert (new_target);
34313 if (new_target->arch_specified && new_target->arch > 0)
34315 switch (new_target->arch)
34317 case PROCESSOR_CORE2:
34318 arg_str = "core2";
34319 priority = P_PROC_SSSE3;
34320 break;
34321 case PROCESSOR_NEHALEM:
34322 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34323 arg_str = "westmere";
34324 else
34325 /* We translate "arch=corei7" and "arch=nehalem" to
34326 "corei7" so that it will be mapped to M_INTEL_COREI7
34327 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34328 arg_str = "corei7";
34329 priority = P_PROC_SSE4_2;
34330 break;
34331 case PROCESSOR_SANDYBRIDGE:
34332 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34333 arg_str = "ivybridge";
34334 else
34335 arg_str = "sandybridge";
34336 priority = P_PROC_AVX;
34337 break;
34338 case PROCESSOR_HASWELL:
34339 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34340 arg_str = "broadwell";
34341 else
34342 arg_str = "haswell";
34343 priority = P_PROC_AVX2;
34344 break;
34345 case PROCESSOR_BONNELL:
34346 arg_str = "bonnell";
34347 priority = P_PROC_SSSE3;
34348 break;
34349 case PROCESSOR_SILVERMONT:
34350 arg_str = "silvermont";
34351 priority = P_PROC_SSE4_2;
34352 break;
34353 case PROCESSOR_AMDFAM10:
34354 arg_str = "amdfam10h";
34355 priority = P_PROC_SSE4_A;
34356 break;
34357 case PROCESSOR_BTVER1:
34358 arg_str = "btver1";
34359 priority = P_PROC_SSE4_A;
34360 break;
34361 case PROCESSOR_BTVER2:
34362 arg_str = "btver2";
34363 priority = P_PROC_AVX;
34364 break;
34365 case PROCESSOR_BDVER1:
34366 arg_str = "bdver1";
34367 priority = P_PROC_XOP;
34368 break;
34369 case PROCESSOR_BDVER2:
34370 arg_str = "bdver2";
34371 priority = P_PROC_FMA;
34372 break;
34373 case PROCESSOR_BDVER3:
34374 arg_str = "bdver3";
34375 priority = P_PROC_FMA;
34376 break;
34377 case PROCESSOR_BDVER4:
34378 arg_str = "bdver4";
34379 priority = P_PROC_AVX2;
34380 break;
34384 cl_target_option_restore (&global_options, &cur_target);
34386 if (predicate_list && arg_str == NULL)
34388 error_at (DECL_SOURCE_LOCATION (decl),
34389 "No dispatcher found for the versioning attributes");
34390 return 0;
34393 if (predicate_list)
34395 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34396 /* For a C string literal the length includes the trailing NULL. */
34397 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34398 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34399 predicate_chain);
34403 /* Process feature name. */
34404 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34405 strcpy (tok_str, attrs_str);
34406 token = strtok (tok_str, ",");
34407 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34409 while (token != NULL)
34411 /* Do not process "arch=" */
34412 if (strncmp (token, "arch=", 5) == 0)
34414 token = strtok (NULL, ",");
34415 continue;
34417 for (i = 0; i < NUM_FEATURES; ++i)
34419 if (strcmp (token, feature_list[i].name) == 0)
34421 if (predicate_list)
34423 predicate_arg = build_string_literal (
34424 strlen (feature_list[i].name) + 1,
34425 feature_list[i].name);
34426 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34427 predicate_chain);
34429 /* Find the maximum priority feature. */
34430 if (feature_list[i].priority > priority)
34431 priority = feature_list[i].priority;
34433 break;
34436 if (predicate_list && i == NUM_FEATURES)
34438 error_at (DECL_SOURCE_LOCATION (decl),
34439 "No dispatcher found for %s", token);
34440 return 0;
34442 token = strtok (NULL, ",");
34444 free (tok_str);
34446 if (predicate_list && predicate_chain == NULL_TREE)
34448 error_at (DECL_SOURCE_LOCATION (decl),
34449 "No dispatcher found for the versioning attributes : %s",
34450 attrs_str);
34451 return 0;
34453 else if (predicate_list)
34455 predicate_chain = nreverse (predicate_chain);
34456 *predicate_list = predicate_chain;
34459 return priority;
34462 /* This compares the priority of target features in function DECL1
34463 and DECL2. It returns positive value if DECL1 is higher priority,
34464 negative value if DECL2 is higher priority and 0 if they are the
34465 same. */
34467 static int
34468 ix86_compare_version_priority (tree decl1, tree decl2)
34470 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34471 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34473 return (int)priority1 - (int)priority2;
34476 /* V1 and V2 point to function versions with different priorities
34477 based on the target ISA. This function compares their priorities. */
34479 static int
34480 feature_compare (const void *v1, const void *v2)
34482 typedef struct _function_version_info
34484 tree version_decl;
34485 tree predicate_chain;
34486 unsigned int dispatch_priority;
34487 } function_version_info;
34489 const function_version_info c1 = *(const function_version_info *)v1;
34490 const function_version_info c2 = *(const function_version_info *)v2;
34491 return (c2.dispatch_priority - c1.dispatch_priority);
34494 /* This function generates the dispatch function for
34495 multi-versioned functions. DISPATCH_DECL is the function which will
34496 contain the dispatch logic. FNDECLS are the function choices for
34497 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34498 in DISPATCH_DECL in which the dispatch code is generated. */
34500 static int
34501 dispatch_function_versions (tree dispatch_decl,
34502 void *fndecls_p,
34503 basic_block *empty_bb)
34505 tree default_decl;
34506 gimple ifunc_cpu_init_stmt;
34507 gimple_seq gseq;
34508 int ix;
34509 tree ele;
34510 vec<tree> *fndecls;
34511 unsigned int num_versions = 0;
34512 unsigned int actual_versions = 0;
34513 unsigned int i;
34515 struct _function_version_info
34517 tree version_decl;
34518 tree predicate_chain;
34519 unsigned int dispatch_priority;
34520 }*function_version_info;
34522 gcc_assert (dispatch_decl != NULL
34523 && fndecls_p != NULL
34524 && empty_bb != NULL);
34526 /* fndecls_p is actually a vector.  */
34527 fndecls = static_cast<vec<tree> *> (fndecls_p);
34529 /* At least one more version other than the default. */
34530 num_versions = fndecls->length ();
34531 gcc_assert (num_versions >= 2);
34533 function_version_info = (struct _function_version_info *)
34534 XNEWVEC (struct _function_version_info, (num_versions - 1));
34536 /* The first version in the vector is the default decl. */
34537 default_decl = (*fndecls)[0];
34539 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34541 gseq = bb_seq (*empty_bb);
34542 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34543 constructors, so explicitly call __builtin_cpu_init here. */
34544 ifunc_cpu_init_stmt = gimple_build_call_vec (
34545 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34546 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34547 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34548 set_bb_seq (*empty_bb, gseq);
34550 pop_cfun ();
34553 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34555 tree version_decl = ele;
34556 tree predicate_chain = NULL_TREE;
34557 unsigned int priority;
34558 /* Get attribute string, parse it and find the right predicate decl.
34559 The predicate function could be a lengthy combination of many
34560 features, like arch-type and various isa-variants. */
34561 priority = get_builtin_code_for_version (version_decl,
34562 &predicate_chain);
34564 if (predicate_chain == NULL_TREE)
34565 continue;
34567 function_version_info [actual_versions].version_decl = version_decl;
34568 function_version_info [actual_versions].predicate_chain
34569 = predicate_chain;
34570 function_version_info [actual_versions].dispatch_priority = priority;
34571 actual_versions++;
34574 /* Sort the versions according to descending order of dispatch priority. The
34575 priority is based on the ISA. This is not a perfect solution. There
34576 could still be ambiguity. If more than one function version is suitable
34577 to execute, which one should be dispatched? In future, allow the user
34578 to specify a dispatch priority next to the version. */
34579 qsort (function_version_info, actual_versions,
34580 sizeof (struct _function_version_info), feature_compare);
34582 for (i = 0; i < actual_versions; ++i)
34583 *empty_bb = add_condition_to_bb (dispatch_decl,
34584 function_version_info[i].version_decl,
34585 function_version_info[i].predicate_chain,
34586 *empty_bb);
34588 /* Dispatch the default version at the end. */
34589 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34590 NULL, *empty_bb);
34592 free (function_version_info);
34593 return 0;
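/* Illustrative sketch of the resolver logic assembled above (simplified;
   the actual tests are emitted by add_condition_to_bb from each version's
   predicate chain):

     __builtin_cpu_init ();
     if (<predicates of the highest-priority version hold>)
       return <that version>;
     ...
     return <default version>;   // dispatched last, with no predicate
*/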
34596 /* Comparator function to be used in the qsort routine to sort the
34597 specification strings of the "target" attribute. */
34599 static int
34600 attr_strcmp (const void *v1, const void *v2)
34602 const char *c1 = *(char *const*)v1;
34603 const char *c2 = *(char *const*)v2;
34604 return strcmp (c1, c2);
34607 /* ARGLIST is the argument to the target attribute. This function tokenizes
34608 the comma-separated arguments, sorts them and returns a string which
34609 is a unique identifier for the comma-separated arguments. It also
34610 replaces non-identifier characters "=,-" with "_". */
34612 static char *
34613 sorted_attr_string (tree arglist)
34615 tree arg;
34616 size_t str_len_sum = 0;
34617 char **args = NULL;
34618 char *attr_str, *ret_str;
34619 char *attr = NULL;
34620 unsigned int argnum = 1;
34621 unsigned int i;
34623 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34625 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34626 size_t len = strlen (str);
34627 str_len_sum += len + 1;
34628 if (arg != arglist)
34629 argnum++;
34630 for (i = 0; i < strlen (str); i++)
34631 if (str[i] == ',')
34632 argnum++;
34635 attr_str = XNEWVEC (char, str_len_sum);
34636 str_len_sum = 0;
34637 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34639 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34640 size_t len = strlen (str);
34641 memcpy (attr_str + str_len_sum, str, len);
34642 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34643 str_len_sum += len + 1;
34646 /* Replace "=,-" with "_". */
34647 for (i = 0; i < strlen (attr_str); i++)
34648 if (attr_str[i] == '=' || attr_str[i]== '-')
34649 attr_str[i] = '_';
34651 if (argnum == 1)
34652 return attr_str;
34654 args = XNEWVEC (char *, argnum);
34656 i = 0;
34657 attr = strtok (attr_str, ",");
34658 while (attr != NULL)
34660 args[i] = attr;
34661 i++;
34662 attr = strtok (NULL, ",");
34665 qsort (args, argnum, sizeof (char *), attr_strcmp);
34667 ret_str = XNEWVEC (char, str_len_sum);
34668 str_len_sum = 0;
34669 for (i = 0; i < argnum; i++)
34671 size_t len = strlen (args[i]);
34672 memcpy (ret_str + str_len_sum, args[i], len);
34673 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34674 str_len_sum += len + 1;
34677 XDELETEVEC (args);
34678 XDELETEVEC (attr_str);
34679 return ret_str;
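/* Example (illustrative): for the attribute list {"avx", "arch=core2"} the
   joined string is "avx,arch=core2"; replacing "=" and "-" with "_" gives
   "avx,arch_core2"; sorting the tokens yields {"arch_core2", "avx"}; and
   the returned string is "arch_core2_avx".  */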
34682 /* This function changes the assembler name for functions that are
34683 versions. If DECL is a function version and has a "target"
34684 attribute, it appends the attribute string to its assembler name. */
34686 static tree
34687 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34689 tree version_attr;
34690 const char *orig_name, *version_string;
34691 char *attr_str, *assembler_name;
34693 if (DECL_DECLARED_INLINE_P (decl)
34694 && lookup_attribute ("gnu_inline",
34695 DECL_ATTRIBUTES (decl)))
34696 error_at (DECL_SOURCE_LOCATION (decl),
34697 "Function versions cannot be marked as gnu_inline,"
34698 " bodies have to be generated");
34700 if (DECL_VIRTUAL_P (decl)
34701 || DECL_VINDEX (decl))
34702 sorry ("Virtual function multiversioning not supported");
34704 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34706 /* target attribute string cannot be NULL. */
34707 gcc_assert (version_attr != NULL_TREE);
34709 orig_name = IDENTIFIER_POINTER (id);
34710 version_string
34711 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34713 if (strcmp (version_string, "default") == 0)
34714 return id;
34716 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34717 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34719 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34721 /* Allow assembler name to be modified if already set. */
34722 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34723 SET_DECL_RTL (decl, NULL);
34725 tree ret = get_identifier (assembler_name);
34726 XDELETEVEC (attr_str);
34727 XDELETEVEC (assembler_name);
34728 return ret;
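/* Example (illustrative): for a version declared as

     __attribute__ ((target ("arch=core2,avx"))) int foo (void);

   the assembler name "foo" becomes "foo.arch_core2_avx".  A version whose
   target string is exactly "default" keeps its original name.  */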
34731 /* This function returns true if FN1 and FN2 are versions of the same function,
34732 that is, the target strings of the function decls are different. This assumes
34733 that FN1 and FN2 have the same signature. */
34735 static bool
34736 ix86_function_versions (tree fn1, tree fn2)
34738 tree attr1, attr2;
34739 char *target1, *target2;
34740 bool result;
34742 if (TREE_CODE (fn1) != FUNCTION_DECL
34743 || TREE_CODE (fn2) != FUNCTION_DECL)
34744 return false;
34746 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34747 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34749 /* At least one function decl should have the target attribute specified. */
34750 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34751 return false;
34753 /* Diagnose missing target attribute if one of the decls is already
34754 multi-versioned. */
34755 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34757 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34759 if (attr2 != NULL_TREE)
34761 tree tem = fn1;
34762 fn1 = fn2;
34763 fn2 = tem;
34764 attr1 = attr2;
34766 error_at (DECL_SOURCE_LOCATION (fn2),
34767 "missing %<target%> attribute for multi-versioned %D",
34768 fn2);
34769 inform (DECL_SOURCE_LOCATION (fn1),
34770 "previous declaration of %D", fn1);
34771 /* Prevent diagnosing of the same error multiple times. */
34772 DECL_ATTRIBUTES (fn2)
34773 = tree_cons (get_identifier ("target"),
34774 copy_node (TREE_VALUE (attr1)),
34775 DECL_ATTRIBUTES (fn2));
34777 return false;
34780 target1 = sorted_attr_string (TREE_VALUE (attr1));
34781 target2 = sorted_attr_string (TREE_VALUE (attr2));
34783 /* The sorted target strings must be different for fn1 and fn2
34784 to be versions. */
34785 if (strcmp (target1, target2) == 0)
34786 result = false;
34787 else
34788 result = true;
34790 XDELETEVEC (target1);
34791 XDELETEVEC (target2);
34793 return result;
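/* Illustrative pair of decls that this predicate treats as versions
   (shown schematically; the front end creates the decls):

     __attribute__ ((target ("default"))) int foo (void);
     __attribute__ ((target ("avx2")))    int foo (void);

   Their sorted target strings, "default" and "avx2", differ, so the
   function returns true.  */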
34796 static tree
34797 ix86_mangle_decl_assembler_name (tree decl, tree id)
34799 /* For function version, add the target suffix to the assembler name. */
34800 if (TREE_CODE (decl) == FUNCTION_DECL
34801 && DECL_FUNCTION_VERSIONED (decl))
34802 id = ix86_mangle_function_version_assembler_name (decl, id);
34803 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34804 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34805 #endif
34807 return id;
34810 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34811 is true, append the full path name of the source file. */
34813 static char *
34814 make_name (tree decl, const char *suffix, bool make_unique)
34816 char *global_var_name;
34817 int name_len;
34818 const char *name;
34819 const char *unique_name = NULL;
34821 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34823 /* Get a unique name that can be used globally without any chances
34824 of collision at link time. */
34825 if (make_unique)
34826 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34828 name_len = strlen (name) + strlen (suffix) + 2;
34830 if (make_unique)
34831 name_len += strlen (unique_name) + 1;
34832 global_var_name = XNEWVEC (char, name_len);
34834 /* Use '.' to concatenate names as it is demangler friendly. */
34835 if (make_unique)
34836 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34837 suffix);
34838 else
34839 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34841 return global_var_name;
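/* Example (illustrative): for a decl whose assembler name is "foo" and
   SUFFIX "resolver", the result is "foo.resolver"; with MAKE_UNIQUE the
   result is "foo.<file-id>.resolver", where <file-id> stands for the
   identifier returned by get_file_function_name.  */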
34844 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34846 /* Make a dispatcher declaration for the multi-versioned function DECL.
34847 Calls to DECL function will be replaced with calls to the dispatcher
34848 by the front-end. Return the decl created. */
34850 static tree
34851 make_dispatcher_decl (const tree decl)
34853 tree func_decl;
34854 char *func_name;
34855 tree fn_type, func_type;
34856 bool is_uniq = false;
34858 if (TREE_PUBLIC (decl) == 0)
34859 is_uniq = true;
34861 func_name = make_name (decl, "ifunc", is_uniq);
34863 fn_type = TREE_TYPE (decl);
34864 func_type = build_function_type (TREE_TYPE (fn_type),
34865 TYPE_ARG_TYPES (fn_type));
34867 func_decl = build_fn_decl (func_name, func_type);
34868 XDELETEVEC (func_name);
34869 TREE_USED (func_decl) = 1;
34870 DECL_CONTEXT (func_decl) = NULL_TREE;
34871 DECL_INITIAL (func_decl) = error_mark_node;
34872 DECL_ARTIFICIAL (func_decl) = 1;
34873 /* Mark this function as external; the resolver will flip it again if
34874 it gets generated. */
34875 DECL_EXTERNAL (func_decl) = 1;
34876 /* This will be an IFUNC; IFUNCs have to be externally visible. */
34877 TREE_PUBLIC (func_decl) = 1;
34879 return func_decl;
34882 #endif
34884 /* Returns true if decl is multi-versioned and DECL is the default function,
34885 that is, it is not tagged with a target-specific optimization. */
34887 static bool
34888 is_function_default_version (const tree decl)
34890 if (TREE_CODE (decl) != FUNCTION_DECL
34891 || !DECL_FUNCTION_VERSIONED (decl))
34892 return false;
34893 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34894 gcc_assert (attr);
34895 attr = TREE_VALUE (TREE_VALUE (attr));
34896 return (TREE_CODE (attr) == STRING_CST
34897 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34900 /* Make a dispatcher declaration for the multi-versioned function DECL.
34901 Calls to DECL function will be replaced with calls to the dispatcher
34902 by the front-end. Returns the decl of the dispatcher function. */
34904 static tree
34905 ix86_get_function_versions_dispatcher (void *decl)
34907 tree fn = (tree) decl;
34908 struct cgraph_node *node = NULL;
34909 struct cgraph_node *default_node = NULL;
34910 struct cgraph_function_version_info *node_v = NULL;
34911 struct cgraph_function_version_info *first_v = NULL;
34913 tree dispatch_decl = NULL;
34915 struct cgraph_function_version_info *default_version_info = NULL;
34917 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34919 node = cgraph_node::get (fn);
34920 gcc_assert (node != NULL);
34922 node_v = node->function_version ();
34923 gcc_assert (node_v != NULL);
34925 if (node_v->dispatcher_resolver != NULL)
34926 return node_v->dispatcher_resolver;
34928 /* Find the default version and make it the first node. */
34929 first_v = node_v;
34930 /* Go to the beginning of the chain. */
34931 while (first_v->prev != NULL)
34932 first_v = first_v->prev;
34933 default_version_info = first_v;
34934 while (default_version_info != NULL)
34936 if (is_function_default_version
34937 (default_version_info->this_node->decl))
34938 break;
34939 default_version_info = default_version_info->next;
34942 /* If there is no default node, just return NULL. */
34943 if (default_version_info == NULL)
34944 return NULL;
34946 /* Make default info the first node. */
34947 if (first_v != default_version_info)
34949 default_version_info->prev->next = default_version_info->next;
34950 if (default_version_info->next)
34951 default_version_info->next->prev = default_version_info->prev;
34952 first_v->prev = default_version_info;
34953 default_version_info->next = first_v;
34954 default_version_info->prev = NULL;
34957 default_node = default_version_info->this_node;
34959 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34960 if (targetm.has_ifunc_p ())
34962 struct cgraph_function_version_info *it_v = NULL;
34963 struct cgraph_node *dispatcher_node = NULL;
34964 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34966 /* Right now, the dispatching is done via ifunc. */
34967 dispatch_decl = make_dispatcher_decl (default_node->decl);
34969 dispatcher_node = cgraph_node::get_create (dispatch_decl);
34970 gcc_assert (dispatcher_node != NULL);
34971 dispatcher_node->dispatcher_function = 1;
34972 dispatcher_version_info
34973 = dispatcher_node->insert_new_function_version ();
34974 dispatcher_version_info->next = default_version_info;
34975 dispatcher_node->definition = 1;
34977 /* Set the dispatcher for all the versions. */
34978 it_v = default_version_info;
34979 while (it_v != NULL)
34981 it_v->dispatcher_resolver = dispatch_decl;
34982 it_v = it_v->next;
34985 else
34986 #endif
34988 error_at (DECL_SOURCE_LOCATION (default_node->decl),
34989 "multiversioning needs ifunc which is not supported "
34990 "on this target");
34993 return dispatch_decl;
34996 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
34997 it to CHAIN. */
34999 static tree
35000 make_attribute (const char *name, const char *arg_name, tree chain)
35002 tree attr_name;
35003 tree attr_arg_name;
35004 tree attr_args;
35005 tree attr;
35007 attr_name = get_identifier (name);
35008 attr_arg_name = build_string (strlen (arg_name), arg_name);
35009 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35010 attr = tree_cons (attr_name, attr_args, chain);
35011 return attr;
35014 /* Make the resolver function decl to dispatch the versions of
35015 a multi-versioned function, DEFAULT_DECL. Create an
35016 empty basic block in the resolver and store the pointer in
35017 EMPTY_BB. Return the decl of the resolver function. */
35019 static tree
35020 make_resolver_func (const tree default_decl,
35021 const tree dispatch_decl,
35022 basic_block *empty_bb)
35024 char *resolver_name;
35025 tree decl, type, decl_name, t;
35026 bool is_uniq = false;
35028 /* IFUNCs have to be globally visible. So, if the default_decl is
35029 not, then the name of the IFUNC should be made unique. */
35030 if (TREE_PUBLIC (default_decl) == 0)
35031 is_uniq = true;
35033 /* Append the filename to the resolver function if the versions are
35034 not externally visible. This is because the resolver function has
35035 to be externally visible for the loader to find it. So, appending
35036 the filename will prevent conflicts with a resolver function from
35037 another module which is based on the same version name. */
35038 resolver_name = make_name (default_decl, "resolver", is_uniq);
35040 /* The resolver function should return a (void *). */
35041 type = build_function_type_list (ptr_type_node, NULL_TREE);
35043 decl = build_fn_decl (resolver_name, type);
35044 decl_name = get_identifier (resolver_name);
35045 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35047 DECL_NAME (decl) = decl_name;
35048 TREE_USED (decl) = 1;
35049 DECL_ARTIFICIAL (decl) = 1;
35050 DECL_IGNORED_P (decl) = 0;
35051 /* IFUNC resolvers have to be externally visible. */
35052 TREE_PUBLIC (decl) = 1;
35053 DECL_UNINLINABLE (decl) = 1;
35055 /* Resolver is not external, body is generated. */
35056 DECL_EXTERNAL (decl) = 0;
35057 DECL_EXTERNAL (dispatch_decl) = 0;
35059 DECL_CONTEXT (decl) = NULL_TREE;
35060 DECL_INITIAL (decl) = make_node (BLOCK);
35061 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35063 if (DECL_COMDAT_GROUP (default_decl)
35064 || TREE_PUBLIC (default_decl))
35066 /* In this case, each translation unit with a call to this
35067 versioned function will put out a resolver. Ensure it
35068 is comdat to keep just one copy. */
35069 DECL_COMDAT (decl) = 1;
35070 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35072 /* Build result decl and add to function_decl. */
35073 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35074 DECL_ARTIFICIAL (t) = 1;
35075 DECL_IGNORED_P (t) = 1;
35076 DECL_RESULT (decl) = t;
35078 gimplify_function_tree (decl);
35079 push_cfun (DECL_STRUCT_FUNCTION (decl));
35080 *empty_bb = init_lowered_empty_function (decl, false);
35082 cgraph_node::add_new_function (decl, true);
35083 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35085 pop_cfun ();
35087 gcc_assert (dispatch_decl != NULL);
35088 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35089 DECL_ATTRIBUTES (dispatch_decl)
35090 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35092 /* Create the alias for dispatch to resolver here. */
35093 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35094 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35095 XDELETEVEC (resolver_name);
35096 return decl;
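/* Illustrative effect (simplified): the dispatcher now behaves roughly as
   if it had been declared with an ifunc attribute naming the resolver,
   e.g.

     int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   so the dynamic loader runs the resolver once and binds calls to the
   version it returns.  For non-public decls the resolver name also
   carries the unique file id added by make_name.  */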
35099 /* Generate the dispatching code body to dispatch multi-versioned function
35100 DECL. The target hook is called to process the "target" attributes and
35101 provide the code to dispatch the right function at run-time. NODE points
35102 to the dispatcher decl whose body will be created. */
35104 static tree
35105 ix86_generate_version_dispatcher_body (void *node_p)
35107 tree resolver_decl;
35108 basic_block empty_bb;
35109 tree default_ver_decl;
35110 struct cgraph_node *versn;
35111 struct cgraph_node *node;
35113 struct cgraph_function_version_info *node_version_info = NULL;
35114 struct cgraph_function_version_info *versn_info = NULL;
35116 node = (cgraph_node *)node_p;
35118 node_version_info = node->function_version ();
35119 gcc_assert (node->dispatcher_function
35120 && node_version_info != NULL);
35122 if (node_version_info->dispatcher_resolver)
35123 return node_version_info->dispatcher_resolver;
35125 /* The first version in the chain corresponds to the default version. */
35126 default_ver_decl = node_version_info->next->this_node->decl;
35128 /* node is going to be an alias, so remove the finalized bit. */
35129 node->definition = false;
35131 resolver_decl = make_resolver_func (default_ver_decl,
35132 node->decl, &empty_bb);
35134 node_version_info->dispatcher_resolver = resolver_decl;
35136 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35138 auto_vec<tree, 2> fn_ver_vec;
35140 for (versn_info = node_version_info->next; versn_info;
35141 versn_info = versn_info->next)
35143 versn = versn_info->this_node;
35144 /* Check for virtual functions here again, as by this time it should
35145 have been determined if this function needs a vtable index or
35146 not. This happens for methods in derived classes that override
35147 virtual methods in base classes but are not explicitly marked as
35148 virtual. */
35149 if (DECL_VINDEX (versn->decl))
35150 sorry ("Virtual function multiversioning not supported");
35152 fn_ver_vec.safe_push (versn->decl);
35155 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35156 cgraph_edge::rebuild_edges ();
35157 pop_cfun ();
35158 return resolver_decl;
35160 /* This builds the processor_model struct type defined in
35161 libgcc/config/i386/cpuinfo.c */
35163 static tree
35164 build_processor_model_struct (void)
35166 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35167 "__cpu_features"};
35168 tree field = NULL_TREE, field_chain = NULL_TREE;
35169 int i;
35170 tree type = make_node (RECORD_TYPE);
35172 /* The first 3 fields are unsigned int. */
35173 for (i = 0; i < 3; ++i)
35175 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35176 get_identifier (field_name[i]), unsigned_type_node);
35177 if (field_chain != NULL_TREE)
35178 DECL_CHAIN (field) = field_chain;
35179 field_chain = field;
35182 /* The last field is an array of unsigned integers of size one. */
35183 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35184 get_identifier (field_name[3]),
35185 build_array_type (unsigned_type_node,
35186 build_index_type (size_one_node)));
35187 if (field_chain != NULL_TREE)
35188 DECL_CHAIN (field) = field_chain;
35189 field_chain = field;
35191 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35192 return type;
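/* For reference, the type built above mirrors (in simplified form) the
   struct defined in libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/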
35195 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35197 static tree
35198 make_var_decl (tree type, const char *name)
35200 tree new_decl;
35202 new_decl = build_decl (UNKNOWN_LOCATION,
35203 VAR_DECL,
35204 get_identifier (name),
35205 type);
35207 DECL_EXTERNAL (new_decl) = 1;
35208 TREE_STATIC (new_decl) = 1;
35209 TREE_PUBLIC (new_decl) = 1;
35210 DECL_INITIAL (new_decl) = 0;
35211 DECL_ARTIFICIAL (new_decl) = 0;
35212 DECL_PRESERVE_P (new_decl) = 1;
35214 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35215 assemble_variable (new_decl, 0, 0, 0);
35217 return new_decl;
35220 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35221 into an integer defined in libgcc/config/i386/cpuinfo.c */
35223 static tree
35224 fold_builtin_cpu (tree fndecl, tree *args)
35226 unsigned int i;
35227 enum ix86_builtins fn_code = (enum ix86_builtins)
35228 DECL_FUNCTION_CODE (fndecl);
35229 tree param_string_cst = NULL;
35231 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35232 enum processor_features
35234 F_CMOV = 0,
35235 F_MMX,
35236 F_POPCNT,
35237 F_SSE,
35238 F_SSE2,
35239 F_SSE3,
35240 F_SSSE3,
35241 F_SSE4_1,
35242 F_SSE4_2,
35243 F_AVX,
35244 F_AVX2,
35245 F_SSE4_A,
35246 F_FMA4,
35247 F_XOP,
35248 F_FMA,
35249 F_AVX512F,
35250 F_MAX
35253 /* These are the values for vendor types and cpu types and subtypes
35254 in cpuinfo.c. CPU types and subtypes must have the corresponding
35255 start value subtracted before use. */
35256 enum processor_model
35258 M_INTEL = 1,
35259 M_AMD,
35260 M_CPU_TYPE_START,
35261 M_INTEL_BONNELL,
35262 M_INTEL_CORE2,
35263 M_INTEL_COREI7,
35264 M_AMDFAM10H,
35265 M_AMDFAM15H,
35266 M_INTEL_SILVERMONT,
35267 M_AMD_BTVER1,
35268 M_AMD_BTVER2,
35269 M_CPU_SUBTYPE_START,
35270 M_INTEL_COREI7_NEHALEM,
35271 M_INTEL_COREI7_WESTMERE,
35272 M_INTEL_COREI7_SANDYBRIDGE,
35273 M_AMDFAM10H_BARCELONA,
35274 M_AMDFAM10H_SHANGHAI,
35275 M_AMDFAM10H_ISTANBUL,
35276 M_AMDFAM15H_BDVER1,
35277 M_AMDFAM15H_BDVER2,
35278 M_AMDFAM15H_BDVER3,
35279 M_AMDFAM15H_BDVER4,
35280 M_INTEL_COREI7_IVYBRIDGE,
35281 M_INTEL_COREI7_HASWELL
35284 static struct _arch_names_table
35286 const char *const name;
35287 const enum processor_model model;
35289 const arch_names_table[] =
35291 {"amd", M_AMD},
35292 {"intel", M_INTEL},
35293 {"atom", M_INTEL_BONNELL},
35294 {"slm", M_INTEL_SILVERMONT},
35295 {"core2", M_INTEL_CORE2},
35296 {"corei7", M_INTEL_COREI7},
35297 {"nehalem", M_INTEL_COREI7_NEHALEM},
35298 {"westmere", M_INTEL_COREI7_WESTMERE},
35299 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35300 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35301 {"haswell", M_INTEL_COREI7_HASWELL},
35302 {"bonnell", M_INTEL_BONNELL},
35303 {"silvermont", M_INTEL_SILVERMONT},
35304 {"amdfam10h", M_AMDFAM10H},
35305 {"barcelona", M_AMDFAM10H_BARCELONA},
35306 {"shanghai", M_AMDFAM10H_SHANGHAI},
35307 {"istanbul", M_AMDFAM10H_ISTANBUL},
35308 {"btver1", M_AMD_BTVER1},
35309 {"amdfam15h", M_AMDFAM15H},
35310 {"bdver1", M_AMDFAM15H_BDVER1},
35311 {"bdver2", M_AMDFAM15H_BDVER2},
35312 {"bdver3", M_AMDFAM15H_BDVER3},
35313 {"bdver4", M_AMDFAM15H_BDVER4},
35314 {"btver2", M_AMD_BTVER2},
35317 static struct _isa_names_table
35319 const char *const name;
35320 const enum processor_features feature;
35322 const isa_names_table[] =
35324 {"cmov", F_CMOV},
35325 {"mmx", F_MMX},
35326 {"popcnt", F_POPCNT},
35327 {"sse", F_SSE},
35328 {"sse2", F_SSE2},
35329 {"sse3", F_SSE3},
35330 {"ssse3", F_SSSE3},
35331 {"sse4a", F_SSE4_A},
35332 {"sse4.1", F_SSE4_1},
35333 {"sse4.2", F_SSE4_2},
35334 {"avx", F_AVX},
35335 {"fma4", F_FMA4},
35336 {"xop", F_XOP},
35337 {"fma", F_FMA},
35338 {"avx2", F_AVX2},
35339 {"avx512f",F_AVX512F}
35342 tree __processor_model_type = build_processor_model_struct ();
35343 tree __cpu_model_var = make_var_decl (__processor_model_type,
35344 "__cpu_model");
35347 varpool_node::add (__cpu_model_var);
35349 gcc_assert ((args != NULL) && (*args != NULL));
35351 param_string_cst = *args;
35352 while (param_string_cst
35353 && TREE_CODE (param_string_cst) != STRING_CST)
35355 /* *args must be an expr that can contain other EXPRs leading to a
35356 STRING_CST. */
35357 if (!EXPR_P (param_string_cst))
35359 error ("Parameter to builtin must be a string constant or literal");
35360 return integer_zero_node;
35362 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35365 gcc_assert (param_string_cst);
35367 if (fn_code == IX86_BUILTIN_CPU_IS)
35369 tree ref;
35370 tree field;
35371 tree final;
35373 unsigned int field_val = 0;
35374 unsigned int NUM_ARCH_NAMES
35375 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35377 for (i = 0; i < NUM_ARCH_NAMES; i++)
35378 if (strcmp (arch_names_table[i].name,
35379 TREE_STRING_POINTER (param_string_cst)) == 0)
35380 break;
35382 if (i == NUM_ARCH_NAMES)
35384 error ("Parameter to builtin not valid: %s",
35385 TREE_STRING_POINTER (param_string_cst));
35386 return integer_zero_node;
35389 field = TYPE_FIELDS (__processor_model_type);
35390 field_val = arch_names_table[i].model;
35392 /* CPU types are stored in the next field. */
35393 if (field_val > M_CPU_TYPE_START
35394 && field_val < M_CPU_SUBTYPE_START)
35396 field = DECL_CHAIN (field);
35397 field_val -= M_CPU_TYPE_START;
35400 /* CPU subtypes are stored in the next field. */
35401 if (field_val > M_CPU_SUBTYPE_START)
35403 field = DECL_CHAIN (DECL_CHAIN (field));
35404 field_val -= M_CPU_SUBTYPE_START;
35407 /* Get the appropriate field in __cpu_model. */
35408 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35409 field, NULL_TREE);
35411 /* Check the value. */
35412 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35413 build_int_cstu (unsigned_type_node, field_val));
35414 return build1 (CONVERT_EXPR, integer_type_node, final);
35416 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35418 tree ref;
35419 tree array_elt;
35420 tree field;
35421 tree final;
35423 unsigned int field_val = 0;
35424 unsigned int NUM_ISA_NAMES
35425 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35427 for (i = 0; i < NUM_ISA_NAMES; i++)
35428 if (strcmp (isa_names_table[i].name,
35429 TREE_STRING_POINTER (param_string_cst)) == 0)
35430 break;
35432 if (i == NUM_ISA_NAMES)
35434 error ("Parameter to builtin not valid: %s",
35435 TREE_STRING_POINTER (param_string_cst));
35436 return integer_zero_node;
35439 field = TYPE_FIELDS (__processor_model_type);
35440 /* Get the last field, which is __cpu_features. */
35441 while (DECL_CHAIN (field))
35442 field = DECL_CHAIN (field);
35444 /* Get the appropriate field: __cpu_model.__cpu_features */
35445 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35446 field, NULL_TREE);
35448 /* Access the 0th element of __cpu_features array. */
35449 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35450 integer_zero_node, NULL_TREE, NULL_TREE);
35452 field_val = (1 << isa_names_table[i].feature);
35453 /* Return __cpu_model.__cpu_features[0] & field_val */
35454 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35455 build_int_cstu (unsigned_type_node, field_val));
35456 return build1 (CONVERT_EXPR, integer_type_node, final);
35458 gcc_unreachable ();
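/* Illustrative folds produced above (simplified):

     __builtin_cpu_is ("amd")
       -> (int) (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_is ("bdver2")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_AMDFAM15H_BDVER2 - M_CPU_SUBTYPE_START)
     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))
*/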
35461 static tree
35462 ix86_fold_builtin (tree fndecl, int n_args,
35463 tree *args, bool ignore ATTRIBUTE_UNUSED)
35465 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35467 enum ix86_builtins fn_code = (enum ix86_builtins)
35468 DECL_FUNCTION_CODE (fndecl);
35469 if (fn_code == IX86_BUILTIN_CPU_IS
35470 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35472 gcc_assert (n_args == 1);
35473 return fold_builtin_cpu (fndecl, args);
35477 #ifdef SUBTARGET_FOLD_BUILTIN
35478 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35479 #endif
35481 return NULL_TREE;
35484 /* Make builtins to detect cpu type and features supported. NAME is
35485 the builtin name, CODE is the builtin code, and FTYPE is the function
35486 type of the builtin. */
35488 static void
35489 make_cpu_type_builtin (const char* name, int code,
35490 enum ix86_builtin_func_type ftype, bool is_const)
35492 tree decl;
35493 tree type;
35495 type = ix86_get_builtin_func_type (ftype);
35496 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35497 NULL, NULL_TREE);
35498 gcc_assert (decl != NULL_TREE);
35499 ix86_builtins[(int) code] = decl;
35500 TREE_READONLY (decl) = is_const;
35503 /* Make builtins to get CPU type and features supported. The created
35504 builtins are :
35506 __builtin_cpu_init (), to detect cpu type and features,
35507 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35508 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35511 static void
35512 ix86_init_platform_type_builtins (void)
35514 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35515 INT_FTYPE_VOID, false);
35516 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35517 INT_FTYPE_PCCHAR, true);
35518 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35519 INT_FTYPE_PCCHAR, true);
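/* Illustrative use of the builtins created above from user code
   (run_avx2_path and run_generic_path are hypothetical):

     __builtin_cpu_init ();   // usually called implicitly by IFUNC resolvers
     if (__builtin_cpu_supports ("avx2"))
       run_avx2_path ();
     else
       run_generic_path ();
*/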
35522 /* Internal method for ix86_init_builtins. */
35524 static void
35525 ix86_init_builtins_va_builtins_abi (void)
35527 tree ms_va_ref, sysv_va_ref;
35528 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35529 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35530 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35531 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35533 if (!TARGET_64BIT)
35534 return;
35535 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35536 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35537 ms_va_ref = build_reference_type (ms_va_list_type_node);
35538 sysv_va_ref =
35539 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35541 fnvoid_va_end_ms =
35542 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35543 fnvoid_va_start_ms =
35544 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35545 fnvoid_va_end_sysv =
35546 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35547 fnvoid_va_start_sysv =
35548 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35549 NULL_TREE);
35550 fnvoid_va_copy_ms =
35551 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35552 NULL_TREE);
35553 fnvoid_va_copy_sysv =
35554 build_function_type_list (void_type_node, sysv_va_ref,
35555 sysv_va_ref, NULL_TREE);
35557 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35558 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35559 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35560 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35561 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35562 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35563 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35564 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35565 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35566 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35567 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35568 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35571 static void
35572 ix86_init_builtin_types (void)
35574 tree float128_type_node, float80_type_node;
35576 /* The __float80 type. */
35577 float80_type_node = long_double_type_node;
35578 if (TYPE_MODE (float80_type_node) != XFmode)
35580 /* The __float80 type. */
35581 float80_type_node = make_node (REAL_TYPE);
35583 TYPE_PRECISION (float80_type_node) = 80;
35584 layout_type (float80_type_node);
35586 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35588 /* The __float128 type. */
35589 float128_type_node = make_node (REAL_TYPE);
35590 TYPE_PRECISION (float128_type_node) = 128;
35591 layout_type (float128_type_node);
35592 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35594 /* This macro is built by i386-builtin-types.awk. */
35595 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35598 static void
35599 ix86_init_builtins (void)
35601 tree t;
35603 ix86_init_builtin_types ();
35605 /* Builtins to get CPU type and features. */
35606 ix86_init_platform_type_builtins ();
35608 /* TFmode support builtins. */
35609 def_builtin_const (0, "__builtin_infq",
35610 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35611 def_builtin_const (0, "__builtin_huge_valq",
35612 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35614 /* We will expand them to normal calls if SSE isn't available, since
35615 they are used by libgcc. */
35616 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35617 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35618 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35619 TREE_READONLY (t) = 1;
35620 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35622 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35623 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35624 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35625 TREE_READONLY (t) = 1;
35626 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35628 ix86_init_tm_builtins ();
35629 ix86_init_mmx_sse_builtins ();
35630 ix86_init_mpx_builtins ();
35632 if (TARGET_LP64)
35633 ix86_init_builtins_va_builtins_abi ();
35635 #ifdef SUBTARGET_INIT_BUILTINS
35636 SUBTARGET_INIT_BUILTINS;
35637 #endif
35640 /* Return the ix86 builtin for CODE. */
35642 static tree
35643 ix86_builtin_decl (unsigned code, bool)
35645 if (code >= IX86_BUILTIN_MAX)
35646 return error_mark_node;
35648 return ix86_builtins[code];
35651 /* Errors in the source file can cause expand_expr to return const0_rtx
35652 where we expect a vector. To avoid crashing, use one of the vector
35653 clear instructions. */
35654 static rtx
35655 safe_vector_operand (rtx x, machine_mode mode)
35657 if (x == const0_rtx)
35658 x = CONST0_RTX (mode);
35659 return x;
35662 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35664 static rtx
35665 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35667 rtx pat;
35668 tree arg0 = CALL_EXPR_ARG (exp, 0);
35669 tree arg1 = CALL_EXPR_ARG (exp, 1);
35670 rtx op0 = expand_normal (arg0);
35671 rtx op1 = expand_normal (arg1);
35672 machine_mode tmode = insn_data[icode].operand[0].mode;
35673 machine_mode mode0 = insn_data[icode].operand[1].mode;
35674 machine_mode mode1 = insn_data[icode].operand[2].mode;
35676 if (VECTOR_MODE_P (mode0))
35677 op0 = safe_vector_operand (op0, mode0);
35678 if (VECTOR_MODE_P (mode1))
35679 op1 = safe_vector_operand (op1, mode1);
35681 if (optimize || !target
35682 || GET_MODE (target) != tmode
35683 || !insn_data[icode].operand[0].predicate (target, tmode))
35684 target = gen_reg_rtx (tmode);
35686 if (GET_MODE (op1) == SImode && mode1 == TImode)
35688 rtx x = gen_reg_rtx (V4SImode);
35689 emit_insn (gen_sse2_loadd (x, op1));
35690 op1 = gen_lowpart (TImode, x);
35693 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35694 op0 = copy_to_mode_reg (mode0, op0);
35695 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35696 op1 = copy_to_mode_reg (mode1, op1);
35698 pat = GEN_FCN (icode) (target, op0, op1);
35699 if (! pat)
35700 return 0;
35702 emit_insn (pat);
35704 return target;
35707 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35709 static rtx
35710 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35711 enum ix86_builtin_func_type m_type,
35712 enum rtx_code sub_code)
35714 rtx pat;
35715 int i;
35716 int nargs;
35717 bool comparison_p = false;
35718 bool tf_p = false;
35719 bool last_arg_constant = false;
35720 int num_memory = 0;
35721 struct {
35722 rtx op;
35723 machine_mode mode;
35724 } args[4];
35726 machine_mode tmode = insn_data[icode].operand[0].mode;
35728 switch (m_type)
35730 case MULTI_ARG_4_DF2_DI_I:
35731 case MULTI_ARG_4_DF2_DI_I1:
35732 case MULTI_ARG_4_SF2_SI_I:
35733 case MULTI_ARG_4_SF2_SI_I1:
35734 nargs = 4;
35735 last_arg_constant = true;
35736 break;
35738 case MULTI_ARG_3_SF:
35739 case MULTI_ARG_3_DF:
35740 case MULTI_ARG_3_SF2:
35741 case MULTI_ARG_3_DF2:
35742 case MULTI_ARG_3_DI:
35743 case MULTI_ARG_3_SI:
35744 case MULTI_ARG_3_SI_DI:
35745 case MULTI_ARG_3_HI:
35746 case MULTI_ARG_3_HI_SI:
35747 case MULTI_ARG_3_QI:
35748 case MULTI_ARG_3_DI2:
35749 case MULTI_ARG_3_SI2:
35750 case MULTI_ARG_3_HI2:
35751 case MULTI_ARG_3_QI2:
35752 nargs = 3;
35753 break;
35755 case MULTI_ARG_2_SF:
35756 case MULTI_ARG_2_DF:
35757 case MULTI_ARG_2_DI:
35758 case MULTI_ARG_2_SI:
35759 case MULTI_ARG_2_HI:
35760 case MULTI_ARG_2_QI:
35761 nargs = 2;
35762 break;
35764 case MULTI_ARG_2_DI_IMM:
35765 case MULTI_ARG_2_SI_IMM:
35766 case MULTI_ARG_2_HI_IMM:
35767 case MULTI_ARG_2_QI_IMM:
35768 nargs = 2;
35769 last_arg_constant = true;
35770 break;
35772 case MULTI_ARG_1_SF:
35773 case MULTI_ARG_1_DF:
35774 case MULTI_ARG_1_SF2:
35775 case MULTI_ARG_1_DF2:
35776 case MULTI_ARG_1_DI:
35777 case MULTI_ARG_1_SI:
35778 case MULTI_ARG_1_HI:
35779 case MULTI_ARG_1_QI:
35780 case MULTI_ARG_1_SI_DI:
35781 case MULTI_ARG_1_HI_DI:
35782 case MULTI_ARG_1_HI_SI:
35783 case MULTI_ARG_1_QI_DI:
35784 case MULTI_ARG_1_QI_SI:
35785 case MULTI_ARG_1_QI_HI:
35786 nargs = 1;
35787 break;
35789 case MULTI_ARG_2_DI_CMP:
35790 case MULTI_ARG_2_SI_CMP:
35791 case MULTI_ARG_2_HI_CMP:
35792 case MULTI_ARG_2_QI_CMP:
35793 nargs = 2;
35794 comparison_p = true;
35795 break;
35797 case MULTI_ARG_2_SF_TF:
35798 case MULTI_ARG_2_DF_TF:
35799 case MULTI_ARG_2_DI_TF:
35800 case MULTI_ARG_2_SI_TF:
35801 case MULTI_ARG_2_HI_TF:
35802 case MULTI_ARG_2_QI_TF:
35803 nargs = 2;
35804 tf_p = true;
35805 break;
35807 default:
35808 gcc_unreachable ();
35811 if (optimize || !target
35812 || GET_MODE (target) != tmode
35813 || !insn_data[icode].operand[0].predicate (target, tmode))
35814 target = gen_reg_rtx (tmode);
35816 gcc_assert (nargs <= 4);
35818 for (i = 0; i < nargs; i++)
35820 tree arg = CALL_EXPR_ARG (exp, i);
35821 rtx op = expand_normal (arg);
35822 int adjust = (comparison_p) ? 1 : 0;
35823 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35825 if (last_arg_constant && i == nargs - 1)
35827 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35829 enum insn_code new_icode = icode;
35830 switch (icode)
35832 case CODE_FOR_xop_vpermil2v2df3:
35833 case CODE_FOR_xop_vpermil2v4sf3:
35834 case CODE_FOR_xop_vpermil2v4df3:
35835 case CODE_FOR_xop_vpermil2v8sf3:
35836 error ("the last argument must be a 2-bit immediate");
35837 return gen_reg_rtx (tmode);
35838 case CODE_FOR_xop_rotlv2di3:
35839 new_icode = CODE_FOR_rotlv2di3;
35840 goto xop_rotl;
35841 case CODE_FOR_xop_rotlv4si3:
35842 new_icode = CODE_FOR_rotlv4si3;
35843 goto xop_rotl;
35844 case CODE_FOR_xop_rotlv8hi3:
35845 new_icode = CODE_FOR_rotlv8hi3;
35846 goto xop_rotl;
35847 case CODE_FOR_xop_rotlv16qi3:
35848 new_icode = CODE_FOR_rotlv16qi3;
35849 xop_rotl:
35850 if (CONST_INT_P (op))
35852 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35853 op = GEN_INT (INTVAL (op) & mask);
35854 gcc_checking_assert
35855 (insn_data[icode].operand[i + 1].predicate (op, mode));
35857 else
35859 gcc_checking_assert
35860 (nargs == 2
35861 && insn_data[new_icode].operand[0].mode == tmode
35862 && insn_data[new_icode].operand[1].mode == tmode
35863 && insn_data[new_icode].operand[2].mode == mode
35864 && insn_data[new_icode].operand[0].predicate
35865 == insn_data[icode].operand[0].predicate
35866 && insn_data[new_icode].operand[1].predicate
35867 == insn_data[icode].operand[1].predicate);
35868 icode = new_icode;
35869 goto non_constant;
35871 break;
35872 default:
35873 gcc_unreachable ();
35877 else
35879 non_constant:
35880 if (VECTOR_MODE_P (mode))
35881 op = safe_vector_operand (op, mode);
35883 /* If we aren't optimizing, only allow one memory operand to be
35884 generated. */
35885 if (memory_operand (op, mode))
35886 num_memory++;
35888 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35890 if (optimize
35891 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35892 || num_memory > 1)
35893 op = force_reg (mode, op);
35896 args[i].op = op;
35897 args[i].mode = mode;
35900 switch (nargs)
35902 case 1:
35903 pat = GEN_FCN (icode) (target, args[0].op);
35904 break;
35906 case 2:
35907 if (tf_p)
35908 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35909 GEN_INT ((int)sub_code));
35910 else if (! comparison_p)
35911 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35912 else
35914 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35915 args[0].op,
35916 args[1].op);
35918 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35920 break;
35922 case 3:
35923 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35924 break;
35926 case 4:
35927 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35928 break;
35930 default:
35931 gcc_unreachable ();
35934 if (! pat)
35935 return 0;
35937 emit_insn (pat);
35938 return target;
35941 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35942 insns with vec_merge. */
35944 static rtx
35945 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35946 rtx target)
35948 rtx pat;
35949 tree arg0 = CALL_EXPR_ARG (exp, 0);
35950 rtx op1, op0 = expand_normal (arg0);
35951 machine_mode tmode = insn_data[icode].operand[0].mode;
35952 machine_mode mode0 = insn_data[icode].operand[1].mode;
35954 if (optimize || !target
35955 || GET_MODE (target) != tmode
35956 || !insn_data[icode].operand[0].predicate (target, tmode))
35957 target = gen_reg_rtx (tmode);
35959 if (VECTOR_MODE_P (mode0))
35960 op0 = safe_vector_operand (op0, mode0);
35962 if ((optimize && !register_operand (op0, mode0))
35963 || !insn_data[icode].operand[1].predicate (op0, mode0))
35964 op0 = copy_to_mode_reg (mode0, op0);
35966 op1 = op0;
35967 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35968 op1 = copy_to_mode_reg (mode0, op1);
35970 pat = GEN_FCN (icode) (target, op0, op1);
35971 if (! pat)
35972 return 0;
35973 emit_insn (pat);
35974 return target;
35977 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
35979 static rtx
35980 ix86_expand_sse_compare (const struct builtin_description *d,
35981 tree exp, rtx target, bool swap)
35983 rtx pat;
35984 tree arg0 = CALL_EXPR_ARG (exp, 0);
35985 tree arg1 = CALL_EXPR_ARG (exp, 1);
35986 rtx op0 = expand_normal (arg0);
35987 rtx op1 = expand_normal (arg1);
35988 rtx op2;
35989 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35990 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35991 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35992 enum rtx_code comparison = d->comparison;
35994 if (VECTOR_MODE_P (mode0))
35995 op0 = safe_vector_operand (op0, mode0);
35996 if (VECTOR_MODE_P (mode1))
35997 op1 = safe_vector_operand (op1, mode1);
35999 /* Swap operands if we have a comparison that isn't available in
36000 hardware. */
36001 if (swap)
36003 rtx tmp = gen_reg_rtx (mode1);
36004 emit_move_insn (tmp, op1);
36005 op1 = op0;
36006 op0 = tmp;
36009 if (optimize || !target
36010 || GET_MODE (target) != tmode
36011 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36012 target = gen_reg_rtx (tmode);
36014 if ((optimize && !register_operand (op0, mode0))
36015 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36016 op0 = copy_to_mode_reg (mode0, op0);
36017 if ((optimize && !register_operand (op1, mode1))
36018 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36019 op1 = copy_to_mode_reg (mode1, op1);
36021 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36022 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36023 if (! pat)
36024 return 0;
36025 emit_insn (pat);
36026 return target;
36029 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36031 static rtx
36032 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36033 rtx target)
36035 rtx pat;
36036 tree arg0 = CALL_EXPR_ARG (exp, 0);
36037 tree arg1 = CALL_EXPR_ARG (exp, 1);
36038 rtx op0 = expand_normal (arg0);
36039 rtx op1 = expand_normal (arg1);
36040 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36041 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36042 enum rtx_code comparison = d->comparison;
36044 if (VECTOR_MODE_P (mode0))
36045 op0 = safe_vector_operand (op0, mode0);
36046 if (VECTOR_MODE_P (mode1))
36047 op1 = safe_vector_operand (op1, mode1);
36049 /* Swap operands if we have a comparison that isn't available in
36050 hardware. */
36051 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36052 std::swap (op1, op0);
36054 target = gen_reg_rtx (SImode);
36055 emit_move_insn (target, const0_rtx);
36056 target = gen_rtx_SUBREG (QImode, target, 0);
36058 if ((optimize && !register_operand (op0, mode0))
36059 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36060 op0 = copy_to_mode_reg (mode0, op0);
36061 if ((optimize && !register_operand (op1, mode1))
36062 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36063 op1 = copy_to_mode_reg (mode1, op1);
36065 pat = GEN_FCN (d->icode) (op0, op1);
36066 if (! pat)
36067 return 0;
36068 emit_insn (pat);
36069 emit_insn (gen_rtx_SET (VOIDmode,
36070 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36071 gen_rtx_fmt_ee (comparison, QImode,
36072 SET_DEST (pat),
36073 const0_rtx)));
36075 return SUBREG_REG (target);
36078 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36080 static rtx
36081 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36082 rtx target)
36084 rtx pat;
36085 tree arg0 = CALL_EXPR_ARG (exp, 0);
36086 rtx op1, op0 = expand_normal (arg0);
36087 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36088 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36090 if (optimize || target == 0
36091 || GET_MODE (target) != tmode
36092 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36093 target = gen_reg_rtx (tmode);
36095 if (VECTOR_MODE_P (mode0))
36096 op0 = safe_vector_operand (op0, mode0);
36098 if ((optimize && !register_operand (op0, mode0))
36099 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36100 op0 = copy_to_mode_reg (mode0, op0);
36102 op1 = GEN_INT (d->comparison);
36104 pat = GEN_FCN (d->icode) (target, op0, op1);
36105 if (! pat)
36106 return 0;
36107 emit_insn (pat);
36108 return target;
36111 static rtx
36112 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36113 tree exp, rtx target)
36115 rtx pat;
36116 tree arg0 = CALL_EXPR_ARG (exp, 0);
36117 tree arg1 = CALL_EXPR_ARG (exp, 1);
36118 rtx op0 = expand_normal (arg0);
36119 rtx op1 = expand_normal (arg1);
36120 rtx op2;
36121 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36122 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36123 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36125 if (optimize || target == 0
36126 || GET_MODE (target) != tmode
36127 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36128 target = gen_reg_rtx (tmode);
36130 op0 = safe_vector_operand (op0, mode0);
36131 op1 = safe_vector_operand (op1, mode1);
36133 if ((optimize && !register_operand (op0, mode0))
36134 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36135 op0 = copy_to_mode_reg (mode0, op0);
36136 if ((optimize && !register_operand (op1, mode1))
36137 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36138 op1 = copy_to_mode_reg (mode1, op1);
36140 op2 = GEN_INT (d->comparison);
36142 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36143 if (! pat)
36144 return 0;
36145 emit_insn (pat);
36146 return target;
36149 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36151 static rtx
36152 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36153 rtx target)
36155 rtx pat;
36156 tree arg0 = CALL_EXPR_ARG (exp, 0);
36157 tree arg1 = CALL_EXPR_ARG (exp, 1);
36158 rtx op0 = expand_normal (arg0);
36159 rtx op1 = expand_normal (arg1);
36160 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36161 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36162 enum rtx_code comparison = d->comparison;
36164 if (VECTOR_MODE_P (mode0))
36165 op0 = safe_vector_operand (op0, mode0);
36166 if (VECTOR_MODE_P (mode1))
36167 op1 = safe_vector_operand (op1, mode1);
36169 target = gen_reg_rtx (SImode);
36170 emit_move_insn (target, const0_rtx);
36171 target = gen_rtx_SUBREG (QImode, target, 0);
36173 if ((optimize && !register_operand (op0, mode0))
36174 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36175 op0 = copy_to_mode_reg (mode0, op0);
36176 if ((optimize && !register_operand (op1, mode1))
36177 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36178 op1 = copy_to_mode_reg (mode1, op1);
36180 pat = GEN_FCN (d->icode) (op0, op1);
36181 if (! pat)
36182 return 0;
36183 emit_insn (pat);
36184 emit_insn (gen_rtx_SET (VOIDmode,
36185 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36186 gen_rtx_fmt_ee (comparison, QImode,
36187 SET_DEST (pat),
36188 const0_rtx)));
36190 return SUBREG_REG (target);
36193 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36195 static rtx
36196 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36197 tree exp, rtx target)
36199 rtx pat;
36200 tree arg0 = CALL_EXPR_ARG (exp, 0);
36201 tree arg1 = CALL_EXPR_ARG (exp, 1);
36202 tree arg2 = CALL_EXPR_ARG (exp, 2);
36203 tree arg3 = CALL_EXPR_ARG (exp, 3);
36204 tree arg4 = CALL_EXPR_ARG (exp, 4);
36205 rtx scratch0, scratch1;
36206 rtx op0 = expand_normal (arg0);
36207 rtx op1 = expand_normal (arg1);
36208 rtx op2 = expand_normal (arg2);
36209 rtx op3 = expand_normal (arg3);
36210 rtx op4 = expand_normal (arg4);
36211 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36213 tmode0 = insn_data[d->icode].operand[0].mode;
36214 tmode1 = insn_data[d->icode].operand[1].mode;
36215 modev2 = insn_data[d->icode].operand[2].mode;
36216 modei3 = insn_data[d->icode].operand[3].mode;
36217 modev4 = insn_data[d->icode].operand[4].mode;
36218 modei5 = insn_data[d->icode].operand[5].mode;
36219 modeimm = insn_data[d->icode].operand[6].mode;
36221 if (VECTOR_MODE_P (modev2))
36222 op0 = safe_vector_operand (op0, modev2);
36223 if (VECTOR_MODE_P (modev4))
36224 op2 = safe_vector_operand (op2, modev4);
36226 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36227 op0 = copy_to_mode_reg (modev2, op0);
36228 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36229 op1 = copy_to_mode_reg (modei3, op1);
36230 if ((optimize && !register_operand (op2, modev4))
36231 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36232 op2 = copy_to_mode_reg (modev4, op2);
36233 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36234 op3 = copy_to_mode_reg (modei5, op3);
36236 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36238 error ("the fifth argument must be an 8-bit immediate");
36239 return const0_rtx;
36242 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36244 if (optimize || !target
36245 || GET_MODE (target) != tmode0
36246 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36247 target = gen_reg_rtx (tmode0);
36249 scratch1 = gen_reg_rtx (tmode1);
36251 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36253 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36255 if (optimize || !target
36256 || GET_MODE (target) != tmode1
36257 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36258 target = gen_reg_rtx (tmode1);
36260 scratch0 = gen_reg_rtx (tmode0);
36262 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36264 else
36266 gcc_assert (d->flag);
36268 scratch0 = gen_reg_rtx (tmode0);
36269 scratch1 = gen_reg_rtx (tmode1);
36271 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36274 if (! pat)
36275 return 0;
36277 emit_insn (pat);
36279 if (d->flag)
36281 target = gen_reg_rtx (SImode);
36282 emit_move_insn (target, const0_rtx);
36283 target = gen_rtx_SUBREG (QImode, target, 0);
36285 emit_insn
36286 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36287 gen_rtx_fmt_ee (EQ, QImode,
36288 gen_rtx_REG ((machine_mode) d->flag,
36289 FLAGS_REG),
36290 const0_rtx)));
36291 return SUBREG_REG (target);
36293 else
36294 return target;
36298 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36300 static rtx
36301 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36302 tree exp, rtx target)
36304 rtx pat;
36305 tree arg0 = CALL_EXPR_ARG (exp, 0);
36306 tree arg1 = CALL_EXPR_ARG (exp, 1);
36307 tree arg2 = CALL_EXPR_ARG (exp, 2);
36308 rtx scratch0, scratch1;
36309 rtx op0 = expand_normal (arg0);
36310 rtx op1 = expand_normal (arg1);
36311 rtx op2 = expand_normal (arg2);
36312 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36314 tmode0 = insn_data[d->icode].operand[0].mode;
36315 tmode1 = insn_data[d->icode].operand[1].mode;
36316 modev2 = insn_data[d->icode].operand[2].mode;
36317 modev3 = insn_data[d->icode].operand[3].mode;
36318 modeimm = insn_data[d->icode].operand[4].mode;
36320 if (VECTOR_MODE_P (modev2))
36321 op0 = safe_vector_operand (op0, modev2);
36322 if (VECTOR_MODE_P (modev3))
36323 op1 = safe_vector_operand (op1, modev3);
36325 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36326 op0 = copy_to_mode_reg (modev2, op0);
36327 if ((optimize && !register_operand (op1, modev3))
36328 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36329 op1 = copy_to_mode_reg (modev3, op1);
36331 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36333 error ("the third argument must be an 8-bit immediate");
36334 return const0_rtx;
36337 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36339 if (optimize || !target
36340 || GET_MODE (target) != tmode0
36341 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36342 target = gen_reg_rtx (tmode0);
36344 scratch1 = gen_reg_rtx (tmode1);
36346 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36348 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36350 if (optimize || !target
36351 || GET_MODE (target) != tmode1
36352 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36353 target = gen_reg_rtx (tmode1);
36355 scratch0 = gen_reg_rtx (tmode0);
36357 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36359 else
36361 gcc_assert (d->flag);
36363 scratch0 = gen_reg_rtx (tmode0);
36364 scratch1 = gen_reg_rtx (tmode1);
36366 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36369 if (! pat)
36370 return 0;
36372 emit_insn (pat);
36374 if (d->flag)
36376 target = gen_reg_rtx (SImode);
36377 emit_move_insn (target, const0_rtx);
36378 target = gen_rtx_SUBREG (QImode, target, 0);
36380 emit_insn
36381 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36382 gen_rtx_fmt_ee (EQ, QImode,
36383 gen_rtx_REG ((machine_mode) d->flag,
36384 FLAGS_REG),
36385 const0_rtx)));
36386 return SUBREG_REG (target);
36388 else
36389 return target;
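/* Illustrative note (not part of the original source): the two expanders
   above back the SSE4.2 string-compare intrinsics.  Assuming the usual
   <nmmintrin.h> mapping, a call such as

       int idx = _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);

   reaches ix86_expand_sse_pcmpistr through __builtin_ia32_pcmpistri128, with
   the mode flags arriving as the CONST_INT immediate validated above, while
   the flag-returning variants (e.g. _mm_cmpistrz) take the d->flag path that
   extracts the requested EFLAGS condition into a QImode result.  */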
36392 /* Subroutine of ix86_expand_builtin to take care of insns with
36393 variable number of operands. */
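/* Illustrative example (an assumption, not taken from the original source):
   a plain two-operand builtin such as __builtin_ia32_paddb128, declared with
   the V16QI_FTYPE_V16QI_V16QI signature, follows the "nargs = 2" arm of the
   switch below and, having no comparison code, is handed straight to
   ix86_expand_binop_builtin.  */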
36395 static rtx
36396 ix86_expand_args_builtin (const struct builtin_description *d,
36397 tree exp, rtx target)
36399 rtx pat, real_target;
36400 unsigned int i, nargs;
36401 unsigned int nargs_constant = 0;
36402 unsigned int mask_pos = 0;
36403 int num_memory = 0;
36404 struct
36406 rtx op;
36407 machine_mode mode;
36408 } args[6];
36409 bool last_arg_count = false;
36410 enum insn_code icode = d->icode;
36411 const struct insn_data_d *insn_p = &insn_data[icode];
36412 machine_mode tmode = insn_p->operand[0].mode;
36413 machine_mode rmode = VOIDmode;
36414 bool swap = false;
36415 enum rtx_code comparison = d->comparison;
36417 switch ((enum ix86_builtin_func_type) d->flag)
36419 case V2DF_FTYPE_V2DF_ROUND:
36420 case V4DF_FTYPE_V4DF_ROUND:
36421 case V4SF_FTYPE_V4SF_ROUND:
36422 case V8SF_FTYPE_V8SF_ROUND:
36423 case V4SI_FTYPE_V4SF_ROUND:
36424 case V8SI_FTYPE_V8SF_ROUND:
36425 return ix86_expand_sse_round (d, exp, target);
36426 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36427 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36428 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36429 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36430 case INT_FTYPE_V8SF_V8SF_PTEST:
36431 case INT_FTYPE_V4DI_V4DI_PTEST:
36432 case INT_FTYPE_V4DF_V4DF_PTEST:
36433 case INT_FTYPE_V4SF_V4SF_PTEST:
36434 case INT_FTYPE_V2DI_V2DI_PTEST:
36435 case INT_FTYPE_V2DF_V2DF_PTEST:
36436 return ix86_expand_sse_ptest (d, exp, target);
36437 case FLOAT128_FTYPE_FLOAT128:
36438 case FLOAT_FTYPE_FLOAT:
36439 case INT_FTYPE_INT:
36440 case UINT64_FTYPE_INT:
36441 case UINT16_FTYPE_UINT16:
36442 case INT64_FTYPE_INT64:
36443 case INT64_FTYPE_V4SF:
36444 case INT64_FTYPE_V2DF:
36445 case INT_FTYPE_V16QI:
36446 case INT_FTYPE_V8QI:
36447 case INT_FTYPE_V8SF:
36448 case INT_FTYPE_V4DF:
36449 case INT_FTYPE_V4SF:
36450 case INT_FTYPE_V2DF:
36451 case INT_FTYPE_V32QI:
36452 case V16QI_FTYPE_V16QI:
36453 case V8SI_FTYPE_V8SF:
36454 case V8SI_FTYPE_V4SI:
36455 case V8HI_FTYPE_V8HI:
36456 case V8HI_FTYPE_V16QI:
36457 case V8QI_FTYPE_V8QI:
36458 case V8SF_FTYPE_V8SF:
36459 case V8SF_FTYPE_V8SI:
36460 case V8SF_FTYPE_V4SF:
36461 case V8SF_FTYPE_V8HI:
36462 case V4SI_FTYPE_V4SI:
36463 case V4SI_FTYPE_V16QI:
36464 case V4SI_FTYPE_V4SF:
36465 case V4SI_FTYPE_V8SI:
36466 case V4SI_FTYPE_V8HI:
36467 case V4SI_FTYPE_V4DF:
36468 case V4SI_FTYPE_V2DF:
36469 case V4HI_FTYPE_V4HI:
36470 case V4DF_FTYPE_V4DF:
36471 case V4DF_FTYPE_V4SI:
36472 case V4DF_FTYPE_V4SF:
36473 case V4DF_FTYPE_V2DF:
36474 case V4SF_FTYPE_V4SF:
36475 case V4SF_FTYPE_V4SI:
36476 case V4SF_FTYPE_V8SF:
36477 case V4SF_FTYPE_V4DF:
36478 case V4SF_FTYPE_V8HI:
36479 case V4SF_FTYPE_V2DF:
36480 case V2DI_FTYPE_V2DI:
36481 case V2DI_FTYPE_V16QI:
36482 case V2DI_FTYPE_V8HI:
36483 case V2DI_FTYPE_V4SI:
36484 case V2DF_FTYPE_V2DF:
36485 case V2DF_FTYPE_V4SI:
36486 case V2DF_FTYPE_V4DF:
36487 case V2DF_FTYPE_V4SF:
36488 case V2DF_FTYPE_V2SI:
36489 case V2SI_FTYPE_V2SI:
36490 case V2SI_FTYPE_V4SF:
36491 case V2SI_FTYPE_V2SF:
36492 case V2SI_FTYPE_V2DF:
36493 case V2SF_FTYPE_V2SF:
36494 case V2SF_FTYPE_V2SI:
36495 case V32QI_FTYPE_V32QI:
36496 case V32QI_FTYPE_V16QI:
36497 case V16HI_FTYPE_V16HI:
36498 case V16HI_FTYPE_V8HI:
36499 case V8SI_FTYPE_V8SI:
36500 case V16HI_FTYPE_V16QI:
36501 case V8SI_FTYPE_V16QI:
36502 case V4DI_FTYPE_V16QI:
36503 case V8SI_FTYPE_V8HI:
36504 case V4DI_FTYPE_V8HI:
36505 case V4DI_FTYPE_V4SI:
36506 case V4DI_FTYPE_V2DI:
36507 case HI_FTYPE_HI:
36508 case HI_FTYPE_V16QI:
36509 case SI_FTYPE_V32QI:
36510 case DI_FTYPE_V64QI:
36511 case V16QI_FTYPE_HI:
36512 case V32QI_FTYPE_SI:
36513 case V64QI_FTYPE_DI:
36514 case V8HI_FTYPE_QI:
36515 case V16HI_FTYPE_HI:
36516 case V32HI_FTYPE_SI:
36517 case V4SI_FTYPE_QI:
36518 case V8SI_FTYPE_QI:
36519 case V4SI_FTYPE_HI:
36520 case V8SI_FTYPE_HI:
36521 case QI_FTYPE_V8HI:
36522 case HI_FTYPE_V16HI:
36523 case SI_FTYPE_V32HI:
36524 case QI_FTYPE_V4SI:
36525 case QI_FTYPE_V8SI:
36526 case HI_FTYPE_V16SI:
36527 case QI_FTYPE_V2DI:
36528 case QI_FTYPE_V4DI:
36529 case QI_FTYPE_V8DI:
36530 case UINT_FTYPE_V2DF:
36531 case UINT_FTYPE_V4SF:
36532 case UINT64_FTYPE_V2DF:
36533 case UINT64_FTYPE_V4SF:
36534 case V16QI_FTYPE_V8DI:
36535 case V16HI_FTYPE_V16SI:
36536 case V16SI_FTYPE_HI:
36537 case V2DI_FTYPE_QI:
36538 case V4DI_FTYPE_QI:
36539 case V16SI_FTYPE_V16SI:
36540 case V16SI_FTYPE_INT:
36541 case V16SF_FTYPE_FLOAT:
36542 case V16SF_FTYPE_V8SF:
36543 case V16SI_FTYPE_V8SI:
36544 case V16SF_FTYPE_V4SF:
36545 case V16SI_FTYPE_V4SI:
36546 case V16SF_FTYPE_V16SF:
36547 case V8HI_FTYPE_V8DI:
36548 case V8UHI_FTYPE_V8UHI:
36549 case V8SI_FTYPE_V8DI:
36550 case V8SF_FTYPE_V8DF:
36551 case V8DI_FTYPE_QI:
36552 case V8DI_FTYPE_INT64:
36553 case V8DI_FTYPE_V4DI:
36554 case V8DI_FTYPE_V8DI:
36555 case V8DF_FTYPE_DOUBLE:
36556 case V8DF_FTYPE_V4DF:
36557 case V8DF_FTYPE_V2DF:
36558 case V8DF_FTYPE_V8DF:
36559 case V8DF_FTYPE_V8SI:
36560 nargs = 1;
36561 break;
36562 case V4SF_FTYPE_V4SF_VEC_MERGE:
36563 case V2DF_FTYPE_V2DF_VEC_MERGE:
36564 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36565 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36566 case V16QI_FTYPE_V16QI_V16QI:
36567 case V16QI_FTYPE_V8HI_V8HI:
36568 case V16SI_FTYPE_V16SI_V16SI:
36569 case V16SF_FTYPE_V16SF_V16SF:
36570 case V16SF_FTYPE_V16SF_V16SI:
36571 case V8QI_FTYPE_V8QI_V8QI:
36572 case V8QI_FTYPE_V4HI_V4HI:
36573 case V8HI_FTYPE_V8HI_V8HI:
36574 case V8HI_FTYPE_V16QI_V16QI:
36575 case V8HI_FTYPE_V4SI_V4SI:
36576 case V8SF_FTYPE_V8SF_V8SF:
36577 case V8SF_FTYPE_V8SF_V8SI:
36578 case V8DI_FTYPE_V8DI_V8DI:
36579 case V8DF_FTYPE_V8DF_V8DF:
36580 case V8DF_FTYPE_V8DF_V8DI:
36581 case V4SI_FTYPE_V4SI_V4SI:
36582 case V4SI_FTYPE_V8HI_V8HI:
36583 case V4SI_FTYPE_V4SF_V4SF:
36584 case V4SI_FTYPE_V2DF_V2DF:
36585 case V4HI_FTYPE_V4HI_V4HI:
36586 case V4HI_FTYPE_V8QI_V8QI:
36587 case V4HI_FTYPE_V2SI_V2SI:
36588 case V4DF_FTYPE_V4DF_V4DF:
36589 case V4DF_FTYPE_V4DF_V4DI:
36590 case V4SF_FTYPE_V4SF_V4SF:
36591 case V4SF_FTYPE_V4SF_V4SI:
36592 case V4SF_FTYPE_V4SF_V2SI:
36593 case V4SF_FTYPE_V4SF_V2DF:
36594 case V4SF_FTYPE_V4SF_UINT:
36595 case V4SF_FTYPE_V4SF_UINT64:
36596 case V4SF_FTYPE_V4SF_DI:
36597 case V4SF_FTYPE_V4SF_SI:
36598 case V2DI_FTYPE_V2DI_V2DI:
36599 case V2DI_FTYPE_V16QI_V16QI:
36600 case V2DI_FTYPE_V4SI_V4SI:
36601 case V2UDI_FTYPE_V4USI_V4USI:
36602 case V2DI_FTYPE_V2DI_V16QI:
36603 case V2DI_FTYPE_V2DF_V2DF:
36604 case V2SI_FTYPE_V2SI_V2SI:
36605 case V2SI_FTYPE_V4HI_V4HI:
36606 case V2SI_FTYPE_V2SF_V2SF:
36607 case V2DF_FTYPE_V2DF_V2DF:
36608 case V2DF_FTYPE_V2DF_V4SF:
36609 case V2DF_FTYPE_V2DF_V2DI:
36610 case V2DF_FTYPE_V2DF_DI:
36611 case V2DF_FTYPE_V2DF_SI:
36612 case V2DF_FTYPE_V2DF_UINT:
36613 case V2DF_FTYPE_V2DF_UINT64:
36614 case V2SF_FTYPE_V2SF_V2SF:
36615 case V1DI_FTYPE_V1DI_V1DI:
36616 case V1DI_FTYPE_V8QI_V8QI:
36617 case V1DI_FTYPE_V2SI_V2SI:
36618 case V32QI_FTYPE_V16HI_V16HI:
36619 case V16HI_FTYPE_V8SI_V8SI:
36620 case V32QI_FTYPE_V32QI_V32QI:
36621 case V16HI_FTYPE_V32QI_V32QI:
36622 case V16HI_FTYPE_V16HI_V16HI:
36623 case V8SI_FTYPE_V4DF_V4DF:
36624 case V8SI_FTYPE_V8SI_V8SI:
36625 case V8SI_FTYPE_V16HI_V16HI:
36626 case V4DI_FTYPE_V4DI_V4DI:
36627 case V4DI_FTYPE_V8SI_V8SI:
36628 case V4UDI_FTYPE_V8USI_V8USI:
36629 case QI_FTYPE_V8DI_V8DI:
36630 case V8DI_FTYPE_V64QI_V64QI:
36631 case HI_FTYPE_V16SI_V16SI:
36632 if (comparison == UNKNOWN)
36633 return ix86_expand_binop_builtin (icode, exp, target);
36634 nargs = 2;
36635 break;
36636 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36637 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36638 gcc_assert (comparison != UNKNOWN);
36639 nargs = 2;
36640 swap = true;
36641 break;
36642 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36643 case V16HI_FTYPE_V16HI_SI_COUNT:
36644 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36645 case V8SI_FTYPE_V8SI_SI_COUNT:
36646 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36647 case V4DI_FTYPE_V4DI_INT_COUNT:
36648 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36649 case V8HI_FTYPE_V8HI_SI_COUNT:
36650 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36651 case V4SI_FTYPE_V4SI_SI_COUNT:
36652 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36653 case V4HI_FTYPE_V4HI_SI_COUNT:
36654 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36655 case V2DI_FTYPE_V2DI_SI_COUNT:
36656 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36657 case V2SI_FTYPE_V2SI_SI_COUNT:
36658 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36659 case V1DI_FTYPE_V1DI_SI_COUNT:
36660 nargs = 2;
36661 last_arg_count = true;
36662 break;
36663 case UINT64_FTYPE_UINT64_UINT64:
36664 case UINT_FTYPE_UINT_UINT:
36665 case UINT_FTYPE_UINT_USHORT:
36666 case UINT_FTYPE_UINT_UCHAR:
36667 case UINT16_FTYPE_UINT16_INT:
36668 case UINT8_FTYPE_UINT8_INT:
36669 case HI_FTYPE_HI_HI:
36670 case SI_FTYPE_SI_SI:
36671 case DI_FTYPE_DI_DI:
36672 case V16SI_FTYPE_V8DF_V8DF:
36673 nargs = 2;
36674 break;
36675 case V2DI_FTYPE_V2DI_INT_CONVERT:
36676 nargs = 2;
36677 rmode = V1TImode;
36678 nargs_constant = 1;
36679 break;
36680 case V4DI_FTYPE_V4DI_INT_CONVERT:
36681 nargs = 2;
36682 rmode = V2TImode;
36683 nargs_constant = 1;
36684 break;
36685 case V8DI_FTYPE_V8DI_INT_CONVERT:
36686 nargs = 2;
36687 rmode = V4TImode;
36688 nargs_constant = 1;
36689 break;
36690 case V8HI_FTYPE_V8HI_INT:
36691 case V8HI_FTYPE_V8SF_INT:
36692 case V16HI_FTYPE_V16SF_INT:
36693 case V8HI_FTYPE_V4SF_INT:
36694 case V8SF_FTYPE_V8SF_INT:
36695 case V4SF_FTYPE_V16SF_INT:
36696 case V16SF_FTYPE_V16SF_INT:
36697 case V4SI_FTYPE_V4SI_INT:
36698 case V4SI_FTYPE_V8SI_INT:
36699 case V4HI_FTYPE_V4HI_INT:
36700 case V4DF_FTYPE_V4DF_INT:
36701 case V4DF_FTYPE_V8DF_INT:
36702 case V4SF_FTYPE_V4SF_INT:
36703 case V4SF_FTYPE_V8SF_INT:
36704 case V2DI_FTYPE_V2DI_INT:
36705 case V2DF_FTYPE_V2DF_INT:
36706 case V2DF_FTYPE_V4DF_INT:
36707 case V16HI_FTYPE_V16HI_INT:
36708 case V8SI_FTYPE_V8SI_INT:
36709 case V16SI_FTYPE_V16SI_INT:
36710 case V4SI_FTYPE_V16SI_INT:
36711 case V4DI_FTYPE_V4DI_INT:
36712 case V2DI_FTYPE_V4DI_INT:
36713 case V4DI_FTYPE_V8DI_INT:
36714 case HI_FTYPE_HI_INT:
36715 case QI_FTYPE_V4SF_INT:
36716 case QI_FTYPE_V2DF_INT:
36717 nargs = 2;
36718 nargs_constant = 1;
36719 break;
36720 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36721 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36722 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36723 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36724 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36725 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36726 case HI_FTYPE_V16SI_V16SI_HI:
36727 case QI_FTYPE_V8DI_V8DI_QI:
36728 case V16HI_FTYPE_V16SI_V16HI_HI:
36729 case V16QI_FTYPE_V16SI_V16QI_HI:
36730 case V16QI_FTYPE_V8DI_V16QI_QI:
36731 case V16SF_FTYPE_V16SF_V16SF_HI:
36732 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36733 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36734 case V16SF_FTYPE_V16SI_V16SF_HI:
36735 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36736 case V16SF_FTYPE_V4SF_V16SF_HI:
36737 case V16SI_FTYPE_SI_V16SI_HI:
36738 case V16SI_FTYPE_V16HI_V16SI_HI:
36739 case V16SI_FTYPE_V16QI_V16SI_HI:
36740 case V16SI_FTYPE_V16SF_V16SI_HI:
36741 case V8SF_FTYPE_V4SF_V8SF_QI:
36742 case V4DF_FTYPE_V2DF_V4DF_QI:
36743 case V8SI_FTYPE_V4SI_V8SI_QI:
36744 case V8SI_FTYPE_SI_V8SI_QI:
36745 case V4SI_FTYPE_V4SI_V4SI_QI:
36746 case V4SI_FTYPE_SI_V4SI_QI:
36747 case V4DI_FTYPE_V2DI_V4DI_QI:
36748 case V4DI_FTYPE_DI_V4DI_QI:
36749 case V2DI_FTYPE_V2DI_V2DI_QI:
36750 case V2DI_FTYPE_DI_V2DI_QI:
36751 case V64QI_FTYPE_V64QI_V64QI_DI:
36752 case V64QI_FTYPE_V16QI_V64QI_DI:
36753 case V64QI_FTYPE_QI_V64QI_DI:
36754 case V32QI_FTYPE_V32QI_V32QI_SI:
36755 case V32QI_FTYPE_V16QI_V32QI_SI:
36756 case V32QI_FTYPE_QI_V32QI_SI:
36757 case V16QI_FTYPE_V16QI_V16QI_HI:
36758 case V16QI_FTYPE_QI_V16QI_HI:
36759 case V32HI_FTYPE_V8HI_V32HI_SI:
36760 case V32HI_FTYPE_HI_V32HI_SI:
36761 case V16HI_FTYPE_V8HI_V16HI_HI:
36762 case V16HI_FTYPE_HI_V16HI_HI:
36763 case V8HI_FTYPE_V8HI_V8HI_QI:
36764 case V8HI_FTYPE_HI_V8HI_QI:
36765 case V8SF_FTYPE_V8HI_V8SF_QI:
36766 case V4SF_FTYPE_V8HI_V4SF_QI:
36767 case V8SI_FTYPE_V8SF_V8SI_QI:
36768 case V4SI_FTYPE_V4SF_V4SI_QI:
36769 case V8DI_FTYPE_V8SF_V8DI_QI:
36770 case V4DI_FTYPE_V4SF_V4DI_QI:
36771 case V2DI_FTYPE_V4SF_V2DI_QI:
36772 case V8SF_FTYPE_V8DI_V8SF_QI:
36773 case V4SF_FTYPE_V4DI_V4SF_QI:
36774 case V4SF_FTYPE_V2DI_V4SF_QI:
36775 case V8DF_FTYPE_V8DI_V8DF_QI:
36776 case V4DF_FTYPE_V4DI_V4DF_QI:
36777 case V2DF_FTYPE_V2DI_V2DF_QI:
36778 case V16QI_FTYPE_V8HI_V16QI_QI:
36779 case V16QI_FTYPE_V16HI_V16QI_HI:
36780 case V16QI_FTYPE_V4SI_V16QI_QI:
36781 case V16QI_FTYPE_V8SI_V16QI_QI:
36782 case V8HI_FTYPE_V4SI_V8HI_QI:
36783 case V8HI_FTYPE_V8SI_V8HI_QI:
36784 case V16QI_FTYPE_V2DI_V16QI_QI:
36785 case V16QI_FTYPE_V4DI_V16QI_QI:
36786 case V8HI_FTYPE_V2DI_V8HI_QI:
36787 case V8HI_FTYPE_V4DI_V8HI_QI:
36788 case V4SI_FTYPE_V2DI_V4SI_QI:
36789 case V4SI_FTYPE_V4DI_V4SI_QI:
36790 case V32QI_FTYPE_V32HI_V32QI_SI:
36791 case HI_FTYPE_V16QI_V16QI_HI:
36792 case SI_FTYPE_V32QI_V32QI_SI:
36793 case DI_FTYPE_V64QI_V64QI_DI:
36794 case QI_FTYPE_V8HI_V8HI_QI:
36795 case HI_FTYPE_V16HI_V16HI_HI:
36796 case SI_FTYPE_V32HI_V32HI_SI:
36797 case QI_FTYPE_V4SI_V4SI_QI:
36798 case QI_FTYPE_V8SI_V8SI_QI:
36799 case QI_FTYPE_V2DI_V2DI_QI:
36800 case QI_FTYPE_V4DI_V4DI_QI:
36801 case V4SF_FTYPE_V2DF_V4SF_QI:
36802 case V4SF_FTYPE_V4DF_V4SF_QI:
36803 case V16SI_FTYPE_V16SI_V16SI_HI:
36804 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36805 case V16SI_FTYPE_V4SI_V16SI_HI:
36806 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36807 case V2DI_FTYPE_V4SI_V2DI_QI:
36808 case V2DI_FTYPE_V8HI_V2DI_QI:
36809 case V2DI_FTYPE_V16QI_V2DI_QI:
36810 case V4DI_FTYPE_V4DI_V4DI_QI:
36811 case V4DI_FTYPE_V4SI_V4DI_QI:
36812 case V4DI_FTYPE_V8HI_V4DI_QI:
36813 case V4DI_FTYPE_V16QI_V4DI_QI:
36814 case V8DI_FTYPE_V8DF_V8DI_QI:
36815 case V4DI_FTYPE_V4DF_V4DI_QI:
36816 case V2DI_FTYPE_V2DF_V2DI_QI:
36817 case V4SI_FTYPE_V4DF_V4SI_QI:
36818 case V4SI_FTYPE_V2DF_V4SI_QI:
36819 case V4SI_FTYPE_V8HI_V4SI_QI:
36820 case V4SI_FTYPE_V16QI_V4SI_QI:
36821 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36822 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36823 case V8DF_FTYPE_V2DF_V8DF_QI:
36824 case V8DF_FTYPE_V4DF_V8DF_QI:
36825 case V8DF_FTYPE_V8DF_V8DF_QI:
36826 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36827 case V8SF_FTYPE_V8SF_V8SF_QI:
36828 case V8SF_FTYPE_V8SI_V8SF_QI:
36829 case V4DF_FTYPE_V4DF_V4DF_QI:
36830 case V4SF_FTYPE_V4SF_V4SF_QI:
36831 case V2DF_FTYPE_V2DF_V2DF_QI:
36832 case V2DF_FTYPE_V4SF_V2DF_QI:
36833 case V2DF_FTYPE_V4SI_V2DF_QI:
36834 case V4SF_FTYPE_V4SI_V4SF_QI:
36835 case V4DF_FTYPE_V4SF_V4DF_QI:
36836 case V4DF_FTYPE_V4SI_V4DF_QI:
36837 case V8SI_FTYPE_V8SI_V8SI_QI:
36838 case V8SI_FTYPE_V8HI_V8SI_QI:
36839 case V8SI_FTYPE_V16QI_V8SI_QI:
36840 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36841 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36842 case V8DF_FTYPE_V8SF_V8DF_QI:
36843 case V8DF_FTYPE_V8SI_V8DF_QI:
36844 case V8DI_FTYPE_DI_V8DI_QI:
36845 case V16SF_FTYPE_V8SF_V16SF_HI:
36846 case V16SI_FTYPE_V8SI_V16SI_HI:
36847 case V16HI_FTYPE_V16HI_V16HI_HI:
36848 case V8HI_FTYPE_V16QI_V8HI_QI:
36849 case V16HI_FTYPE_V16QI_V16HI_HI:
36850 case V32HI_FTYPE_V32HI_V32HI_SI:
36851 case V32HI_FTYPE_V32QI_V32HI_SI:
36852 case V8DI_FTYPE_V16QI_V8DI_QI:
36853 case V8DI_FTYPE_V2DI_V8DI_QI:
36854 case V8DI_FTYPE_V4DI_V8DI_QI:
36855 case V8DI_FTYPE_V8DI_V8DI_QI:
36856 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36857 case V8DI_FTYPE_V8HI_V8DI_QI:
36858 case V8DI_FTYPE_V8SI_V8DI_QI:
36859 case V8HI_FTYPE_V8DI_V8HI_QI:
36860 case V8SF_FTYPE_V8DF_V8SF_QI:
36861 case V8SI_FTYPE_V8DF_V8SI_QI:
36862 case V8SI_FTYPE_V8DI_V8SI_QI:
36863 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36864 nargs = 3;
36865 break;
36866 case V32QI_FTYPE_V32QI_V32QI_INT:
36867 case V16HI_FTYPE_V16HI_V16HI_INT:
36868 case V16QI_FTYPE_V16QI_V16QI_INT:
36869 case V4DI_FTYPE_V4DI_V4DI_INT:
36870 case V8HI_FTYPE_V8HI_V8HI_INT:
36871 case V8SI_FTYPE_V8SI_V8SI_INT:
36872 case V8SI_FTYPE_V8SI_V4SI_INT:
36873 case V8SF_FTYPE_V8SF_V8SF_INT:
36874 case V8SF_FTYPE_V8SF_V4SF_INT:
36875 case V4SI_FTYPE_V4SI_V4SI_INT:
36876 case V4DF_FTYPE_V4DF_V4DF_INT:
36877 case V16SF_FTYPE_V16SF_V16SF_INT:
36878 case V16SF_FTYPE_V16SF_V4SF_INT:
36879 case V16SI_FTYPE_V16SI_V4SI_INT:
36880 case V4DF_FTYPE_V4DF_V2DF_INT:
36881 case V4SF_FTYPE_V4SF_V4SF_INT:
36882 case V2DI_FTYPE_V2DI_V2DI_INT:
36883 case V4DI_FTYPE_V4DI_V2DI_INT:
36884 case V2DF_FTYPE_V2DF_V2DF_INT:
36885 case QI_FTYPE_V8DI_V8DI_INT:
36886 case QI_FTYPE_V8DF_V8DF_INT:
36887 case QI_FTYPE_V2DF_V2DF_INT:
36888 case QI_FTYPE_V4SF_V4SF_INT:
36889 case HI_FTYPE_V16SI_V16SI_INT:
36890 case HI_FTYPE_V16SF_V16SF_INT:
36891 nargs = 3;
36892 nargs_constant = 1;
36893 break;
36894 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36895 nargs = 3;
36896 rmode = V4DImode;
36897 nargs_constant = 1;
36898 break;
36899 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36900 nargs = 3;
36901 rmode = V2DImode;
36902 nargs_constant = 1;
36903 break;
36904 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36905 nargs = 3;
36906 rmode = DImode;
36907 nargs_constant = 1;
36908 break;
36909 case V2DI_FTYPE_V2DI_UINT_UINT:
36910 nargs = 3;
36911 nargs_constant = 2;
36912 break;
36913 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36914 nargs = 3;
36915 rmode = V8DImode;
36916 nargs_constant = 1;
36917 break;
36918 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36919 nargs = 5;
36920 rmode = V8DImode;
36921 mask_pos = 2;
36922 nargs_constant = 1;
36923 break;
36924 case QI_FTYPE_V8DF_INT_QI:
36925 case QI_FTYPE_V4DF_INT_QI:
36926 case QI_FTYPE_V2DF_INT_QI:
36927 case HI_FTYPE_V16SF_INT_HI:
36928 case QI_FTYPE_V8SF_INT_QI:
36929 case QI_FTYPE_V4SF_INT_QI:
36930 nargs = 3;
36931 mask_pos = 1;
36932 nargs_constant = 1;
36933 break;
36934 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36935 nargs = 5;
36936 rmode = V4DImode;
36937 mask_pos = 2;
36938 nargs_constant = 1;
36939 break;
36940 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36941 nargs = 5;
36942 rmode = V2DImode;
36943 mask_pos = 2;
36944 nargs_constant = 1;
36945 break;
36946 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36947 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36948 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36949 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36950 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36951 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36952 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36953 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36954 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36955 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36956 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36957 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36958 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36959 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36960 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36961 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36962 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36963 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36964 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36965 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36966 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36967 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36968 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36969 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36970 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36971 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36972 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
36973 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
36974 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
36975 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
36976 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
36977 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
36978 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
36979 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
36980 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
36981 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
36982 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
36983 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
36984 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
36985 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
36986 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
36987 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
36988 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
36989 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
36990 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
36991 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
36992 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
36993 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
36994 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
36995 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
36996 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
36997 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
36998 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
36999 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37000 nargs = 4;
37001 break;
37002 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37003 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37004 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37005 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37006 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37007 nargs = 4;
37008 nargs_constant = 1;
37009 break;
37010 case QI_FTYPE_V4DI_V4DI_INT_QI:
37011 case QI_FTYPE_V8SI_V8SI_INT_QI:
37012 case QI_FTYPE_V4DF_V4DF_INT_QI:
37013 case QI_FTYPE_V8SF_V8SF_INT_QI:
37014 case QI_FTYPE_V2DI_V2DI_INT_QI:
37015 case QI_FTYPE_V4SI_V4SI_INT_QI:
37016 case QI_FTYPE_V2DF_V2DF_INT_QI:
37017 case QI_FTYPE_V4SF_V4SF_INT_QI:
37018 case DI_FTYPE_V64QI_V64QI_INT_DI:
37019 case SI_FTYPE_V32QI_V32QI_INT_SI:
37020 case HI_FTYPE_V16QI_V16QI_INT_HI:
37021 case SI_FTYPE_V32HI_V32HI_INT_SI:
37022 case HI_FTYPE_V16HI_V16HI_INT_HI:
37023 case QI_FTYPE_V8HI_V8HI_INT_QI:
37024 nargs = 4;
37025 mask_pos = 1;
37026 nargs_constant = 1;
37027 break;
37028 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37029 nargs = 4;
37030 nargs_constant = 2;
37031 break;
37032 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37033 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37034 nargs = 4;
37035 break;
37036 case QI_FTYPE_V8DI_V8DI_INT_QI:
37037 case HI_FTYPE_V16SI_V16SI_INT_HI:
37038 case QI_FTYPE_V8DF_V8DF_INT_QI:
37039 case HI_FTYPE_V16SF_V16SF_INT_HI:
37040 mask_pos = 1;
37041 nargs = 4;
37042 nargs_constant = 1;
37043 break;
37044 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37045 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37046 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37047 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37048 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37049 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37050 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37051 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37052 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37053 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37054 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37055 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37056 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37057 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37058 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37059 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37060 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37061 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37062 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37063 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37064 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37065 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37066 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37067 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37068 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37069 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37070 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37071 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37072 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37073 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37074 nargs = 4;
37075 mask_pos = 2;
37076 nargs_constant = 1;
37077 break;
37078 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37079 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37080 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37081 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37082 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37083 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37084 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37085 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37086 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37087 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37088 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37089 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37090 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37091 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37092 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37093 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37094 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37095 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37096 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37097 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37098 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37099 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37100 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37101 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37102 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37103 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37104 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37105 nargs = 5;
37106 mask_pos = 2;
37107 nargs_constant = 1;
37108 break;
37109 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37110 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37111 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37112 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37113 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37114 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37115 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37116 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37117 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37118 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37119 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37120 nargs = 5;
37122 mask_pos = 1;
37123 nargs_constant = 1;
37124 break;
37126 default:
37127 gcc_unreachable ();
37130 gcc_assert (nargs <= ARRAY_SIZE (args));
37132 if (comparison != UNKNOWN)
37134 gcc_assert (nargs == 2);
37135 return ix86_expand_sse_compare (d, exp, target, swap);
37138 if (rmode == VOIDmode || rmode == tmode)
37140 if (optimize
37141 || target == 0
37142 || GET_MODE (target) != tmode
37143 || !insn_p->operand[0].predicate (target, tmode))
37144 target = gen_reg_rtx (tmode);
37145 real_target = target;
37147 else
37149 real_target = gen_reg_rtx (tmode);
37150 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37153 for (i = 0; i < nargs; i++)
37155 tree arg = CALL_EXPR_ARG (exp, i);
37156 rtx op = expand_normal (arg);
37157 machine_mode mode = insn_p->operand[i + 1].mode;
37158 bool match = insn_p->operand[i + 1].predicate (op, mode);
37160 if (last_arg_count && (i + 1) == nargs)
37162 /* SIMD shift insns take either an 8-bit immediate or a
37163 register as the count, but the builtin functions take an int as
37164 the count. If the count doesn't match, we put it in a register. */
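/* Illustrative example (assumption): the int-typed count of
   _mm_slli_epi16 arrives through a ..._SI_COUNT signature; when it is
   not already a matching immediate it is narrowed to SImode and, if
   necessary, copied into a register here.  */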
37165 if (!match)
37167 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37168 if (!insn_p->operand[i + 1].predicate (op, mode))
37169 op = copy_to_reg (op);
37172 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37173 (!mask_pos && (nargs - i) <= nargs_constant))
37175 if (!match)
37176 switch (icode)
37178 case CODE_FOR_avx_vinsertf128v4di:
37179 case CODE_FOR_avx_vextractf128v4di:
37180 error ("the last argument must be a 1-bit immediate");
37181 return const0_rtx;
37183 case CODE_FOR_avx512f_cmpv8di3_mask:
37184 case CODE_FOR_avx512f_cmpv16si3_mask:
37185 case CODE_FOR_avx512f_ucmpv8di3_mask:
37186 case CODE_FOR_avx512f_ucmpv16si3_mask:
37187 case CODE_FOR_avx512vl_cmpv4di3_mask:
37188 case CODE_FOR_avx512vl_cmpv8si3_mask:
37189 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37190 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37191 case CODE_FOR_avx512vl_cmpv2di3_mask:
37192 case CODE_FOR_avx512vl_cmpv4si3_mask:
37193 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37194 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37195 error ("the last argument must be a 3-bit immediate");
37196 return const0_rtx;
37198 case CODE_FOR_sse4_1_roundsd:
37199 case CODE_FOR_sse4_1_roundss:
37201 case CODE_FOR_sse4_1_roundpd:
37202 case CODE_FOR_sse4_1_roundps:
37203 case CODE_FOR_avx_roundpd256:
37204 case CODE_FOR_avx_roundps256:
37206 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37207 case CODE_FOR_sse4_1_roundps_sfix:
37208 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37209 case CODE_FOR_avx_roundps_sfix256:
37211 case CODE_FOR_sse4_1_blendps:
37212 case CODE_FOR_avx_blendpd256:
37213 case CODE_FOR_avx_vpermilv4df:
37214 case CODE_FOR_avx_vpermilv4df_mask:
37215 case CODE_FOR_avx512f_getmantv8df_mask:
37216 case CODE_FOR_avx512f_getmantv16sf_mask:
37217 case CODE_FOR_avx512vl_getmantv8sf_mask:
37218 case CODE_FOR_avx512vl_getmantv4df_mask:
37219 case CODE_FOR_avx512vl_getmantv4sf_mask:
37220 case CODE_FOR_avx512vl_getmantv2df_mask:
37221 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37222 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37223 case CODE_FOR_avx512dq_rangepv4df_mask:
37224 case CODE_FOR_avx512dq_rangepv8sf_mask:
37225 case CODE_FOR_avx512dq_rangepv2df_mask:
37226 case CODE_FOR_avx512dq_rangepv4sf_mask:
37227 case CODE_FOR_avx_shufpd256_mask:
37228 error ("the last argument must be a 4-bit immediate");
37229 return const0_rtx;
37231 case CODE_FOR_sha1rnds4:
37232 case CODE_FOR_sse4_1_blendpd:
37233 case CODE_FOR_avx_vpermilv2df:
37234 case CODE_FOR_avx_vpermilv2df_mask:
37235 case CODE_FOR_xop_vpermil2v2df3:
37236 case CODE_FOR_xop_vpermil2v4sf3:
37237 case CODE_FOR_xop_vpermil2v4df3:
37238 case CODE_FOR_xop_vpermil2v8sf3:
37239 case CODE_FOR_avx512f_vinsertf32x4_mask:
37240 case CODE_FOR_avx512f_vinserti32x4_mask:
37241 case CODE_FOR_avx512f_vextractf32x4_mask:
37242 case CODE_FOR_avx512f_vextracti32x4_mask:
37243 case CODE_FOR_sse2_shufpd:
37244 case CODE_FOR_sse2_shufpd_mask:
37245 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37246 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37247 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37248 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37249 error ("the last argument must be a 2-bit immediate");
37250 return const0_rtx;
37252 case CODE_FOR_avx_vextractf128v4df:
37253 case CODE_FOR_avx_vextractf128v8sf:
37254 case CODE_FOR_avx_vextractf128v8si:
37255 case CODE_FOR_avx_vinsertf128v4df:
37256 case CODE_FOR_avx_vinsertf128v8sf:
37257 case CODE_FOR_avx_vinsertf128v8si:
37258 case CODE_FOR_avx512f_vinsertf64x4_mask:
37259 case CODE_FOR_avx512f_vinserti64x4_mask:
37260 case CODE_FOR_avx512f_vextractf64x4_mask:
37261 case CODE_FOR_avx512f_vextracti64x4_mask:
37262 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37263 case CODE_FOR_avx512dq_vinserti32x8_mask:
37264 case CODE_FOR_avx512vl_vinsertv4df:
37265 case CODE_FOR_avx512vl_vinsertv4di:
37266 case CODE_FOR_avx512vl_vinsertv8sf:
37267 case CODE_FOR_avx512vl_vinsertv8si:
37268 error ("the last argument must be a 1-bit immediate");
37269 return const0_rtx;
37271 case CODE_FOR_avx_vmcmpv2df3:
37272 case CODE_FOR_avx_vmcmpv4sf3:
37273 case CODE_FOR_avx_cmpv2df3:
37274 case CODE_FOR_avx_cmpv4sf3:
37275 case CODE_FOR_avx_cmpv4df3:
37276 case CODE_FOR_avx_cmpv8sf3:
37277 case CODE_FOR_avx512f_cmpv8df3_mask:
37278 case CODE_FOR_avx512f_cmpv16sf3_mask:
37279 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37280 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37281 error ("the last argument must be a 5-bit immediate");
37282 return const0_rtx;
37284 default:
37285 switch (nargs_constant)
37287 case 2:
37288 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37289 (!mask_pos && (nargs - i) == nargs_constant))
37291 error ("the next to last argument must be an 8-bit immediate");
37292 break;
37294 case 1:
37295 error ("the last argument must be an 8-bit immediate");
37296 break;
37297 default:
37298 gcc_unreachable ();
37300 return const0_rtx;
37303 else
37305 if (VECTOR_MODE_P (mode))
37306 op = safe_vector_operand (op, mode);
37308 /* If we aren't optimizing, only allow one memory operand to
37309 be generated. */
37310 if (memory_operand (op, mode))
37311 num_memory++;
37313 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37315 if (optimize || !match || num_memory > 1)
37316 op = copy_to_mode_reg (mode, op);
37318 else
37320 op = copy_to_reg (op);
37321 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37325 args[i].op = op;
37326 args[i].mode = mode;
37329 switch (nargs)
37331 case 1:
37332 pat = GEN_FCN (icode) (real_target, args[0].op);
37333 break;
37334 case 2:
37335 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37336 break;
37337 case 3:
37338 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37339 args[2].op);
37340 break;
37341 case 4:
37342 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37343 args[2].op, args[3].op);
37344 break;
37345 case 5:
37346 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37347 args[2].op, args[3].op, args[4].op);
break;
37348 case 6:
37349 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37350 args[2].op, args[3].op, args[4].op,
37351 args[5].op);
37352 break;
37353 default:
37354 gcc_unreachable ();
37357 if (! pat)
37358 return 0;
37360 emit_insn (pat);
37361 return target;
37364 /* Transform a pattern of the following layout:
37365 (parallel [
37366 set (A B)
37367 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37369 into:
37370 (set (A B))
or
37373 (parallel [ A B
...
37375 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37378 into:
37379 (parallel [ A B ... ]) */
37381 static rtx
37382 ix86_erase_embedded_rounding (rtx pat)
37384 if (GET_CODE (pat) == INSN)
37385 pat = PATTERN (pat);
37387 gcc_assert (GET_CODE (pat) == PARALLEL);
37389 if (XVECLEN (pat, 0) == 2)
37391 rtx p0 = XVECEXP (pat, 0, 0);
37392 rtx p1 = XVECEXP (pat, 0, 1);
37394 gcc_assert (GET_CODE (p0) == SET
37395 && GET_CODE (p1) == UNSPEC
37396 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37398 return p0;
37400 else
37402 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37403 int i = 0;
37404 int j = 0;
37406 for (; i < XVECLEN (pat, 0); ++i)
37408 rtx elem = XVECEXP (pat, 0, i);
37409 if (GET_CODE (elem) != UNSPEC
37410 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37411 res [j++] = elem;
37414 /* No more than 1 occurrence was removed. */
37415 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37417 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37421 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37422 with rounding. */
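/* Illustrative example (assumption): a call like
   _mm_comi_round_ss (a, b, _CMP_GT_OS, _MM_FROUND_NO_EXC) lands here;
   the third argument indexes the comparison tables below and the fourth
   is the SAE/rounding operand checked against the insn predicate.  */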
37423 static rtx
37424 ix86_expand_sse_comi_round (const struct builtin_description *d,
37425 tree exp, rtx target)
37427 rtx pat, set_dst;
37428 tree arg0 = CALL_EXPR_ARG (exp, 0);
37429 tree arg1 = CALL_EXPR_ARG (exp, 1);
37430 tree arg2 = CALL_EXPR_ARG (exp, 2);
37431 tree arg3 = CALL_EXPR_ARG (exp, 3);
37432 rtx op0 = expand_normal (arg0);
37433 rtx op1 = expand_normal (arg1);
37434 rtx op2 = expand_normal (arg2);
37435 rtx op3 = expand_normal (arg3);
37436 enum insn_code icode = d->icode;
37437 const struct insn_data_d *insn_p = &insn_data[icode];
37438 machine_mode mode0 = insn_p->operand[0].mode;
37439 machine_mode mode1 = insn_p->operand[1].mode;
37440 enum rtx_code comparison = UNEQ;
37441 bool need_ucomi = false;
37443 /* See avxintrin.h for values. */
37444 enum rtx_code comi_comparisons[32] =
37446 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37447 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37448 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37450 bool need_ucomi_values[32] =
37452 true, false, false, true, true, false, false, true,
37453 true, false, false, true, true, false, false, true,
37454 false, true, true, false, false, true, true, false,
37455 false, true, true, false, false, true, true, false
37458 if (!CONST_INT_P (op2))
37460 error ("the third argument must be a comparison constant");
37461 return const0_rtx;
37463 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37465 error ("incorrect comparison mode");
37466 return const0_rtx;
37469 if (!insn_p->operand[2].predicate (op3, SImode))
37471 error ("incorrect rounding operand");
37472 return const0_rtx;
37475 comparison = comi_comparisons[INTVAL (op2)];
37476 need_ucomi = need_ucomi_values[INTVAL (op2)];
37478 if (VECTOR_MODE_P (mode0))
37479 op0 = safe_vector_operand (op0, mode0);
37480 if (VECTOR_MODE_P (mode1))
37481 op1 = safe_vector_operand (op1, mode1);
37483 target = gen_reg_rtx (SImode);
37484 emit_move_insn (target, const0_rtx);
37485 target = gen_rtx_SUBREG (QImode, target, 0);
37487 if ((optimize && !register_operand (op0, mode0))
37488 || !insn_p->operand[0].predicate (op0, mode0))
37489 op0 = copy_to_mode_reg (mode0, op0);
37490 if ((optimize && !register_operand (op1, mode1))
37491 || !insn_p->operand[1].predicate (op1, mode1))
37492 op1 = copy_to_mode_reg (mode1, op1);
37494 if (need_ucomi)
37495 icode = icode == CODE_FOR_sse_comi_round
37496 ? CODE_FOR_sse_ucomi_round
37497 : CODE_FOR_sse2_ucomi_round;
37499 pat = GEN_FCN (icode) (op0, op1, op3);
37500 if (! pat)
37501 return 0;
37503 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37504 if (INTVAL (op3) == NO_ROUND)
37506 pat = ix86_erase_embedded_rounding (pat);
37507 if (! pat)
37508 return 0;
37510 set_dst = SET_DEST (pat);
37512 else
37514 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37515 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37518 emit_insn (pat);
37519 emit_insn (gen_rtx_SET (VOIDmode,
37520 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37521 gen_rtx_fmt_ee (comparison, QImode,
37522 set_dst,
37523 const0_rtx)));
37525 return SUBREG_REG (target);
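/* Subroutine of ix86_expand_builtin to take care of insns whose last
   argument is an explicit embedded-rounding / SAE operand.  */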
37528 static rtx
37529 ix86_expand_round_builtin (const struct builtin_description *d,
37530 tree exp, rtx target)
37532 rtx pat;
37533 unsigned int i, nargs;
37534 struct
37536 rtx op;
37537 machine_mode mode;
37538 } args[6];
37539 enum insn_code icode = d->icode;
37540 const struct insn_data_d *insn_p = &insn_data[icode];
37541 machine_mode tmode = insn_p->operand[0].mode;
37542 unsigned int nargs_constant = 0;
37543 unsigned int redundant_embed_rnd = 0;
37545 switch ((enum ix86_builtin_func_type) d->flag)
37547 case UINT64_FTYPE_V2DF_INT:
37548 case UINT64_FTYPE_V4SF_INT:
37549 case UINT_FTYPE_V2DF_INT:
37550 case UINT_FTYPE_V4SF_INT:
37551 case INT64_FTYPE_V2DF_INT:
37552 case INT64_FTYPE_V4SF_INT:
37553 case INT_FTYPE_V2DF_INT:
37554 case INT_FTYPE_V4SF_INT:
37555 nargs = 2;
37556 break;
37557 case V4SF_FTYPE_V4SF_UINT_INT:
37558 case V4SF_FTYPE_V4SF_UINT64_INT:
37559 case V2DF_FTYPE_V2DF_UINT64_INT:
37560 case V4SF_FTYPE_V4SF_INT_INT:
37561 case V4SF_FTYPE_V4SF_INT64_INT:
37562 case V2DF_FTYPE_V2DF_INT64_INT:
37563 case V4SF_FTYPE_V4SF_V4SF_INT:
37564 case V2DF_FTYPE_V2DF_V2DF_INT:
37565 case V4SF_FTYPE_V4SF_V2DF_INT:
37566 case V2DF_FTYPE_V2DF_V4SF_INT:
37567 nargs = 3;
37568 break;
37569 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37570 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37571 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37572 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37573 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37574 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37575 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37576 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37577 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37578 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37579 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37580 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37581 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37582 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37583 nargs = 4;
37584 break;
37585 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37586 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37587 nargs_constant = 2;
37588 nargs = 4;
37589 break;
37590 case INT_FTYPE_V4SF_V4SF_INT_INT:
37591 case INT_FTYPE_V2DF_V2DF_INT_INT:
37592 return ix86_expand_sse_comi_round (d, exp, target);
37593 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37594 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37595 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37596 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37597 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37598 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37599 nargs = 5;
37600 break;
37601 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37602 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37603 nargs_constant = 4;
37604 nargs = 5;
37605 break;
37606 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37607 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37608 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37609 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37610 nargs_constant = 3;
37611 nargs = 5;
37612 break;
37613 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37614 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37615 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37616 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37617 nargs = 6;
37618 nargs_constant = 4;
37619 break;
37620 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37621 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37622 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37623 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37624 nargs = 6;
37625 nargs_constant = 3;
37626 break;
37627 default:
37628 gcc_unreachable ();
37630 gcc_assert (nargs <= ARRAY_SIZE (args));
37632 if (optimize
37633 || target == 0
37634 || GET_MODE (target) != tmode
37635 || !insn_p->operand[0].predicate (target, tmode))
37636 target = gen_reg_rtx (tmode);
37638 for (i = 0; i < nargs; i++)
37640 tree arg = CALL_EXPR_ARG (exp, i);
37641 rtx op = expand_normal (arg);
37642 machine_mode mode = insn_p->operand[i + 1].mode;
37643 bool match = insn_p->operand[i + 1].predicate (op, mode);
37645 if (i == nargs - nargs_constant)
37647 if (!match)
37649 switch (icode)
37651 case CODE_FOR_avx512f_getmantv8df_mask_round:
37652 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37653 case CODE_FOR_avx512f_vgetmantv2df_round:
37654 case CODE_FOR_avx512f_vgetmantv4sf_round:
37655 error ("the immediate argument must be a 4-bit immediate");
37656 return const0_rtx;
37657 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37658 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37659 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37660 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37661 error ("the immediate argument must be a 5-bit immediate");
37662 return const0_rtx;
37663 default:
37664 error ("the immediate argument must be an 8-bit immediate");
37665 return const0_rtx;
37669 else if (i == nargs-1)
37671 if (!insn_p->operand[nargs].predicate (op, SImode))
37673 error ("incorrect rounding operand");
37674 return const0_rtx;
37677 /* If there is no rounding, use the normal version of the pattern. */
37678 if (INTVAL (op) == NO_ROUND)
37679 redundant_embed_rnd = 1;
37681 else
37683 if (VECTOR_MODE_P (mode))
37684 op = safe_vector_operand (op, mode);
37686 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37688 if (optimize || !match)
37689 op = copy_to_mode_reg (mode, op);
37691 else
37693 op = copy_to_reg (op);
37694 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37698 args[i].op = op;
37699 args[i].mode = mode;
37702 switch (nargs)
37704 case 1:
37705 pat = GEN_FCN (icode) (target, args[0].op);
37706 break;
37707 case 2:
37708 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37709 break;
37710 case 3:
37711 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37712 args[2].op);
37713 break;
37714 case 4:
37715 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37716 args[2].op, args[3].op);
37717 break;
37718 case 5:
37719 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37720 args[2].op, args[3].op, args[4].op);
break;
37721 case 6:
37722 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37723 args[2].op, args[3].op, args[4].op,
37724 args[5].op);
37725 break;
37726 default:
37727 gcc_unreachable ();
37730 if (!pat)
37731 return 0;
37733 if (redundant_embed_rnd)
37734 pat = ix86_erase_embedded_rounding (pat);
37736 emit_insn (pat);
37737 return target;
37740 /* Subroutine of ix86_expand_builtin to take care of special insns
37741 with variable number of operands. */
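/* Illustrative example (assumption): _mm_stream_load_si128 maps to
   __builtin_ia32_movntdqa with the V2DI_FTYPE_PV2DI signature, a
   one-argument "load" class builtin whose memory operand is flagged
   below as requiring strict mode alignment.  */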
37743 static rtx
37744 ix86_expand_special_args_builtin (const struct builtin_description *d,
37745 tree exp, rtx target)
37747 tree arg;
37748 rtx pat, op;
37749 unsigned int i, nargs, arg_adjust, memory;
37750 bool aligned_mem = false;
37751 struct
37753 rtx op;
37754 machine_mode mode;
37755 } args[3];
37756 enum insn_code icode = d->icode;
37757 bool last_arg_constant = false;
37758 const struct insn_data_d *insn_p = &insn_data[icode];
37759 machine_mode tmode = insn_p->operand[0].mode;
37760 enum { load, store } klass;
37762 switch ((enum ix86_builtin_func_type) d->flag)
37764 case VOID_FTYPE_VOID:
37765 emit_insn (GEN_FCN (icode) (target));
37766 return 0;
37767 case VOID_FTYPE_UINT64:
37768 case VOID_FTYPE_UNSIGNED:
37769 nargs = 0;
37770 klass = store;
37771 memory = 0;
37772 break;
37774 case INT_FTYPE_VOID:
37775 case USHORT_FTYPE_VOID:
37776 case UINT64_FTYPE_VOID:
37777 case UNSIGNED_FTYPE_VOID:
37778 nargs = 0;
37779 klass = load;
37780 memory = 0;
37781 break;
37782 case UINT64_FTYPE_PUNSIGNED:
37783 case V2DI_FTYPE_PV2DI:
37784 case V4DI_FTYPE_PV4DI:
37785 case V32QI_FTYPE_PCCHAR:
37786 case V16QI_FTYPE_PCCHAR:
37787 case V8SF_FTYPE_PCV4SF:
37788 case V8SF_FTYPE_PCFLOAT:
37789 case V4SF_FTYPE_PCFLOAT:
37790 case V4DF_FTYPE_PCV2DF:
37791 case V4DF_FTYPE_PCDOUBLE:
37792 case V2DF_FTYPE_PCDOUBLE:
37793 case VOID_FTYPE_PVOID:
37794 case V16SI_FTYPE_PV4SI:
37795 case V16SF_FTYPE_PV4SF:
37796 case V8DI_FTYPE_PV4DI:
37797 case V8DI_FTYPE_PV8DI:
37798 case V8DF_FTYPE_PV4DF:
37799 nargs = 1;
37800 klass = load;
37801 memory = 0;
37802 switch (icode)
37804 case CODE_FOR_sse4_1_movntdqa:
37805 case CODE_FOR_avx2_movntdqa:
37806 case CODE_FOR_avx512f_movntdqa:
37807 aligned_mem = true;
37808 break;
37809 default:
37810 break;
37812 break;
37813 case VOID_FTYPE_PV2SF_V4SF:
37814 case VOID_FTYPE_PV8DI_V8DI:
37815 case VOID_FTYPE_PV4DI_V4DI:
37816 case VOID_FTYPE_PV2DI_V2DI:
37817 case VOID_FTYPE_PCHAR_V32QI:
37818 case VOID_FTYPE_PCHAR_V16QI:
37819 case VOID_FTYPE_PFLOAT_V16SF:
37820 case VOID_FTYPE_PFLOAT_V8SF:
37821 case VOID_FTYPE_PFLOAT_V4SF:
37822 case VOID_FTYPE_PDOUBLE_V8DF:
37823 case VOID_FTYPE_PDOUBLE_V4DF:
37824 case VOID_FTYPE_PDOUBLE_V2DF:
37825 case VOID_FTYPE_PLONGLONG_LONGLONG:
37826 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37827 case VOID_FTYPE_PINT_INT:
37828 nargs = 1;
37829 klass = store;
37830 /* Reserve memory operand for target. */
37831 memory = ARRAY_SIZE (args);
37832 switch (icode)
37834 /* These builtins and instructions require the memory
37835 to be properly aligned. */
37836 case CODE_FOR_avx_movntv4di:
37837 case CODE_FOR_sse2_movntv2di:
37838 case CODE_FOR_avx_movntv8sf:
37839 case CODE_FOR_sse_movntv4sf:
37840 case CODE_FOR_sse4a_vmmovntv4sf:
37841 case CODE_FOR_avx_movntv4df:
37842 case CODE_FOR_sse2_movntv2df:
37843 case CODE_FOR_sse4a_vmmovntv2df:
37844 case CODE_FOR_sse2_movntidi:
37845 case CODE_FOR_sse_movntq:
37846 case CODE_FOR_sse2_movntisi:
37847 case CODE_FOR_avx512f_movntv16sf:
37848 case CODE_FOR_avx512f_movntv8df:
37849 case CODE_FOR_avx512f_movntv8di:
37850 aligned_mem = true;
37851 break;
37852 default:
37853 break;
37855 break;
37856 case V4SF_FTYPE_V4SF_PCV2SF:
37857 case V2DF_FTYPE_V2DF_PCDOUBLE:
37858 nargs = 2;
37859 klass = load;
37860 memory = 1;
37861 break;
37862 case V8SF_FTYPE_PCV8SF_V8SI:
37863 case V4DF_FTYPE_PCV4DF_V4DI:
37864 case V4SF_FTYPE_PCV4SF_V4SI:
37865 case V2DF_FTYPE_PCV2DF_V2DI:
37866 case V8SI_FTYPE_PCV8SI_V8SI:
37867 case V4DI_FTYPE_PCV4DI_V4DI:
37868 case V4SI_FTYPE_PCV4SI_V4SI:
37869 case V2DI_FTYPE_PCV2DI_V2DI:
37870 nargs = 2;
37871 klass = load;
37872 memory = 0;
37873 break;
37874 case VOID_FTYPE_PV8DF_V8DF_QI:
37875 case VOID_FTYPE_PV16SF_V16SF_HI:
37876 case VOID_FTYPE_PV8DI_V8DI_QI:
37877 case VOID_FTYPE_PV4DI_V4DI_QI:
37878 case VOID_FTYPE_PV2DI_V2DI_QI:
37879 case VOID_FTYPE_PV16SI_V16SI_HI:
37880 case VOID_FTYPE_PV8SI_V8SI_QI:
37881 case VOID_FTYPE_PV4SI_V4SI_QI:
37882 switch (icode)
37884 /* These builtins and instructions require the memory
37885 to be properly aligned. */
37886 case CODE_FOR_avx512f_storev16sf_mask:
37887 case CODE_FOR_avx512f_storev16si_mask:
37888 case CODE_FOR_avx512f_storev8df_mask:
37889 case CODE_FOR_avx512f_storev8di_mask:
37890 case CODE_FOR_avx512vl_storev8sf_mask:
37891 case CODE_FOR_avx512vl_storev8si_mask:
37892 case CODE_FOR_avx512vl_storev4df_mask:
37893 case CODE_FOR_avx512vl_storev4di_mask:
37894 case CODE_FOR_avx512vl_storev4sf_mask:
37895 case CODE_FOR_avx512vl_storev4si_mask:
37896 case CODE_FOR_avx512vl_storev2df_mask:
37897 case CODE_FOR_avx512vl_storev2di_mask:
37898 aligned_mem = true;
37899 break;
37900 default:
37901 break;
37903 /* FALLTHRU */
37904 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37905 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37906 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37907 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37908 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37909 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37910 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37911 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37912 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37913 case VOID_FTYPE_PFLOAT_V4SF_QI:
37914 case VOID_FTYPE_PV8SI_V8DI_QI:
37915 case VOID_FTYPE_PV8HI_V8DI_QI:
37916 case VOID_FTYPE_PV16HI_V16SI_HI:
37917 case VOID_FTYPE_PV16QI_V8DI_QI:
37918 case VOID_FTYPE_PV16QI_V16SI_HI:
37919 case VOID_FTYPE_PV4SI_V4DI_QI:
37920 case VOID_FTYPE_PV4SI_V2DI_QI:
37921 case VOID_FTYPE_PV8HI_V4DI_QI:
37922 case VOID_FTYPE_PV8HI_V2DI_QI:
37923 case VOID_FTYPE_PV8HI_V8SI_QI:
37924 case VOID_FTYPE_PV8HI_V4SI_QI:
37925 case VOID_FTYPE_PV16QI_V4DI_QI:
37926 case VOID_FTYPE_PV16QI_V2DI_QI:
37927 case VOID_FTYPE_PV16QI_V8SI_QI:
37928 case VOID_FTYPE_PV16QI_V4SI_QI:
37929 case VOID_FTYPE_PV8HI_V8HI_QI:
37930 case VOID_FTYPE_PV16HI_V16HI_HI:
37931 case VOID_FTYPE_PV32HI_V32HI_SI:
37932 case VOID_FTYPE_PV16QI_V16QI_HI:
37933 case VOID_FTYPE_PV32QI_V32QI_SI:
37934 case VOID_FTYPE_PV64QI_V64QI_DI:
37935 case VOID_FTYPE_PV4DF_V4DF_QI:
37936 case VOID_FTYPE_PV2DF_V2DF_QI:
37937 case VOID_FTYPE_PV8SF_V8SF_QI:
37938 case VOID_FTYPE_PV4SF_V4SF_QI:
37939 nargs = 2;
37940 klass = store;
37941 /* Reserve memory operand for target. */
37942 memory = ARRAY_SIZE (args);
37943 break;
37944 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37945 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37946 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37947 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37948 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37949 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37950 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37951 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37952 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37953 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37954 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37955 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37956 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37957 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37958 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37959 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37960 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37961 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37962 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37963 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37964 nargs = 3;
37965 klass = load;
37966 memory = 0;
37967 switch (icode)
37969 /* These builtins and instructions require the memory
37970 to be properly aligned. */
37971 case CODE_FOR_avx512f_loadv16sf_mask:
37972 case CODE_FOR_avx512f_loadv16si_mask:
37973 case CODE_FOR_avx512f_loadv8df_mask:
37974 case CODE_FOR_avx512f_loadv8di_mask:
37975 case CODE_FOR_avx512vl_loadv8sf_mask:
37976 case CODE_FOR_avx512vl_loadv8si_mask:
37977 case CODE_FOR_avx512vl_loadv4df_mask:
37978 case CODE_FOR_avx512vl_loadv4di_mask:
37979 case CODE_FOR_avx512vl_loadv4sf_mask:
37980 case CODE_FOR_avx512vl_loadv4si_mask:
37981 case CODE_FOR_avx512vl_loadv2df_mask:
37982 case CODE_FOR_avx512vl_loadv2di_mask:
37983 case CODE_FOR_avx512bw_loadv64qi_mask:
37984 case CODE_FOR_avx512vl_loadv32qi_mask:
37985 case CODE_FOR_avx512vl_loadv16qi_mask:
37986 case CODE_FOR_avx512bw_loadv32hi_mask:
37987 case CODE_FOR_avx512vl_loadv16hi_mask:
37988 case CODE_FOR_avx512vl_loadv8hi_mask:
37989 aligned_mem = true;
37990 break;
37991 default:
37992 break;
37994 break;
37995 case VOID_FTYPE_UINT_UINT_UINT:
37996 case VOID_FTYPE_UINT64_UINT_UINT:
37997 case UCHAR_FTYPE_UINT_UINT_UINT:
37998 case UCHAR_FTYPE_UINT64_UINT_UINT:
37999 nargs = 3;
38000 klass = load;
38001 memory = ARRAY_SIZE (args);
38002 last_arg_constant = true;
38003 break;
38004 default:
38005 gcc_unreachable ();
38008 gcc_assert (nargs <= ARRAY_SIZE (args));
38010 if (klass == store)
38012 arg = CALL_EXPR_ARG (exp, 0);
38013 op = expand_normal (arg);
38014 gcc_assert (target == 0);
38015 if (memory)
38017 op = ix86_zero_extend_to_Pmode (op);
38018 target = gen_rtx_MEM (tmode, op);
38019 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38020 on it. Try to improve it using get_pointer_alignment,
38021 and if the special builtin is one that requires strict
38022 mode alignment, also from its GET_MODE_ALIGNMENT.
38023 Failure to do so could lead to ix86_legitimate_combined_insn
38024 rejecting all changes to such insns. */
38025 unsigned int align = get_pointer_alignment (arg);
38026 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38027 align = GET_MODE_ALIGNMENT (tmode);
38028 if (MEM_ALIGN (target) < align)
38029 set_mem_align (target, align);
38031 else
38032 target = force_reg (tmode, op);
38033 arg_adjust = 1;
38035 else
38037 arg_adjust = 0;
38038 if (optimize
38039 || target == 0
38040 || !register_operand (target, tmode)
38041 || GET_MODE (target) != tmode)
38042 target = gen_reg_rtx (tmode);
38045 for (i = 0; i < nargs; i++)
38047 machine_mode mode = insn_p->operand[i + 1].mode;
38048 bool match;
38050 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38051 op = expand_normal (arg);
38052 match = insn_p->operand[i + 1].predicate (op, mode);
38054 if (last_arg_constant && (i + 1) == nargs)
38056 if (!match)
38058 if (icode == CODE_FOR_lwp_lwpvalsi3
38059 || icode == CODE_FOR_lwp_lwpinssi3
38060 || icode == CODE_FOR_lwp_lwpvaldi3
38061 || icode == CODE_FOR_lwp_lwpinsdi3)
38062 error ("the last argument must be a 32-bit immediate");
38063 else
38064 error ("the last argument must be an 8-bit immediate");
38065 return const0_rtx;
38068 else
38070 if (i == memory)
38072 /* This must be the memory operand. */
38073 op = ix86_zero_extend_to_Pmode (op);
38074 op = gen_rtx_MEM (mode, op);
38075 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38076 on it. Try to improve it using get_pointer_alignment,
38077 and if the special builtin is one that requires strict
38078 mode alignment, also from its GET_MODE_ALIGNMENT.
38079 Failure to do so could lead to ix86_legitimate_combined_insn
38080 rejecting all changes to such insns. */
38081 unsigned int align = get_pointer_alignment (arg);
38082 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38083 align = GET_MODE_ALIGNMENT (mode);
38084 if (MEM_ALIGN (op) < align)
38085 set_mem_align (op, align);
38087 else
38089 /* This must be a register. */
38090 if (VECTOR_MODE_P (mode))
38091 op = safe_vector_operand (op, mode);
38093 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38094 op = copy_to_mode_reg (mode, op);
38095 else
38097 op = copy_to_reg (op);
38098 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38103 args[i].op = op;
38104 args[i].mode = mode;
38107 switch (nargs)
38109 case 0:
38110 pat = GEN_FCN (icode) (target);
38111 break;
38112 case 1:
38113 pat = GEN_FCN (icode) (target, args[0].op);
38114 break;
38115 case 2:
38116 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38117 break;
38118 case 3:
38119 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38120 break;
38121 default:
38122 gcc_unreachable ();
38125 if (! pat)
38126 return 0;
38127 emit_insn (pat);
38128 return klass == store ? 0 : target;
38131 /* Return the integer constant in ARG. Constrain it to be in the range
38132 of the subparts of VEC_TYPE; issue an error if not. */
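/* For example (illustrative), a selector of 4 for a V4SF vector falls
   outside the 0..3 range, triggers the error below, and 0 is returned so
   expansion can continue.  */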
38134 static int
38135 get_element_number (tree vec_type, tree arg)
38137 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38139 if (!tree_fits_uhwi_p (arg)
38140 || (elt = tree_to_uhwi (arg), elt > max))
38142 error ("selector must be an integer constant in the range 0..%wi", max);
38143 return 0;
38146 return elt;
38149 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38150 ix86_expand_vector_init. We DO have language-level syntax for this, in
38151 the form of (type){ init-list }. Except that since we can't place emms
38152 instructions from inside the compiler, we can't allow the use of MMX
38153 registers unless the user explicitly asks for it. So we do *not* define
38154 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38155 we have builtins invoked by mmintrin.h that give us license to emit
38156 these sorts of instructions. */
38158 static rtx
38159 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38161 machine_mode tmode = TYPE_MODE (type);
38162 machine_mode inner_mode = GET_MODE_INNER (tmode);
38163 int i, n_elt = GET_MODE_NUNITS (tmode);
38164 rtvec v = rtvec_alloc (n_elt);
38166 gcc_assert (VECTOR_MODE_P (tmode));
38167 gcc_assert (call_expr_nargs (exp) == n_elt);
38169 for (i = 0; i < n_elt; ++i)
38171 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38172 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38175 if (!target || !register_operand (target, tmode))
38176 target = gen_reg_rtx (tmode);
38178 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38179 return target;
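/* Illustrative sketch of how this expander is reached (the wrapper names
   come from mmintrin.h and are shown here only as an example): a call
   such as
       __m64 v = _mm_set_pi32 (hi, lo);
   is implemented on top of __builtin_ia32_vec_init_v2si, so it ends up
   in the routine above and is expanded via ix86_expand_vector_init.  */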
38182 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38183 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38184 had a language-level syntax for referencing vector elements. */
38186 static rtx
38187 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38189 machine_mode tmode, mode0;
38190 tree arg0, arg1;
38191 int elt;
38192 rtx op0;
38194 arg0 = CALL_EXPR_ARG (exp, 0);
38195 arg1 = CALL_EXPR_ARG (exp, 1);
38197 op0 = expand_normal (arg0);
38198 elt = get_element_number (TREE_TYPE (arg0), arg1);
38200 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38201 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38202 gcc_assert (VECTOR_MODE_P (mode0));
38204 op0 = force_reg (mode0, op0);
38206 if (optimize || !target || !register_operand (target, tmode))
38207 target = gen_reg_rtx (tmode);
38209 ix86_expand_vector_extract (true, target, op0, elt);
38211 return target;
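/* Illustrative mapping, again via the intrinsic headers: something like
       int hi = _mm_extract_epi16 (v, 3);
   goes through __builtin_ia32_vec_ext_v8hi and therefore through this
   routine, with get_element_number verifying that the selector lies in
   the range 0..7 for a V8HI vector.  */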
38214 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38215 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38216 a language-level syntax for referencing vector elements. */
38218 static rtx
38219 ix86_expand_vec_set_builtin (tree exp)
38221 machine_mode tmode, mode1;
38222 tree arg0, arg1, arg2;
38223 int elt;
38224 rtx op0, op1, target;
38226 arg0 = CALL_EXPR_ARG (exp, 0);
38227 arg1 = CALL_EXPR_ARG (exp, 1);
38228 arg2 = CALL_EXPR_ARG (exp, 2);
38230 tmode = TYPE_MODE (TREE_TYPE (arg0));
38231 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38232 gcc_assert (VECTOR_MODE_P (tmode));
38234 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38235 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38236 elt = get_element_number (TREE_TYPE (arg0), arg2);
38238 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38239 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38241 op0 = force_reg (tmode, op0);
38242 op1 = force_reg (mode1, op1);
38244 /* OP0 is the source of these builtin functions and shouldn't be
38245 modified. Create a copy, use it and return it as target. */
38246 target = gen_reg_rtx (tmode);
38247 emit_move_insn (target, op0);
38248 ix86_expand_vector_set (true, target, op1, elt);
38250 return target;
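/* Similarly, an illustrative user-level form of this builtin is
       __m128i r = _mm_insert_epi16 (v, val, 3);
   which maps onto __builtin_ia32_vec_set_v8hi.  Note that the input
   vector is copied into a fresh register first, so the builtin never
   modifies its first argument in place.  */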
38253 /* Emit conditional move of SRC to DST with condition
38254 OP1 CODE OP2. */
38255 static void
38256 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38258 rtx t;
38260 if (TARGET_CMOVE)
38262 t = ix86_expand_compare (code, op1, op2);
38263 emit_insn (gen_rtx_SET (VOIDmode, dst,
38264 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38265 src, dst)));
38267 else
38269 rtx nomove = gen_label_rtx ();
38270 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38271 const0_rtx, GET_MODE (op1), 1, nomove);
38272 emit_move_insn (dst, src);
38273 emit_label (nomove);
38277 /* Choose max of DST and SRC and put it to DST. */
38278 static void
38279 ix86_emit_move_max (rtx dst, rtx src)
38281 ix86_emit_cmove (dst, src, LTU, dst, src);
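/* In other words, DST = MAX (DST, SRC) on unsigned values: the LTU
   conditional move replaces DST with SRC exactly when DST < SRC as an
   unsigned comparison.  Bounds are addresses, so the unsigned flavour
   is the natural one here.  */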
38284 /* Expand an expression EXP that calls a built-in function,
38285 with result going to TARGET if that's convenient
38286 (and in mode MODE if that's convenient).
38287 SUBTARGET may be used as the target for computing one of EXP's operands.
38288 IGNORE is nonzero if the value is to be ignored. */
38290 static rtx
38291 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38292 machine_mode mode, int ignore)
38294 const struct builtin_description *d;
38295 size_t i;
38296 enum insn_code icode;
38297 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38298 tree arg0, arg1, arg2, arg3, arg4;
38299 rtx op0, op1, op2, op3, op4, pat, insn;
38300 machine_mode mode0, mode1, mode2, mode3, mode4;
38301 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38303 /* For CPU builtins that can be folded, fold first and expand the fold. */
38304 switch (fcode)
38306 case IX86_BUILTIN_CPU_INIT:
38308 /* Make it call __cpu_indicator_init in libgcc. */
38309 tree call_expr, fndecl, type;
38310 type = build_function_type_list (integer_type_node, NULL_TREE);
38311 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38312 call_expr = build_call_expr (fndecl, 0);
38313 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38315 case IX86_BUILTIN_CPU_IS:
38316 case IX86_BUILTIN_CPU_SUPPORTS:
38318 tree arg0 = CALL_EXPR_ARG (exp, 0);
38319 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38320 gcc_assert (fold_expr != NULL_TREE);
38321 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38325 /* Determine whether the builtin function is available under the current ISA.
38326 Originally the builtin was not created if it wasn't applicable to the
38327 current ISA based on the command line switches. With function specific
38328 options, we need to check in the context of the function making the call
38329 whether it is supported. */
38330 if (ix86_builtins_isa[fcode].isa
38331 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38333 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38334 NULL, (enum fpmath_unit) 0, false);
38336 if (!opts)
38337 error ("%qE needs unknown isa option", fndecl);
38338 else
38340 gcc_assert (opts != NULL);
38341 error ("%qE needs isa option %s", fndecl, opts);
38342 free (opts);
38344 return const0_rtx;
38347 switch (fcode)
38349 case IX86_BUILTIN_BNDMK:
38350 if (!target
38351 || GET_MODE (target) != BNDmode
38352 || !register_operand (target, BNDmode))
38353 target = gen_reg_rtx (BNDmode);
38355 arg0 = CALL_EXPR_ARG (exp, 0);
38356 arg1 = CALL_EXPR_ARG (exp, 1);
38358 op0 = expand_normal (arg0);
38359 op1 = expand_normal (arg1);
38361 if (!register_operand (op0, Pmode))
38362 op0 = ix86_zero_extend_to_Pmode (op0);
38363 if (!register_operand (op1, Pmode))
38364 op1 = ix86_zero_extend_to_Pmode (op1);
38366 /* Builtin arg1 is the size of the block, but instruction op1 should
38367 be (size - 1). */
38368 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38369 NULL_RTX, 1, OPTAB_DIRECT);
38371 emit_insn (BNDmode == BND64mode
38372 ? gen_bnd64_mk (target, op0, op1)
38373 : gen_bnd32_mk (target, op0, op1));
38374 return target;
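/* Worked example of the size adjustment above: for a 16-byte object the
   builtin receives size 16, op1 becomes 15, and the emitted bndmk thus
   records the offset of the last valid byte rather than the
   one-past-the-end offset.  */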
38376 case IX86_BUILTIN_BNDSTX:
38377 arg0 = CALL_EXPR_ARG (exp, 0);
38378 arg1 = CALL_EXPR_ARG (exp, 1);
38379 arg2 = CALL_EXPR_ARG (exp, 2);
38381 op0 = expand_normal (arg0);
38382 op1 = expand_normal (arg1);
38383 op2 = expand_normal (arg2);
38385 if (!register_operand (op0, Pmode))
38386 op0 = ix86_zero_extend_to_Pmode (op0);
38387 if (!register_operand (op1, BNDmode))
38388 op1 = copy_to_mode_reg (BNDmode, op1);
38389 if (!register_operand (op2, Pmode))
38390 op2 = ix86_zero_extend_to_Pmode (op2);
38392 emit_insn (BNDmode == BND64mode
38393 ? gen_bnd64_stx (op2, op0, op1)
38394 : gen_bnd32_stx (op2, op0, op1));
38395 return 0;
38397 case IX86_BUILTIN_BNDLDX:
38398 if (!target
38399 || GET_MODE (target) != BNDmode
38400 || !register_operand (target, BNDmode))
38401 target = gen_reg_rtx (BNDmode);
38403 arg0 = CALL_EXPR_ARG (exp, 0);
38404 arg1 = CALL_EXPR_ARG (exp, 1);
38406 op0 = expand_normal (arg0);
38407 op1 = expand_normal (arg1);
38409 if (!register_operand (op0, Pmode))
38410 op0 = ix86_zero_extend_to_Pmode (op0);
38411 if (!register_operand (op1, Pmode))
38412 op1 = ix86_zero_extend_to_Pmode (op1);
38414 emit_insn (BNDmode == BND64mode
38415 ? gen_bnd64_ldx (target, op0, op1)
38416 : gen_bnd32_ldx (target, op0, op1));
38417 return target;
38419 case IX86_BUILTIN_BNDCL:
38420 arg0 = CALL_EXPR_ARG (exp, 0);
38421 arg1 = CALL_EXPR_ARG (exp, 1);
38423 op0 = expand_normal (arg0);
38424 op1 = expand_normal (arg1);
38426 if (!register_operand (op0, Pmode))
38427 op0 = ix86_zero_extend_to_Pmode (op0);
38428 if (!register_operand (op1, BNDmode))
38429 op1 = copy_to_mode_reg (BNDmode, op1);
38431 emit_insn (BNDmode == BND64mode
38432 ? gen_bnd64_cl (op1, op0)
38433 : gen_bnd32_cl (op1, op0));
38434 return 0;
38436 case IX86_BUILTIN_BNDCU:
38437 arg0 = CALL_EXPR_ARG (exp, 0);
38438 arg1 = CALL_EXPR_ARG (exp, 1);
38440 op0 = expand_normal (arg0);
38441 op1 = expand_normal (arg1);
38443 if (!register_operand (op0, Pmode))
38444 op0 = ix86_zero_extend_to_Pmode (op0);
38445 if (!register_operand (op1, BNDmode))
38446 op1 = copy_to_mode_reg (BNDmode, op1);
38448 emit_insn (BNDmode == BND64mode
38449 ? gen_bnd64_cu (op1, op0)
38450 : gen_bnd32_cu (op1, op0));
38451 return 0;
38453 case IX86_BUILTIN_BNDRET:
38454 arg0 = CALL_EXPR_ARG (exp, 0);
38455 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38456 target = chkp_get_rtl_bounds (arg0);
38458 /* If no bounds were specified for the returned value,
38459 then use INIT bounds. This usually happens when
38460 some built-in function is expanded. */
38461 if (!target)
38463 rtx t1 = gen_reg_rtx (Pmode);
38464 rtx t2 = gen_reg_rtx (Pmode);
38465 target = gen_reg_rtx (BNDmode);
38466 emit_move_insn (t1, const0_rtx);
38467 emit_move_insn (t2, constm1_rtx);
38468 emit_insn (BNDmode == BND64mode
38469 ? gen_bnd64_mk (target, t1, t2)
38470 : gen_bnd32_mk (target, t1, t2));
38473 gcc_assert (target && REG_P (target));
38474 return target;
38476 case IX86_BUILTIN_BNDNARROW:
38478 rtx m1, m1h1, m1h2, lb, ub, t1;
38480 /* Return value and lb. */
38481 arg0 = CALL_EXPR_ARG (exp, 0);
38482 /* Bounds. */
38483 arg1 = CALL_EXPR_ARG (exp, 1);
38484 /* Size. */
38485 arg2 = CALL_EXPR_ARG (exp, 2);
38487 lb = expand_normal (arg0);
38488 op1 = expand_normal (arg1);
38489 op2 = expand_normal (arg2);
38491 /* The size was passed, but we need to use (size - 1), as for bndmk. */
38492 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38493 NULL_RTX, 1, OPTAB_DIRECT);
38495 /* Add LB to size and invert to get UB. */
38496 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38497 op2, 1, OPTAB_DIRECT);
38498 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38500 if (!register_operand (lb, Pmode))
38501 lb = ix86_zero_extend_to_Pmode (lb);
38502 if (!register_operand (ub, Pmode))
38503 ub = ix86_zero_extend_to_Pmode (ub);
38505 /* We need to move bounds to memory before any computations. */
38506 if (MEM_P (op1))
38507 m1 = op1;
38508 else
38510 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38511 emit_move_insn (m1, op1);
38514 /* Generate mem expression to be used for access to LB and UB. */
38515 m1h1 = adjust_address (m1, Pmode, 0);
38516 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38518 t1 = gen_reg_rtx (Pmode);
38520 /* Compute LB. */
38521 emit_move_insn (t1, m1h1);
38522 ix86_emit_move_max (t1, lb);
38523 emit_move_insn (m1h1, t1);
38525 /* Compute UB. UB is stored in 1's complement form. Therefore
38526 we also use max here. */
38527 emit_move_insn (t1, m1h2);
38528 ix86_emit_move_max (t1, ub);
38529 emit_move_insn (m1h2, t1);
38531 op2 = gen_reg_rtx (BNDmode);
38532 emit_move_insn (op2, m1);
38534 return chkp_join_splitted_slot (lb, op2);
38537 case IX86_BUILTIN_BNDINT:
38539 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38541 if (!target
38542 || GET_MODE (target) != BNDmode
38543 || !register_operand (target, BNDmode))
38544 target = gen_reg_rtx (BNDmode);
38546 arg0 = CALL_EXPR_ARG (exp, 0);
38547 arg1 = CALL_EXPR_ARG (exp, 1);
38549 op0 = expand_normal (arg0);
38550 op1 = expand_normal (arg1);
38552 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38553 rh1 = adjust_address (res, Pmode, 0);
38554 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38556 /* Put the first bounds into temporaries. */
38557 lb1 = gen_reg_rtx (Pmode);
38558 ub1 = gen_reg_rtx (Pmode);
38559 if (MEM_P (op0))
38561 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38562 emit_move_insn (ub1, adjust_address (op0, Pmode,
38563 GET_MODE_SIZE (Pmode)));
38565 else
38567 emit_move_insn (res, op0);
38568 emit_move_insn (lb1, rh1);
38569 emit_move_insn (ub1, rh2);
38572 /* Put the second bounds into temporaries. */
38573 lb2 = gen_reg_rtx (Pmode);
38574 ub2 = gen_reg_rtx (Pmode);
38575 if (MEM_P (op1))
38577 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38578 emit_move_insn (ub2, adjust_address (op1, Pmode,
38579 GET_MODE_SIZE (Pmode)));
38581 else
38583 emit_move_insn (res, op1);
38584 emit_move_insn (lb2, rh1);
38585 emit_move_insn (ub2, rh2);
38588 /* Compute LB. */
38589 ix86_emit_move_max (lb1, lb2);
38590 emit_move_insn (rh1, lb1);
38592 /* Compute UB. UB is stored in 1's complement form. Therefore
38593 we also use max here. */
38594 ix86_emit_move_max (ub1, ub2);
38595 emit_move_insn (rh2, ub1);
38597 emit_move_insn (target, res);
38599 return target;
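/* A small numeric sketch of the one's complement trick used by both
   BNDNARROW and BNDINT above.  Suppose the two real upper bounds are
   0x1fff and 0x14ff; what is stored is ~0x1fff and ~0x14ff.  Because
   0x14ff < 0x1fff, we have ~0x14ff > ~0x1fff as unsigned values, so an
   unsigned MAX over the stored forms selects ~0x14ff, i.e. the *smaller*
   real upper bound -- exactly what an intersection or narrowing needs,
   and it lets ix86_emit_move_max serve for LB and UB alike.  */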
38602 case IX86_BUILTIN_SIZEOF:
38604 tree name;
38605 rtx symbol;
38607 if (!target
38608 || GET_MODE (target) != Pmode
38609 || !register_operand (target, Pmode))
38610 target = gen_reg_rtx (Pmode);
38612 arg0 = CALL_EXPR_ARG (exp, 0);
38613 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38615 name = DECL_ASSEMBLER_NAME (arg0);
38616 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38618 emit_insn (Pmode == SImode
38619 ? gen_move_size_reloc_si (target, symbol)
38620 : gen_move_size_reloc_di (target, symbol));
38622 return target;
38625 case IX86_BUILTIN_BNDLOWER:
38627 rtx mem, hmem;
38629 if (!target
38630 || GET_MODE (target) != Pmode
38631 || !register_operand (target, Pmode))
38632 target = gen_reg_rtx (Pmode);
38634 arg0 = CALL_EXPR_ARG (exp, 0);
38635 op0 = expand_normal (arg0);
38637 /* We need to move bounds to memory first. */
38638 if (MEM_P (op0))
38639 mem = op0;
38640 else
38642 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38643 emit_move_insn (mem, op0);
38646 /* Generate mem expression to access LB and load it. */
38647 hmem = adjust_address (mem, Pmode, 0);
38648 emit_move_insn (target, hmem);
38650 return target;
38653 case IX86_BUILTIN_BNDUPPER:
38655 rtx mem, hmem, res;
38657 if (!target
38658 || GET_MODE (target) != Pmode
38659 || !register_operand (target, Pmode))
38660 target = gen_reg_rtx (Pmode);
38662 arg0 = CALL_EXPR_ARG (exp, 0);
38663 op0 = expand_normal (arg0);
38665 /* We need to move bounds to memory first. */
38666 if (MEM_P (op0))
38667 mem = op0;
38668 else
38670 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38671 emit_move_insn (mem, op0);
38674 /* Generate mem expression to access UB. */
38675 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38677 /* We need to invert all bits of UB. */
38678 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38680 if (res != target)
38681 emit_move_insn (target, res);
38683 return target;
38686 case IX86_BUILTIN_MASKMOVQ:
38687 case IX86_BUILTIN_MASKMOVDQU:
38688 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38689 ? CODE_FOR_mmx_maskmovq
38690 : CODE_FOR_sse2_maskmovdqu);
38691 /* Note the arg order is different from the operand order. */
38692 arg1 = CALL_EXPR_ARG (exp, 0);
38693 arg2 = CALL_EXPR_ARG (exp, 1);
38694 arg0 = CALL_EXPR_ARG (exp, 2);
38695 op0 = expand_normal (arg0);
38696 op1 = expand_normal (arg1);
38697 op2 = expand_normal (arg2);
38698 mode0 = insn_data[icode].operand[0].mode;
38699 mode1 = insn_data[icode].operand[1].mode;
38700 mode2 = insn_data[icode].operand[2].mode;
38702 op0 = ix86_zero_extend_to_Pmode (op0);
38703 op0 = gen_rtx_MEM (mode1, op0);
38705 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38706 op0 = copy_to_mode_reg (mode0, op0);
38707 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38708 op1 = copy_to_mode_reg (mode1, op1);
38709 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38710 op2 = copy_to_mode_reg (mode2, op2);
38711 pat = GEN_FCN (icode) (op0, op1, op2);
38712 if (! pat)
38713 return 0;
38714 emit_insn (pat);
38715 return 0;
38717 case IX86_BUILTIN_LDMXCSR:
38718 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38719 target = assign_386_stack_local (SImode, SLOT_TEMP);
38720 emit_move_insn (target, op0);
38721 emit_insn (gen_sse_ldmxcsr (target));
38722 return 0;
38724 case IX86_BUILTIN_STMXCSR:
38725 target = assign_386_stack_local (SImode, SLOT_TEMP);
38726 emit_insn (gen_sse_stmxcsr (target));
38727 return copy_to_mode_reg (SImode, target);
38729 case IX86_BUILTIN_CLFLUSH:
38730 arg0 = CALL_EXPR_ARG (exp, 0);
38731 op0 = expand_normal (arg0);
38732 icode = CODE_FOR_sse2_clflush;
38733 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38734 op0 = ix86_zero_extend_to_Pmode (op0);
38736 emit_insn (gen_sse2_clflush (op0));
38737 return 0;
38739 case IX86_BUILTIN_CLWB:
38740 arg0 = CALL_EXPR_ARG (exp, 0);
38741 op0 = expand_normal (arg0);
38742 icode = CODE_FOR_clwb;
38743 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38744 op0 = ix86_zero_extend_to_Pmode (op0);
38746 emit_insn (gen_clwb (op0));
38747 return 0;
38749 case IX86_BUILTIN_CLFLUSHOPT:
38750 arg0 = CALL_EXPR_ARG (exp, 0);
38751 op0 = expand_normal (arg0);
38752 icode = CODE_FOR_clflushopt;
38753 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38754 op0 = ix86_zero_extend_to_Pmode (op0);
38756 emit_insn (gen_clflushopt (op0));
38757 return 0;
38759 case IX86_BUILTIN_MONITOR:
38760 arg0 = CALL_EXPR_ARG (exp, 0);
38761 arg1 = CALL_EXPR_ARG (exp, 1);
38762 arg2 = CALL_EXPR_ARG (exp, 2);
38763 op0 = expand_normal (arg0);
38764 op1 = expand_normal (arg1);
38765 op2 = expand_normal (arg2);
38766 if (!REG_P (op0))
38767 op0 = ix86_zero_extend_to_Pmode (op0);
38768 if (!REG_P (op1))
38769 op1 = copy_to_mode_reg (SImode, op1);
38770 if (!REG_P (op2))
38771 op2 = copy_to_mode_reg (SImode, op2);
38772 emit_insn (ix86_gen_monitor (op0, op1, op2));
38773 return 0;
38775 case IX86_BUILTIN_MWAIT:
38776 arg0 = CALL_EXPR_ARG (exp, 0);
38777 arg1 = CALL_EXPR_ARG (exp, 1);
38778 op0 = expand_normal (arg0);
38779 op1 = expand_normal (arg1);
38780 if (!REG_P (op0))
38781 op0 = copy_to_mode_reg (SImode, op0);
38782 if (!REG_P (op1))
38783 op1 = copy_to_mode_reg (SImode, op1);
38784 emit_insn (gen_sse3_mwait (op0, op1));
38785 return 0;
38787 case IX86_BUILTIN_VEC_INIT_V2SI:
38788 case IX86_BUILTIN_VEC_INIT_V4HI:
38789 case IX86_BUILTIN_VEC_INIT_V8QI:
38790 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38792 case IX86_BUILTIN_VEC_EXT_V2DF:
38793 case IX86_BUILTIN_VEC_EXT_V2DI:
38794 case IX86_BUILTIN_VEC_EXT_V4SF:
38795 case IX86_BUILTIN_VEC_EXT_V4SI:
38796 case IX86_BUILTIN_VEC_EXT_V8HI:
38797 case IX86_BUILTIN_VEC_EXT_V2SI:
38798 case IX86_BUILTIN_VEC_EXT_V4HI:
38799 case IX86_BUILTIN_VEC_EXT_V16QI:
38800 return ix86_expand_vec_ext_builtin (exp, target);
38802 case IX86_BUILTIN_VEC_SET_V2DI:
38803 case IX86_BUILTIN_VEC_SET_V4SF:
38804 case IX86_BUILTIN_VEC_SET_V4SI:
38805 case IX86_BUILTIN_VEC_SET_V8HI:
38806 case IX86_BUILTIN_VEC_SET_V4HI:
38807 case IX86_BUILTIN_VEC_SET_V16QI:
38808 return ix86_expand_vec_set_builtin (exp);
38810 case IX86_BUILTIN_INFQ:
38811 case IX86_BUILTIN_HUGE_VALQ:
38813 REAL_VALUE_TYPE inf;
38814 rtx tmp;
38816 real_inf (&inf);
38817 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38819 tmp = validize_mem (force_const_mem (mode, tmp));
38821 if (target == 0)
38822 target = gen_reg_rtx (mode);
38824 emit_move_insn (target, tmp);
38825 return target;
38828 case IX86_BUILTIN_RDPMC:
38829 case IX86_BUILTIN_RDTSC:
38830 case IX86_BUILTIN_RDTSCP:
38832 op0 = gen_reg_rtx (DImode);
38833 op1 = gen_reg_rtx (DImode);
38835 if (fcode == IX86_BUILTIN_RDPMC)
38837 arg0 = CALL_EXPR_ARG (exp, 0);
38838 op2 = expand_normal (arg0);
38839 if (!register_operand (op2, SImode))
38840 op2 = copy_to_mode_reg (SImode, op2);
38842 insn = (TARGET_64BIT
38843 ? gen_rdpmc_rex64 (op0, op1, op2)
38844 : gen_rdpmc (op0, op2));
38845 emit_insn (insn);
38847 else if (fcode == IX86_BUILTIN_RDTSC)
38849 insn = (TARGET_64BIT
38850 ? gen_rdtsc_rex64 (op0, op1)
38851 : gen_rdtsc (op0));
38852 emit_insn (insn);
38854 else
38856 op2 = gen_reg_rtx (SImode);
38858 insn = (TARGET_64BIT
38859 ? gen_rdtscp_rex64 (op0, op1, op2)
38860 : gen_rdtscp (op0, op2));
38861 emit_insn (insn);
38863 arg0 = CALL_EXPR_ARG (exp, 0);
38864 op4 = expand_normal (arg0);
38865 if (!address_operand (op4, VOIDmode))
38867 op4 = convert_memory_address (Pmode, op4);
38868 op4 = copy_addr_to_reg (op4);
38870 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38873 if (target == 0)
38875 /* mode is VOIDmode if __builtin_rd* has been called
38876 without lhs. */
38877 if (mode == VOIDmode)
38878 return target;
38879 target = gen_reg_rtx (mode);
38882 if (TARGET_64BIT)
38884 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38885 op1, 1, OPTAB_DIRECT);
38886 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38887 op0, 1, OPTAB_DIRECT);
38890 emit_move_insn (target, op0);
38891 return target;
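/* Note on the 64-bit path above: the counters come back split into low
   and high 32-bit halves, so op1 is shifted left by 32 and IORed into
   op0 to reassemble the full DImode value.  For __builtin_ia32_rdtscp
   the auxiliary value (op2) is additionally stored through the builtin's
   pointer argument.  */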
38893 case IX86_BUILTIN_FXSAVE:
38894 case IX86_BUILTIN_FXRSTOR:
38895 case IX86_BUILTIN_FXSAVE64:
38896 case IX86_BUILTIN_FXRSTOR64:
38897 case IX86_BUILTIN_FNSTENV:
38898 case IX86_BUILTIN_FLDENV:
38899 mode0 = BLKmode;
38900 switch (fcode)
38902 case IX86_BUILTIN_FXSAVE:
38903 icode = CODE_FOR_fxsave;
38904 break;
38905 case IX86_BUILTIN_FXRSTOR:
38906 icode = CODE_FOR_fxrstor;
38907 break;
38908 case IX86_BUILTIN_FXSAVE64:
38909 icode = CODE_FOR_fxsave64;
38910 break;
38911 case IX86_BUILTIN_FXRSTOR64:
38912 icode = CODE_FOR_fxrstor64;
38913 break;
38914 case IX86_BUILTIN_FNSTENV:
38915 icode = CODE_FOR_fnstenv;
38916 break;
38917 case IX86_BUILTIN_FLDENV:
38918 icode = CODE_FOR_fldenv;
38919 break;
38920 default:
38921 gcc_unreachable ();
38924 arg0 = CALL_EXPR_ARG (exp, 0);
38925 op0 = expand_normal (arg0);
38927 if (!address_operand (op0, VOIDmode))
38929 op0 = convert_memory_address (Pmode, op0);
38930 op0 = copy_addr_to_reg (op0);
38932 op0 = gen_rtx_MEM (mode0, op0);
38934 pat = GEN_FCN (icode) (op0);
38935 if (pat)
38936 emit_insn (pat);
38937 return 0;
38939 case IX86_BUILTIN_XSAVE:
38940 case IX86_BUILTIN_XRSTOR:
38941 case IX86_BUILTIN_XSAVE64:
38942 case IX86_BUILTIN_XRSTOR64:
38943 case IX86_BUILTIN_XSAVEOPT:
38944 case IX86_BUILTIN_XSAVEOPT64:
38945 case IX86_BUILTIN_XSAVES:
38946 case IX86_BUILTIN_XRSTORS:
38947 case IX86_BUILTIN_XSAVES64:
38948 case IX86_BUILTIN_XRSTORS64:
38949 case IX86_BUILTIN_XSAVEC:
38950 case IX86_BUILTIN_XSAVEC64:
38951 arg0 = CALL_EXPR_ARG (exp, 0);
38952 arg1 = CALL_EXPR_ARG (exp, 1);
38953 op0 = expand_normal (arg0);
38954 op1 = expand_normal (arg1);
38956 if (!address_operand (op0, VOIDmode))
38958 op0 = convert_memory_address (Pmode, op0);
38959 op0 = copy_addr_to_reg (op0);
38961 op0 = gen_rtx_MEM (BLKmode, op0);
38963 op1 = force_reg (DImode, op1);
38965 if (TARGET_64BIT)
38967 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
38968 NULL, 1, OPTAB_DIRECT);
38969 switch (fcode)
38971 case IX86_BUILTIN_XSAVE:
38972 icode = CODE_FOR_xsave_rex64;
38973 break;
38974 case IX86_BUILTIN_XRSTOR:
38975 icode = CODE_FOR_xrstor_rex64;
38976 break;
38977 case IX86_BUILTIN_XSAVE64:
38978 icode = CODE_FOR_xsave64;
38979 break;
38980 case IX86_BUILTIN_XRSTOR64:
38981 icode = CODE_FOR_xrstor64;
38982 break;
38983 case IX86_BUILTIN_XSAVEOPT:
38984 icode = CODE_FOR_xsaveopt_rex64;
38985 break;
38986 case IX86_BUILTIN_XSAVEOPT64:
38987 icode = CODE_FOR_xsaveopt64;
38988 break;
38989 case IX86_BUILTIN_XSAVES:
38990 icode = CODE_FOR_xsaves_rex64;
38991 break;
38992 case IX86_BUILTIN_XRSTORS:
38993 icode = CODE_FOR_xrstors_rex64;
38994 break;
38995 case IX86_BUILTIN_XSAVES64:
38996 icode = CODE_FOR_xsaves64;
38997 break;
38998 case IX86_BUILTIN_XRSTORS64:
38999 icode = CODE_FOR_xrstors64;
39000 break;
39001 case IX86_BUILTIN_XSAVEC:
39002 icode = CODE_FOR_xsavec_rex64;
39003 break;
39004 case IX86_BUILTIN_XSAVEC64:
39005 icode = CODE_FOR_xsavec64;
39006 break;
39007 default:
39008 gcc_unreachable ();
39011 op2 = gen_lowpart (SImode, op2);
39012 op1 = gen_lowpart (SImode, op1);
39013 pat = GEN_FCN (icode) (op0, op1, op2);
39015 else
39017 switch (fcode)
39019 case IX86_BUILTIN_XSAVE:
39020 icode = CODE_FOR_xsave;
39021 break;
39022 case IX86_BUILTIN_XRSTOR:
39023 icode = CODE_FOR_xrstor;
39024 break;
39025 case IX86_BUILTIN_XSAVEOPT:
39026 icode = CODE_FOR_xsaveopt;
39027 break;
39028 case IX86_BUILTIN_XSAVES:
39029 icode = CODE_FOR_xsaves;
39030 break;
39031 case IX86_BUILTIN_XRSTORS:
39032 icode = CODE_FOR_xrstors;
39033 break;
39034 case IX86_BUILTIN_XSAVEC:
39035 icode = CODE_FOR_xsavec;
39036 break;
39037 default:
39038 gcc_unreachable ();
39040 pat = GEN_FCN (icode) (op0, op1);
39043 if (pat)
39044 emit_insn (pat);
39045 return 0;
39047 case IX86_BUILTIN_LLWPCB:
39048 arg0 = CALL_EXPR_ARG (exp, 0);
39049 op0 = expand_normal (arg0);
39050 icode = CODE_FOR_lwp_llwpcb;
39051 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39052 op0 = ix86_zero_extend_to_Pmode (op0);
39053 emit_insn (gen_lwp_llwpcb (op0));
39054 return 0;
39056 case IX86_BUILTIN_SLWPCB:
39057 icode = CODE_FOR_lwp_slwpcb;
39058 if (!target
39059 || !insn_data[icode].operand[0].predicate (target, Pmode))
39060 target = gen_reg_rtx (Pmode);
39061 emit_insn (gen_lwp_slwpcb (target));
39062 return target;
39064 case IX86_BUILTIN_BEXTRI32:
39065 case IX86_BUILTIN_BEXTRI64:
39066 arg0 = CALL_EXPR_ARG (exp, 0);
39067 arg1 = CALL_EXPR_ARG (exp, 1);
39068 op0 = expand_normal (arg0);
39069 op1 = expand_normal (arg1);
39070 icode = (fcode == IX86_BUILTIN_BEXTRI32
39071 ? CODE_FOR_tbm_bextri_si
39072 : CODE_FOR_tbm_bextri_di);
39073 if (!CONST_INT_P (op1))
39075 error ("last argument must be an immediate");
39076 return const0_rtx;
39078 else
39080 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39081 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39082 op1 = GEN_INT (length);
39083 op2 = GEN_INT (lsb_index);
39084 pat = GEN_FCN (icode) (target, op0, op1, op2);
39085 if (pat)
39086 emit_insn (pat);
39087 return target;
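/* The bextri immediate packs two fields: bits 0..7 hold the starting bit
   position and bits 8..15 hold the field length, which is why the code
   above splits INTVAL (op1) into lsb_index and length before handing
   them to the tbm_bextri pattern.  For example, an immediate of 0x0804
   would describe an 8-bit field starting at bit 4.  */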
39090 case IX86_BUILTIN_RDRAND16_STEP:
39091 icode = CODE_FOR_rdrandhi_1;
39092 mode0 = HImode;
39093 goto rdrand_step;
39095 case IX86_BUILTIN_RDRAND32_STEP:
39096 icode = CODE_FOR_rdrandsi_1;
39097 mode0 = SImode;
39098 goto rdrand_step;
39100 case IX86_BUILTIN_RDRAND64_STEP:
39101 icode = CODE_FOR_rdranddi_1;
39102 mode0 = DImode;
39104 rdrand_step:
39105 op0 = gen_reg_rtx (mode0);
39106 emit_insn (GEN_FCN (icode) (op0));
39108 arg0 = CALL_EXPR_ARG (exp, 0);
39109 op1 = expand_normal (arg0);
39110 if (!address_operand (op1, VOIDmode))
39112 op1 = convert_memory_address (Pmode, op1);
39113 op1 = copy_addr_to_reg (op1);
39115 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39117 op1 = gen_reg_rtx (SImode);
39118 emit_move_insn (op1, CONST1_RTX (SImode));
39120 /* Emit SImode conditional move. */
39121 if (mode0 == HImode)
39123 op2 = gen_reg_rtx (SImode);
39124 emit_insn (gen_zero_extendhisi2 (op2, op0));
39126 else if (mode0 == SImode)
39127 op2 = op0;
39128 else
39129 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39131 if (target == 0
39132 || !register_operand (target, SImode))
39133 target = gen_reg_rtx (SImode);
39135 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39136 const0_rtx);
39137 emit_insn (gen_rtx_SET (VOIDmode, target,
39138 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39139 return target;
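/* The user-visible contract implemented here (see the _rdrand*_step
   wrappers in immintrin.h) is: the random value is stored through the
   pointer argument, and the builtin returns nonzero only when the
   hardware signalled success via the carry flag.  On failure the
   hardware zeroes its output, which is the value the conditional move
   above then yields.  */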
39141 case IX86_BUILTIN_RDSEED16_STEP:
39142 icode = CODE_FOR_rdseedhi_1;
39143 mode0 = HImode;
39144 goto rdseed_step;
39146 case IX86_BUILTIN_RDSEED32_STEP:
39147 icode = CODE_FOR_rdseedsi_1;
39148 mode0 = SImode;
39149 goto rdseed_step;
39151 case IX86_BUILTIN_RDSEED64_STEP:
39152 icode = CODE_FOR_rdseeddi_1;
39153 mode0 = DImode;
39155 rdseed_step:
39156 op0 = gen_reg_rtx (mode0);
39157 emit_insn (GEN_FCN (icode) (op0));
39159 arg0 = CALL_EXPR_ARG (exp, 0);
39160 op1 = expand_normal (arg0);
39161 if (!address_operand (op1, VOIDmode))
39163 op1 = convert_memory_address (Pmode, op1);
39164 op1 = copy_addr_to_reg (op1);
39166 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39168 op2 = gen_reg_rtx (QImode);
39170 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39171 const0_rtx);
39172 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39174 if (target == 0
39175 || !register_operand (target, SImode))
39176 target = gen_reg_rtx (SImode);
39178 emit_insn (gen_zero_extendqisi2 (target, op2));
39179 return target;
39181 case IX86_BUILTIN_SBB32:
39182 icode = CODE_FOR_subsi3_carry;
39183 mode0 = SImode;
39184 goto addcarryx;
39186 case IX86_BUILTIN_SBB64:
39187 icode = CODE_FOR_subdi3_carry;
39188 mode0 = DImode;
39189 goto addcarryx;
39191 case IX86_BUILTIN_ADDCARRYX32:
39192 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39193 mode0 = SImode;
39194 goto addcarryx;
39196 case IX86_BUILTIN_ADDCARRYX64:
39197 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39198 mode0 = DImode;
39200 addcarryx:
39201 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39202 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39203 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39204 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39206 op0 = gen_reg_rtx (QImode);
39208 /* Generate CF from input operand. */
39209 op1 = expand_normal (arg0);
39210 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39211 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39213 /* Gen ADCX instruction to compute X+Y+CF. */
39214 op2 = expand_normal (arg1);
39215 op3 = expand_normal (arg2);
39217 if (!REG_P (op2))
39218 op2 = copy_to_mode_reg (mode0, op2);
39219 if (!REG_P (op3))
39220 op3 = copy_to_mode_reg (mode0, op3);
39222 op0 = gen_reg_rtx (mode0);
39224 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39225 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39226 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39228 /* Store the result. */
39229 op4 = expand_normal (arg3);
39230 if (!address_operand (op4, VOIDmode))
39232 op4 = convert_memory_address (Pmode, op4);
39233 op4 = copy_addr_to_reg (op4);
39235 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39237 /* Return current CF value. */
39238 if (target == 0)
39239 target = gen_reg_rtx (QImode);
39241 PUT_MODE (pat, QImode);
39242 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39243 return target;
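/* Sketch of what the expansion above computes, using the 32-bit form as
   an example (the adxintrin.h wrapper is _addcarryx_u32):
       carry_out = _addcarryx_u32 (carry_in, a, b, &sum);
   The addqi3_cc of the incoming carry with -1 recreates CF (adding 0xff
   to any nonzero QImode value carries out), the adcx/adc pattern then
   computes a + b + CF, the low word is stored through the pointer, and
   the final QImode SET of PAT returns the new carry.  */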
39245 case IX86_BUILTIN_READ_FLAGS:
39246 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39248 if (optimize
39249 || target == NULL_RTX
39250 || !nonimmediate_operand (target, word_mode)
39251 || GET_MODE (target) != word_mode)
39252 target = gen_reg_rtx (word_mode);
39254 emit_insn (gen_pop (target));
39255 return target;
39257 case IX86_BUILTIN_WRITE_FLAGS:
39259 arg0 = CALL_EXPR_ARG (exp, 0);
39260 op0 = expand_normal (arg0);
39261 if (!general_no_elim_operand (op0, word_mode))
39262 op0 = copy_to_mode_reg (word_mode, op0);
39264 emit_insn (gen_push (op0));
39265 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39266 return 0;
39268 case IX86_BUILTIN_KORTESTC16:
39269 icode = CODE_FOR_kortestchi;
39270 mode0 = HImode;
39271 mode1 = CCCmode;
39272 goto kortest;
39274 case IX86_BUILTIN_KORTESTZ16:
39275 icode = CODE_FOR_kortestzhi;
39276 mode0 = HImode;
39277 mode1 = CCZmode;
39279 kortest:
39280 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39281 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39282 op0 = expand_normal (arg0);
39283 op1 = expand_normal (arg1);
39285 op0 = copy_to_reg (op0);
39286 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39287 op1 = copy_to_reg (op1);
39288 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39290 target = gen_reg_rtx (QImode);
39291 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39293 /* Emit kortest. */
39294 emit_insn (GEN_FCN (icode) (op0, op1));
39295 /* And use setcc to return result from flags. */
39296 ix86_expand_setcc (target, EQ,
39297 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39298 return target;
39300 case IX86_BUILTIN_GATHERSIV2DF:
39301 icode = CODE_FOR_avx2_gathersiv2df;
39302 goto gather_gen;
39303 case IX86_BUILTIN_GATHERSIV4DF:
39304 icode = CODE_FOR_avx2_gathersiv4df;
39305 goto gather_gen;
39306 case IX86_BUILTIN_GATHERDIV2DF:
39307 icode = CODE_FOR_avx2_gatherdiv2df;
39308 goto gather_gen;
39309 case IX86_BUILTIN_GATHERDIV4DF:
39310 icode = CODE_FOR_avx2_gatherdiv4df;
39311 goto gather_gen;
39312 case IX86_BUILTIN_GATHERSIV4SF:
39313 icode = CODE_FOR_avx2_gathersiv4sf;
39314 goto gather_gen;
39315 case IX86_BUILTIN_GATHERSIV8SF:
39316 icode = CODE_FOR_avx2_gathersiv8sf;
39317 goto gather_gen;
39318 case IX86_BUILTIN_GATHERDIV4SF:
39319 icode = CODE_FOR_avx2_gatherdiv4sf;
39320 goto gather_gen;
39321 case IX86_BUILTIN_GATHERDIV8SF:
39322 icode = CODE_FOR_avx2_gatherdiv8sf;
39323 goto gather_gen;
39324 case IX86_BUILTIN_GATHERSIV2DI:
39325 icode = CODE_FOR_avx2_gathersiv2di;
39326 goto gather_gen;
39327 case IX86_BUILTIN_GATHERSIV4DI:
39328 icode = CODE_FOR_avx2_gathersiv4di;
39329 goto gather_gen;
39330 case IX86_BUILTIN_GATHERDIV2DI:
39331 icode = CODE_FOR_avx2_gatherdiv2di;
39332 goto gather_gen;
39333 case IX86_BUILTIN_GATHERDIV4DI:
39334 icode = CODE_FOR_avx2_gatherdiv4di;
39335 goto gather_gen;
39336 case IX86_BUILTIN_GATHERSIV4SI:
39337 icode = CODE_FOR_avx2_gathersiv4si;
39338 goto gather_gen;
39339 case IX86_BUILTIN_GATHERSIV8SI:
39340 icode = CODE_FOR_avx2_gathersiv8si;
39341 goto gather_gen;
39342 case IX86_BUILTIN_GATHERDIV4SI:
39343 icode = CODE_FOR_avx2_gatherdiv4si;
39344 goto gather_gen;
39345 case IX86_BUILTIN_GATHERDIV8SI:
39346 icode = CODE_FOR_avx2_gatherdiv8si;
39347 goto gather_gen;
39348 case IX86_BUILTIN_GATHERALTSIV4DF:
39349 icode = CODE_FOR_avx2_gathersiv4df;
39350 goto gather_gen;
39351 case IX86_BUILTIN_GATHERALTDIV8SF:
39352 icode = CODE_FOR_avx2_gatherdiv8sf;
39353 goto gather_gen;
39354 case IX86_BUILTIN_GATHERALTSIV4DI:
39355 icode = CODE_FOR_avx2_gathersiv4di;
39356 goto gather_gen;
39357 case IX86_BUILTIN_GATHERALTDIV8SI:
39358 icode = CODE_FOR_avx2_gatherdiv8si;
39359 goto gather_gen;
39360 case IX86_BUILTIN_GATHER3SIV16SF:
39361 icode = CODE_FOR_avx512f_gathersiv16sf;
39362 goto gather_gen;
39363 case IX86_BUILTIN_GATHER3SIV8DF:
39364 icode = CODE_FOR_avx512f_gathersiv8df;
39365 goto gather_gen;
39366 case IX86_BUILTIN_GATHER3DIV16SF:
39367 icode = CODE_FOR_avx512f_gatherdiv16sf;
39368 goto gather_gen;
39369 case IX86_BUILTIN_GATHER3DIV8DF:
39370 icode = CODE_FOR_avx512f_gatherdiv8df;
39371 goto gather_gen;
39372 case IX86_BUILTIN_GATHER3SIV16SI:
39373 icode = CODE_FOR_avx512f_gathersiv16si;
39374 goto gather_gen;
39375 case IX86_BUILTIN_GATHER3SIV8DI:
39376 icode = CODE_FOR_avx512f_gathersiv8di;
39377 goto gather_gen;
39378 case IX86_BUILTIN_GATHER3DIV16SI:
39379 icode = CODE_FOR_avx512f_gatherdiv16si;
39380 goto gather_gen;
39381 case IX86_BUILTIN_GATHER3DIV8DI:
39382 icode = CODE_FOR_avx512f_gatherdiv8di;
39383 goto gather_gen;
39384 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39385 icode = CODE_FOR_avx512f_gathersiv8df;
39386 goto gather_gen;
39387 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39388 icode = CODE_FOR_avx512f_gatherdiv16sf;
39389 goto gather_gen;
39390 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39391 icode = CODE_FOR_avx512f_gathersiv8di;
39392 goto gather_gen;
39393 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39394 icode = CODE_FOR_avx512f_gatherdiv16si;
39395 goto gather_gen;
39396 case IX86_BUILTIN_GATHER3SIV2DF:
39397 icode = CODE_FOR_avx512vl_gathersiv2df;
39398 goto gather_gen;
39399 case IX86_BUILTIN_GATHER3SIV4DF:
39400 icode = CODE_FOR_avx512vl_gathersiv4df;
39401 goto gather_gen;
39402 case IX86_BUILTIN_GATHER3DIV2DF:
39403 icode = CODE_FOR_avx512vl_gatherdiv2df;
39404 goto gather_gen;
39405 case IX86_BUILTIN_GATHER3DIV4DF:
39406 icode = CODE_FOR_avx512vl_gatherdiv4df;
39407 goto gather_gen;
39408 case IX86_BUILTIN_GATHER3SIV4SF:
39409 icode = CODE_FOR_avx512vl_gathersiv4sf;
39410 goto gather_gen;
39411 case IX86_BUILTIN_GATHER3SIV8SF:
39412 icode = CODE_FOR_avx512vl_gathersiv8sf;
39413 goto gather_gen;
39414 case IX86_BUILTIN_GATHER3DIV4SF:
39415 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39416 goto gather_gen;
39417 case IX86_BUILTIN_GATHER3DIV8SF:
39418 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39419 goto gather_gen;
39420 case IX86_BUILTIN_GATHER3SIV2DI:
39421 icode = CODE_FOR_avx512vl_gathersiv2di;
39422 goto gather_gen;
39423 case IX86_BUILTIN_GATHER3SIV4DI:
39424 icode = CODE_FOR_avx512vl_gathersiv4di;
39425 goto gather_gen;
39426 case IX86_BUILTIN_GATHER3DIV2DI:
39427 icode = CODE_FOR_avx512vl_gatherdiv2di;
39428 goto gather_gen;
39429 case IX86_BUILTIN_GATHER3DIV4DI:
39430 icode = CODE_FOR_avx512vl_gatherdiv4di;
39431 goto gather_gen;
39432 case IX86_BUILTIN_GATHER3SIV4SI:
39433 icode = CODE_FOR_avx512vl_gathersiv4si;
39434 goto gather_gen;
39435 case IX86_BUILTIN_GATHER3SIV8SI:
39436 icode = CODE_FOR_avx512vl_gathersiv8si;
39437 goto gather_gen;
39438 case IX86_BUILTIN_GATHER3DIV4SI:
39439 icode = CODE_FOR_avx512vl_gatherdiv4si;
39440 goto gather_gen;
39441 case IX86_BUILTIN_GATHER3DIV8SI:
39442 icode = CODE_FOR_avx512vl_gatherdiv8si;
39443 goto gather_gen;
39444 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39445 icode = CODE_FOR_avx512vl_gathersiv4df;
39446 goto gather_gen;
39447 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39448 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39449 goto gather_gen;
39450 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39451 icode = CODE_FOR_avx512vl_gathersiv4di;
39452 goto gather_gen;
39453 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39454 icode = CODE_FOR_avx512vl_gatherdiv8si;
39455 goto gather_gen;
39456 case IX86_BUILTIN_SCATTERSIV16SF:
39457 icode = CODE_FOR_avx512f_scattersiv16sf;
39458 goto scatter_gen;
39459 case IX86_BUILTIN_SCATTERSIV8DF:
39460 icode = CODE_FOR_avx512f_scattersiv8df;
39461 goto scatter_gen;
39462 case IX86_BUILTIN_SCATTERDIV16SF:
39463 icode = CODE_FOR_avx512f_scatterdiv16sf;
39464 goto scatter_gen;
39465 case IX86_BUILTIN_SCATTERDIV8DF:
39466 icode = CODE_FOR_avx512f_scatterdiv8df;
39467 goto scatter_gen;
39468 case IX86_BUILTIN_SCATTERSIV16SI:
39469 icode = CODE_FOR_avx512f_scattersiv16si;
39470 goto scatter_gen;
39471 case IX86_BUILTIN_SCATTERSIV8DI:
39472 icode = CODE_FOR_avx512f_scattersiv8di;
39473 goto scatter_gen;
39474 case IX86_BUILTIN_SCATTERDIV16SI:
39475 icode = CODE_FOR_avx512f_scatterdiv16si;
39476 goto scatter_gen;
39477 case IX86_BUILTIN_SCATTERDIV8DI:
39478 icode = CODE_FOR_avx512f_scatterdiv8di;
39479 goto scatter_gen;
39480 case IX86_BUILTIN_SCATTERSIV8SF:
39481 icode = CODE_FOR_avx512vl_scattersiv8sf;
39482 goto scatter_gen;
39483 case IX86_BUILTIN_SCATTERSIV4SF:
39484 icode = CODE_FOR_avx512vl_scattersiv4sf;
39485 goto scatter_gen;
39486 case IX86_BUILTIN_SCATTERSIV4DF:
39487 icode = CODE_FOR_avx512vl_scattersiv4df;
39488 goto scatter_gen;
39489 case IX86_BUILTIN_SCATTERSIV2DF:
39490 icode = CODE_FOR_avx512vl_scattersiv2df;
39491 goto scatter_gen;
39492 case IX86_BUILTIN_SCATTERDIV8SF:
39493 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39494 goto scatter_gen;
39495 case IX86_BUILTIN_SCATTERDIV4SF:
39496 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39497 goto scatter_gen;
39498 case IX86_BUILTIN_SCATTERDIV4DF:
39499 icode = CODE_FOR_avx512vl_scatterdiv4df;
39500 goto scatter_gen;
39501 case IX86_BUILTIN_SCATTERDIV2DF:
39502 icode = CODE_FOR_avx512vl_scatterdiv2df;
39503 goto scatter_gen;
39504 case IX86_BUILTIN_SCATTERSIV8SI:
39505 icode = CODE_FOR_avx512vl_scattersiv8si;
39506 goto scatter_gen;
39507 case IX86_BUILTIN_SCATTERSIV4SI:
39508 icode = CODE_FOR_avx512vl_scattersiv4si;
39509 goto scatter_gen;
39510 case IX86_BUILTIN_SCATTERSIV4DI:
39511 icode = CODE_FOR_avx512vl_scattersiv4di;
39512 goto scatter_gen;
39513 case IX86_BUILTIN_SCATTERSIV2DI:
39514 icode = CODE_FOR_avx512vl_scattersiv2di;
39515 goto scatter_gen;
39516 case IX86_BUILTIN_SCATTERDIV8SI:
39517 icode = CODE_FOR_avx512vl_scatterdiv8si;
39518 goto scatter_gen;
39519 case IX86_BUILTIN_SCATTERDIV4SI:
39520 icode = CODE_FOR_avx512vl_scatterdiv4si;
39521 goto scatter_gen;
39522 case IX86_BUILTIN_SCATTERDIV4DI:
39523 icode = CODE_FOR_avx512vl_scatterdiv4di;
39524 goto scatter_gen;
39525 case IX86_BUILTIN_SCATTERDIV2DI:
39526 icode = CODE_FOR_avx512vl_scatterdiv2di;
39527 goto scatter_gen;
39528 case IX86_BUILTIN_GATHERPFDPD:
39529 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39530 goto vec_prefetch_gen;
39531 case IX86_BUILTIN_GATHERPFDPS:
39532 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39533 goto vec_prefetch_gen;
39534 case IX86_BUILTIN_GATHERPFQPD:
39535 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39536 goto vec_prefetch_gen;
39537 case IX86_BUILTIN_GATHERPFQPS:
39538 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39539 goto vec_prefetch_gen;
39540 case IX86_BUILTIN_SCATTERPFDPD:
39541 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39542 goto vec_prefetch_gen;
39543 case IX86_BUILTIN_SCATTERPFDPS:
39544 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39545 goto vec_prefetch_gen;
39546 case IX86_BUILTIN_SCATTERPFQPD:
39547 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39548 goto vec_prefetch_gen;
39549 case IX86_BUILTIN_SCATTERPFQPS:
39550 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39551 goto vec_prefetch_gen;
39553 gather_gen:
39554 rtx half;
39555 rtx (*gen) (rtx, rtx);
39557 arg0 = CALL_EXPR_ARG (exp, 0);
39558 arg1 = CALL_EXPR_ARG (exp, 1);
39559 arg2 = CALL_EXPR_ARG (exp, 2);
39560 arg3 = CALL_EXPR_ARG (exp, 3);
39561 arg4 = CALL_EXPR_ARG (exp, 4);
39562 op0 = expand_normal (arg0);
39563 op1 = expand_normal (arg1);
39564 op2 = expand_normal (arg2);
39565 op3 = expand_normal (arg3);
39566 op4 = expand_normal (arg4);
39567 /* Note the arg order is different from the operand order. */
39568 mode0 = insn_data[icode].operand[1].mode;
39569 mode2 = insn_data[icode].operand[3].mode;
39570 mode3 = insn_data[icode].operand[4].mode;
39571 mode4 = insn_data[icode].operand[5].mode;
39573 if (target == NULL_RTX
39574 || GET_MODE (target) != insn_data[icode].operand[0].mode
39575 || !insn_data[icode].operand[0].predicate (target,
39576 GET_MODE (target)))
39577 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39578 else
39579 subtarget = target;
39581 switch (fcode)
39583 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39584 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39585 half = gen_reg_rtx (V8SImode);
39586 if (!nonimmediate_operand (op2, V16SImode))
39587 op2 = copy_to_mode_reg (V16SImode, op2);
39588 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39589 op2 = half;
39590 break;
39591 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39592 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39593 case IX86_BUILTIN_GATHERALTSIV4DF:
39594 case IX86_BUILTIN_GATHERALTSIV4DI:
39595 half = gen_reg_rtx (V4SImode);
39596 if (!nonimmediate_operand (op2, V8SImode))
39597 op2 = copy_to_mode_reg (V8SImode, op2);
39598 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39599 op2 = half;
39600 break;
39601 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39602 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39603 half = gen_reg_rtx (mode0);
39604 if (mode0 == V8SFmode)
39605 gen = gen_vec_extract_lo_v16sf;
39606 else
39607 gen = gen_vec_extract_lo_v16si;
39608 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39609 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39610 emit_insn (gen (half, op0));
39611 op0 = half;
39612 if (GET_MODE (op3) != VOIDmode)
39614 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39615 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39616 emit_insn (gen (half, op3));
39617 op3 = half;
39619 break;
39620 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39621 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39622 case IX86_BUILTIN_GATHERALTDIV8SF:
39623 case IX86_BUILTIN_GATHERALTDIV8SI:
39624 half = gen_reg_rtx (mode0);
39625 if (mode0 == V4SFmode)
39626 gen = gen_vec_extract_lo_v8sf;
39627 else
39628 gen = gen_vec_extract_lo_v8si;
39629 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39630 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39631 emit_insn (gen (half, op0));
39632 op0 = half;
39633 if (GET_MODE (op3) != VOIDmode)
39635 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39636 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39637 emit_insn (gen (half, op3));
39638 op3 = half;
39640 break;
39641 default:
39642 break;
39645 /* Force the memory operand to be addressed through a base register
39646 only. We don't want to do this for the memory operands of other
39647 builtin functions. */
39648 op1 = ix86_zero_extend_to_Pmode (op1);
39650 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39651 op0 = copy_to_mode_reg (mode0, op0);
39652 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39653 op1 = copy_to_mode_reg (Pmode, op1);
39654 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39655 op2 = copy_to_mode_reg (mode2, op2);
39656 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39658 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39659 op3 = copy_to_mode_reg (mode3, op3);
39661 else
39663 op3 = copy_to_reg (op3);
39664 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39666 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39668 error ("the last argument must be scale 1, 2, 4, 8");
39669 return const0_rtx;
39672 /* Optimize. If mask is known to have all high bits set,
39673 replace op0 with pc_rtx to signal that the instruction
39674 overwrites the whole destination and doesn't use its
39675 previous contents. */
39676 if (optimize)
39678 if (TREE_CODE (arg3) == INTEGER_CST)
39680 if (integer_all_onesp (arg3))
39681 op0 = pc_rtx;
39683 else if (TREE_CODE (arg3) == VECTOR_CST)
39685 unsigned int negative = 0;
39686 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39688 tree cst = VECTOR_CST_ELT (arg3, i);
39689 if (TREE_CODE (cst) == INTEGER_CST
39690 && tree_int_cst_sign_bit (cst))
39691 negative++;
39692 else if (TREE_CODE (cst) == REAL_CST
39693 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39694 negative++;
39696 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39697 op0 = pc_rtx;
39699 else if (TREE_CODE (arg3) == SSA_NAME
39700 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39702 /* Recognize also when mask is like:
39703 __v2df src = _mm_setzero_pd ();
39704 __v2df mask = _mm_cmpeq_pd (src, src);
39706 __v8sf src = _mm256_setzero_ps ();
39707 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39708 as that is a cheaper way to load all ones into
39709 a register than having to load a constant from
39710 memory. */
39711 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39712 if (is_gimple_call (def_stmt))
39714 tree fndecl = gimple_call_fndecl (def_stmt);
39715 if (fndecl
39716 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39717 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39719 case IX86_BUILTIN_CMPPD:
39720 case IX86_BUILTIN_CMPPS:
39721 case IX86_BUILTIN_CMPPD256:
39722 case IX86_BUILTIN_CMPPS256:
39723 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39724 break;
39725 /* FALLTHRU */
39726 case IX86_BUILTIN_CMPEQPD:
39727 case IX86_BUILTIN_CMPEQPS:
39728 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39729 && initializer_zerop (gimple_call_arg (def_stmt,
39730 1)))
39731 op0 = pc_rtx;
39732 break;
39733 default:
39734 break;
39740 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39741 if (! pat)
39742 return const0_rtx;
39743 emit_insn (pat);
39745 switch (fcode)
39747 case IX86_BUILTIN_GATHER3DIV16SF:
39748 if (target == NULL_RTX)
39749 target = gen_reg_rtx (V8SFmode);
39750 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39751 break;
39752 case IX86_BUILTIN_GATHER3DIV16SI:
39753 if (target == NULL_RTX)
39754 target = gen_reg_rtx (V8SImode);
39755 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39756 break;
39757 case IX86_BUILTIN_GATHER3DIV8SF:
39758 case IX86_BUILTIN_GATHERDIV8SF:
39759 if (target == NULL_RTX)
39760 target = gen_reg_rtx (V4SFmode);
39761 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39762 break;
39763 case IX86_BUILTIN_GATHER3DIV8SI:
39764 case IX86_BUILTIN_GATHERDIV8SI:
39765 if (target == NULL_RTX)
39766 target = gen_reg_rtx (V4SImode);
39767 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39768 break;
39769 default:
39770 target = subtarget;
39771 break;
39773 return target;
39775 scatter_gen:
39776 arg0 = CALL_EXPR_ARG (exp, 0);
39777 arg1 = CALL_EXPR_ARG (exp, 1);
39778 arg2 = CALL_EXPR_ARG (exp, 2);
39779 arg3 = CALL_EXPR_ARG (exp, 3);
39780 arg4 = CALL_EXPR_ARG (exp, 4);
39781 op0 = expand_normal (arg0);
39782 op1 = expand_normal (arg1);
39783 op2 = expand_normal (arg2);
39784 op3 = expand_normal (arg3);
39785 op4 = expand_normal (arg4);
39786 mode1 = insn_data[icode].operand[1].mode;
39787 mode2 = insn_data[icode].operand[2].mode;
39788 mode3 = insn_data[icode].operand[3].mode;
39789 mode4 = insn_data[icode].operand[4].mode;
39791 /* Force the memory operand to be addressed through a base register
39792 only. We don't want to do this for the memory operands of other
39793 builtin functions. */
39794 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39796 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39797 op0 = copy_to_mode_reg (Pmode, op0);
39799 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39801 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39802 op1 = copy_to_mode_reg (mode1, op1);
39804 else
39806 op1 = copy_to_reg (op1);
39807 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39810 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39811 op2 = copy_to_mode_reg (mode2, op2);
39813 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39814 op3 = copy_to_mode_reg (mode3, op3);
39816 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39818 error ("the last argument must be scale 1, 2, 4, 8");
39819 return const0_rtx;
39822 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39823 if (! pat)
39824 return const0_rtx;
39826 emit_insn (pat);
39827 return 0;
39829 vec_prefetch_gen:
39830 arg0 = CALL_EXPR_ARG (exp, 0);
39831 arg1 = CALL_EXPR_ARG (exp, 1);
39832 arg2 = CALL_EXPR_ARG (exp, 2);
39833 arg3 = CALL_EXPR_ARG (exp, 3);
39834 arg4 = CALL_EXPR_ARG (exp, 4);
39835 op0 = expand_normal (arg0);
39836 op1 = expand_normal (arg1);
39837 op2 = expand_normal (arg2);
39838 op3 = expand_normal (arg3);
39839 op4 = expand_normal (arg4);
39840 mode0 = insn_data[icode].operand[0].mode;
39841 mode1 = insn_data[icode].operand[1].mode;
39842 mode3 = insn_data[icode].operand[3].mode;
39843 mode4 = insn_data[icode].operand[4].mode;
39845 if (GET_MODE (op0) == mode0
39846 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39848 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39849 op0 = copy_to_mode_reg (mode0, op0);
39851 else if (op0 != constm1_rtx)
39853 op0 = copy_to_reg (op0);
39854 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39857 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39858 op1 = copy_to_mode_reg (mode1, op1);
39860 /* Force the memory operand to be addressed through a base register
39861 only. We don't want to do this for the memory operands of other
39862 builtin functions. */
39863 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39865 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39866 op2 = copy_to_mode_reg (Pmode, op2);
39868 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39870 error ("the forth argument must be scale 1, 2, 4, 8");
39871 return const0_rtx;
39874 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39876 error ("incorrect hint operand");
39877 return const0_rtx;
39880 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39881 if (! pat)
39882 return const0_rtx;
39884 emit_insn (pat);
39886 return 0;
39888 case IX86_BUILTIN_XABORT:
39889 icode = CODE_FOR_xabort;
39890 arg0 = CALL_EXPR_ARG (exp, 0);
39891 op0 = expand_normal (arg0);
39892 mode0 = insn_data[icode].operand[0].mode;
39893 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39895 error ("the xabort's argument must be an 8-bit immediate");
39896 return const0_rtx;
39898 emit_insn (gen_xabort (op0));
39899 return 0;
39901 default:
39902 break;
39905 for (i = 0, d = bdesc_special_args;
39906 i < ARRAY_SIZE (bdesc_special_args);
39907 i++, d++)
39908 if (d->code == fcode)
39909 return ix86_expand_special_args_builtin (d, exp, target);
39911 for (i = 0, d = bdesc_args;
39912 i < ARRAY_SIZE (bdesc_args);
39913 i++, d++)
39914 if (d->code == fcode)
39915 switch (fcode)
39917 case IX86_BUILTIN_FABSQ:
39918 case IX86_BUILTIN_COPYSIGNQ:
39919 if (!TARGET_SSE)
39920 /* Emit a normal call if SSE isn't available. */
39921 return expand_call (exp, target, ignore);
39922 default:
39923 return ix86_expand_args_builtin (d, exp, target);
39926 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39927 if (d->code == fcode)
39928 return ix86_expand_sse_comi (d, exp, target);
39930 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39931 if (d->code == fcode)
39932 return ix86_expand_round_builtin (d, exp, target);
39934 for (i = 0, d = bdesc_pcmpestr;
39935 i < ARRAY_SIZE (bdesc_pcmpestr);
39936 i++, d++)
39937 if (d->code == fcode)
39938 return ix86_expand_sse_pcmpestr (d, exp, target);
39940 for (i = 0, d = bdesc_pcmpistr;
39941 i < ARRAY_SIZE (bdesc_pcmpistr);
39942 i++, d++)
39943 if (d->code == fcode)
39944 return ix86_expand_sse_pcmpistr (d, exp, target);
39946 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39947 if (d->code == fcode)
39948 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39949 (enum ix86_builtin_func_type)
39950 d->flag, d->comparison);
39952 gcc_unreachable ();
39955 /* This returns the target-specific builtin with code CODE if
39956 current_function_decl has visibility on this builtin, which is checked
39957 using isa flags. Returns NULL_TREE otherwise. */
39959 static tree ix86_get_builtin (enum ix86_builtins code)
39961 struct cl_target_option *opts;
39962 tree target_tree = NULL_TREE;
39964 /* Determine the isa flags of current_function_decl. */
39966 if (current_function_decl)
39967 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
39969 if (target_tree == NULL)
39970 target_tree = target_option_default_node;
39972 opts = TREE_TARGET_OPTION (target_tree);
39974 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
39975 return ix86_builtin_decl (code, true);
39976 else
39977 return NULL_TREE;
39980 /* Return function decl for target specific builtin
39981 for the given MPX builtin passed in FCODE. */
39982 static tree
39983 ix86_builtin_mpx_function (unsigned fcode)
39985 switch (fcode)
39987 case BUILT_IN_CHKP_BNDMK:
39988 return ix86_builtins[IX86_BUILTIN_BNDMK];
39990 case BUILT_IN_CHKP_BNDSTX:
39991 return ix86_builtins[IX86_BUILTIN_BNDSTX];
39993 case BUILT_IN_CHKP_BNDLDX:
39994 return ix86_builtins[IX86_BUILTIN_BNDLDX];
39996 case BUILT_IN_CHKP_BNDCL:
39997 return ix86_builtins[IX86_BUILTIN_BNDCL];
39999 case BUILT_IN_CHKP_BNDCU:
40000 return ix86_builtins[IX86_BUILTIN_BNDCU];
40002 case BUILT_IN_CHKP_BNDRET:
40003 return ix86_builtins[IX86_BUILTIN_BNDRET];
40005 case BUILT_IN_CHKP_INTERSECT:
40006 return ix86_builtins[IX86_BUILTIN_BNDINT];
40008 case BUILT_IN_CHKP_NARROW:
40009 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40011 case BUILT_IN_CHKP_SIZEOF:
40012 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40014 case BUILT_IN_CHKP_EXTRACT_LOWER:
40015 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40017 case BUILT_IN_CHKP_EXTRACT_UPPER:
40018 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40020 default:
40021 return NULL_TREE;
40024 gcc_unreachable ();
40027 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40029 Return an address to be used to load/store bounds for pointer
40030 passed in SLOT.
40032 SLOT_NO is an integer constant holding the number of a target
40033 dependent special slot to be used in case SLOT is not a memory.
40035 SPECIAL_BASE is a pointer to be used as a base of fake address
40036 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40037 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40039 static rtx
40040 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40042 rtx addr = NULL;
40044 /* NULL slot means we pass bounds for pointer not passed to the
40045 function at all. Register slot means we pass pointer in a
40046 register. In both these cases bounds are passed via Bounds
40047 Table. Since we do not have actual pointer stored in memory,
40048 we have to use fake addresses to access Bounds Table. We
40049 start with (special_base - sizeof (void*)) and decrease this
40050 address by pointer size to get addresses for other slots. */
40051 if (!slot || REG_P (slot))
40053 gcc_assert (CONST_INT_P (slot_no));
40054 addr = plus_constant (Pmode, special_base,
40055 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40057 /* If pointer is passed in a memory then its address is used to
40058 access Bounds Table. */
40059 else if (MEM_P (slot))
40061 addr = XEXP (slot, 0);
40062 if (!register_operand (addr, Pmode))
40063 addr = copy_addr_to_reg (addr);
40065 else
40066 gcc_unreachable ();
40068 return addr;
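/* For illustration, with 64-bit pointers the fake addresses computed
   above are special_base - 8 for slot 0, special_base - 16 for slot 1,
   special_base - 24 for slot 2, and so on, i.e. -(slot_no + 1) * 8
   from SPECIAL_BASE.  */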
40071 /* Expand pass uses this hook to load bounds for function parameter
40072 PTR passed in SLOT in case its bounds are not passed in a register.
40074 If SLOT is a memory, then bounds are loaded as for regular pointer
40075 loaded from memory. PTR may be NULL in case SLOT is a memory.
40076 In that case the value of PTR (if required) may be loaded from SLOT.
40078 If SLOT is NULL or a register then SLOT_NO is an integer constant
40079 holding the number of the target-dependent special slot which should be
40080 used to obtain bounds.
40082 Return loaded bounds. */
40084 static rtx
40085 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40087 rtx reg = gen_reg_rtx (BNDmode);
40088 rtx addr;
40090 /* Get address to be used to access Bounds Table. Special slots start
40091 at the location of return address of the current function. */
40092 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40094 /* Load the pointer value from memory if we don't have it. */
40095 if (!ptr)
40097 gcc_assert (MEM_P (slot));
40098 ptr = copy_addr_to_reg (slot);
40101 emit_insn (BNDmode == BND64mode
40102 ? gen_bnd64_ldx (reg, addr, ptr)
40103 : gen_bnd32_ldx (reg, addr, ptr));
40105 return reg;
40108 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40109 passed in SLOT in case BOUNDS are not passed in a register.
40111 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40112 stored in memory. PTR may be NULL in case SLOT is a memory.
40113 In that case the value of PTR (if required) may be loaded from SLOT.
40115 If SLOT is NULL or a register then SLOT_NO is an integer constant
40116 holding the number of the target-dependent special slot which should be
40117 used to store BOUNDS. */
40119 static void
40120 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40122 rtx addr;
40124 /* Get address to be used to access Bounds Table. Special slots start
40125 at the location of return address of a called function. */
40126 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40128 /* Load the pointer value from memory if we don't have it. */
40129 if (!ptr)
40131 gcc_assert (MEM_P (slot));
40132 ptr = copy_addr_to_reg (slot);
40135 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40136 if (!register_operand (bounds, BNDmode))
40137 bounds = copy_to_mode_reg (BNDmode, bounds);
40139 emit_insn (BNDmode == BND64mode
40140 ? gen_bnd64_stx (addr, ptr, bounds)
40141 : gen_bnd32_stx (addr, ptr, bounds));
40144 /* Load and return bounds returned by function in SLOT. */
40146 static rtx
40147 ix86_load_returned_bounds (rtx slot)
40149 rtx res;
40151 gcc_assert (REG_P (slot));
40152 res = gen_reg_rtx (BNDmode);
40153 emit_move_insn (res, slot);
40155 return res;
40158 /* Store BOUNDS returned by function into SLOT. */
40160 static void
40161 ix86_store_returned_bounds (rtx slot, rtx bounds)
40163 gcc_assert (REG_P (slot));
40164 emit_move_insn (slot, bounds);
40167 /* Returns a function decl for a vectorized version of the builtin function
40168 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40169 if it is not available. */
40171 static tree
40172 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40173 tree type_in)
40175 machine_mode in_mode, out_mode;
40176 int in_n, out_n;
40177 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40179 if (TREE_CODE (type_out) != VECTOR_TYPE
40180 || TREE_CODE (type_in) != VECTOR_TYPE
40181 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40182 return NULL_TREE;
40184 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40185 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40186 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40187 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40189 switch (fn)
40191 case BUILT_IN_SQRT:
40192 if (out_mode == DFmode && in_mode == DFmode)
40194 if (out_n == 2 && in_n == 2)
40195 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40196 else if (out_n == 4 && in_n == 4)
40197 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40198 else if (out_n == 8 && in_n == 8)
40199 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40201 break;
40203 case BUILT_IN_EXP2F:
40204 if (out_mode == SFmode && in_mode == SFmode)
40206 if (out_n == 16 && in_n == 16)
40207 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40209 break;
40211 case BUILT_IN_SQRTF:
40212 if (out_mode == SFmode && in_mode == SFmode)
40214 if (out_n == 4 && in_n == 4)
40215 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40216 else if (out_n == 8 && in_n == 8)
40217 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40218 else if (out_n == 16 && in_n == 16)
40219 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40221 break;
40223 case BUILT_IN_IFLOOR:
40224 case BUILT_IN_LFLOOR:
40225 case BUILT_IN_LLFLOOR:
40226 /* The round insn does not trap on denormals. */
40227 if (flag_trapping_math || !TARGET_ROUND)
40228 break;
40230 if (out_mode == SImode && in_mode == DFmode)
40232 if (out_n == 4 && in_n == 2)
40233 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40234 else if (out_n == 8 && in_n == 4)
40235 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40236 else if (out_n == 16 && in_n == 8)
40237 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40239 break;
40241 case BUILT_IN_IFLOORF:
40242 case BUILT_IN_LFLOORF:
40243 case BUILT_IN_LLFLOORF:
40244 /* The round insn does not trap on denormals. */
40245 if (flag_trapping_math || !TARGET_ROUND)
40246 break;
40248 if (out_mode == SImode && in_mode == SFmode)
40250 if (out_n == 4 && in_n == 4)
40251 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40252 else if (out_n == 8 && in_n == 8)
40253 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40255 break;
40257 case BUILT_IN_ICEIL:
40258 case BUILT_IN_LCEIL:
40259 case BUILT_IN_LLCEIL:
40260 /* The round insn does not trap on denormals. */
40261 if (flag_trapping_math || !TARGET_ROUND)
40262 break;
40264 if (out_mode == SImode && in_mode == DFmode)
40266 if (out_n == 4 && in_n == 2)
40267 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40268 else if (out_n == 8 && in_n == 4)
40269 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40270 else if (out_n == 16 && in_n == 8)
40271 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40273 break;
40275 case BUILT_IN_ICEILF:
40276 case BUILT_IN_LCEILF:
40277 case BUILT_IN_LLCEILF:
40278 /* The round insn does not trap on denormals. */
40279 if (flag_trapping_math || !TARGET_ROUND)
40280 break;
40282 if (out_mode == SImode && in_mode == SFmode)
40284 if (out_n == 4 && in_n == 4)
40285 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40286 else if (out_n == 8 && in_n == 8)
40287 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40289 break;
40291 case BUILT_IN_IRINT:
40292 case BUILT_IN_LRINT:
40293 case BUILT_IN_LLRINT:
40294 if (out_mode == SImode && in_mode == DFmode)
40296 if (out_n == 4 && in_n == 2)
40297 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40298 else if (out_n == 8 && in_n == 4)
40299 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40301 break;
40303 case BUILT_IN_IRINTF:
40304 case BUILT_IN_LRINTF:
40305 case BUILT_IN_LLRINTF:
40306 if (out_mode == SImode && in_mode == SFmode)
40308 if (out_n == 4 && in_n == 4)
40309 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40310 else if (out_n == 8 && in_n == 8)
40311 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40313 break;
40315 case BUILT_IN_IROUND:
40316 case BUILT_IN_LROUND:
40317 case BUILT_IN_LLROUND:
40318 /* The round insn does not trap on denormals. */
40319 if (flag_trapping_math || !TARGET_ROUND)
40320 break;
40322 if (out_mode == SImode && in_mode == DFmode)
40324 if (out_n == 4 && in_n == 2)
40325 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40326 else if (out_n == 8 && in_n == 4)
40327 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40328 else if (out_n == 16 && in_n == 8)
40329 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40331 break;
40333 case BUILT_IN_IROUNDF:
40334 case BUILT_IN_LROUNDF:
40335 case BUILT_IN_LLROUNDF:
40336 /* The round insn does not trap on denormals. */
40337 if (flag_trapping_math || !TARGET_ROUND)
40338 break;
40340 if (out_mode == SImode && in_mode == SFmode)
40342 if (out_n == 4 && in_n == 4)
40343 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40344 else if (out_n == 8 && in_n == 8)
40345 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40347 break;
40349 case BUILT_IN_COPYSIGN:
40350 if (out_mode == DFmode && in_mode == DFmode)
40352 if (out_n == 2 && in_n == 2)
40353 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40354 else if (out_n == 4 && in_n == 4)
40355 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40356 else if (out_n == 8 && in_n == 8)
40357 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40359 break;
40361 case BUILT_IN_COPYSIGNF:
40362 if (out_mode == SFmode && in_mode == SFmode)
40364 if (out_n == 4 && in_n == 4)
40365 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40366 else if (out_n == 8 && in_n == 8)
40367 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40368 else if (out_n == 16 && in_n == 16)
40369 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40371 break;
40373 case BUILT_IN_FLOOR:
40374 /* The round insn does not trap on denormals. */
40375 if (flag_trapping_math || !TARGET_ROUND)
40376 break;
40378 if (out_mode == DFmode && in_mode == DFmode)
40380 if (out_n == 2 && in_n == 2)
40381 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40382 else if (out_n == 4 && in_n == 4)
40383 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40385 break;
40387 case BUILT_IN_FLOORF:
40388 /* The round insn does not trap on denormals. */
40389 if (flag_trapping_math || !TARGET_ROUND)
40390 break;
40392 if (out_mode == SFmode && in_mode == SFmode)
40394 if (out_n == 4 && in_n == 4)
40395 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40396 else if (out_n == 8 && in_n == 8)
40397 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40399 break;
40401 case BUILT_IN_CEIL:
40402 /* The round insn does not trap on denormals. */
40403 if (flag_trapping_math || !TARGET_ROUND)
40404 break;
40406 if (out_mode == DFmode && in_mode == DFmode)
40408 if (out_n == 2 && in_n == 2)
40409 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40410 else if (out_n == 4 && in_n == 4)
40411 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40413 break;
40415 case BUILT_IN_CEILF:
40416 /* The round insn does not trap on denormals. */
40417 if (flag_trapping_math || !TARGET_ROUND)
40418 break;
40420 if (out_mode == SFmode && in_mode == SFmode)
40422 if (out_n == 4 && in_n == 4)
40423 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40424 else if (out_n == 8 && in_n == 8)
40425 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40427 break;
40429 case BUILT_IN_TRUNC:
40430 /* The round insn does not trap on denormals. */
40431 if (flag_trapping_math || !TARGET_ROUND)
40432 break;
40434 if (out_mode == DFmode && in_mode == DFmode)
40436 if (out_n == 2 && in_n == 2)
40437 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40438 else if (out_n == 4 && in_n == 4)
40439 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40441 break;
40443 case BUILT_IN_TRUNCF:
40444 /* The round insn does not trap on denormals. */
40445 if (flag_trapping_math || !TARGET_ROUND)
40446 break;
40448 if (out_mode == SFmode && in_mode == SFmode)
40450 if (out_n == 4 && in_n == 4)
40451 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40452 else if (out_n == 8 && in_n == 8)
40453 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40455 break;
40457 case BUILT_IN_RINT:
40458 /* The round insn does not trap on denormals. */
40459 if (flag_trapping_math || !TARGET_ROUND)
40460 break;
40462 if (out_mode == DFmode && in_mode == DFmode)
40464 if (out_n == 2 && in_n == 2)
40465 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40466 else if (out_n == 4 && in_n == 4)
40467 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40469 break;
40471 case BUILT_IN_RINTF:
40472 /* The round insn does not trap on denormals. */
40473 if (flag_trapping_math || !TARGET_ROUND)
40474 break;
40476 if (out_mode == SFmode && in_mode == SFmode)
40478 if (out_n == 4 && in_n == 4)
40479 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40480 else if (out_n == 8 && in_n == 8)
40481 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40483 break;
40485 case BUILT_IN_ROUND:
40486 /* The round insn does not trap on denormals. */
40487 if (flag_trapping_math || !TARGET_ROUND)
40488 break;
40490 if (out_mode == DFmode && in_mode == DFmode)
40492 if (out_n == 2 && in_n == 2)
40493 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40494 else if (out_n == 4 && in_n == 4)
40495 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40497 break;
40499 case BUILT_IN_ROUNDF:
40500 /* The round insn does not trap on denormals. */
40501 if (flag_trapping_math || !TARGET_ROUND)
40502 break;
40504 if (out_mode == SFmode && in_mode == SFmode)
40506 if (out_n == 4 && in_n == 4)
40507 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40508 else if (out_n == 8 && in_n == 8)
40509 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40511 break;
40513 case BUILT_IN_FMA:
40514 if (out_mode == DFmode && in_mode == DFmode)
40516 if (out_n == 2 && in_n == 2)
40517 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40518 if (out_n == 4 && in_n == 4)
40519 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40521 break;
40523 case BUILT_IN_FMAF:
40524 if (out_mode == SFmode && in_mode == SFmode)
40526 if (out_n == 4 && in_n == 4)
40527 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40528 if (out_n == 8 && in_n == 8)
40529 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40531 break;
40533 default:
40534 break;
40537 /* Dispatch to a handler for a vectorization library. */
40538 if (ix86_veclib_handler)
40539 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40540 type_in);
40542 return NULL_TREE;
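/* As an illustration of the mapping above: vectorizing a scalar
   __builtin_sqrt call with a V2DF result and V2DF argument yields
   IX86_BUILTIN_SQRTPD, the V4DF case yields IX86_BUILTIN_SQRTPD256 and
   the V8DF case IX86_BUILTIN_SQRTPD512, in each case only if
   ix86_get_builtin reports the required ISA as enabled.  */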
40545 /* Handler for an SVML-style interface to
40546 a library with vectorized intrinsics. */
40548 static tree
40549 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40551 char name[20];
40552 tree fntype, new_fndecl, args;
40553 unsigned arity;
40554 const char *bname;
40555 machine_mode el_mode, in_mode;
40556 int n, in_n;
40558 /* The SVML is suitable for unsafe math only. */
40559 if (!flag_unsafe_math_optimizations)
40560 return NULL_TREE;
40562 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40563 n = TYPE_VECTOR_SUBPARTS (type_out);
40564 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40565 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40566 if (el_mode != in_mode
40567 || n != in_n)
40568 return NULL_TREE;
40570 switch (fn)
40572 case BUILT_IN_EXP:
40573 case BUILT_IN_LOG:
40574 case BUILT_IN_LOG10:
40575 case BUILT_IN_POW:
40576 case BUILT_IN_TANH:
40577 case BUILT_IN_TAN:
40578 case BUILT_IN_ATAN:
40579 case BUILT_IN_ATAN2:
40580 case BUILT_IN_ATANH:
40581 case BUILT_IN_CBRT:
40582 case BUILT_IN_SINH:
40583 case BUILT_IN_SIN:
40584 case BUILT_IN_ASINH:
40585 case BUILT_IN_ASIN:
40586 case BUILT_IN_COSH:
40587 case BUILT_IN_COS:
40588 case BUILT_IN_ACOSH:
40589 case BUILT_IN_ACOS:
40590 if (el_mode != DFmode || n != 2)
40591 return NULL_TREE;
40592 break;
40594 case BUILT_IN_EXPF:
40595 case BUILT_IN_LOGF:
40596 case BUILT_IN_LOG10F:
40597 case BUILT_IN_POWF:
40598 case BUILT_IN_TANHF:
40599 case BUILT_IN_TANF:
40600 case BUILT_IN_ATANF:
40601 case BUILT_IN_ATAN2F:
40602 case BUILT_IN_ATANHF:
40603 case BUILT_IN_CBRTF:
40604 case BUILT_IN_SINHF:
40605 case BUILT_IN_SINF:
40606 case BUILT_IN_ASINHF:
40607 case BUILT_IN_ASINF:
40608 case BUILT_IN_COSHF:
40609 case BUILT_IN_COSF:
40610 case BUILT_IN_ACOSHF:
40611 case BUILT_IN_ACOSF:
40612 if (el_mode != SFmode || n != 4)
40613 return NULL_TREE;
40614 break;
40616 default:
40617 return NULL_TREE;
40620 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40622 if (fn == BUILT_IN_LOGF)
40623 strcpy (name, "vmlsLn4");
40624 else if (fn == BUILT_IN_LOG)
40625 strcpy (name, "vmldLn2");
40626 else if (n == 4)
40628 sprintf (name, "vmls%s", bname+10);
40629 name[strlen (name)-1] = '4';
40631 else
40632 sprintf (name, "vmld%s2", bname+10);
40634 /* Capitalize the first letter of the math function name. */
40635 name[4] &= ~0x20;
40637 arity = 0;
40638 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40639 args;
40640 args = TREE_CHAIN (args))
40641 arity++;
40643 if (arity == 1)
40644 fntype = build_function_type_list (type_out, type_in, NULL);
40645 else
40646 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40648 /* Build a function declaration for the vectorized function. */
40649 new_fndecl = build_decl (BUILTINS_LOCATION,
40650 FUNCTION_DECL, get_identifier (name), fntype);
40651 TREE_PUBLIC (new_fndecl) = 1;
40652 DECL_EXTERNAL (new_fndecl) = 1;
40653 DECL_IS_NOVOPS (new_fndecl) = 1;
40654 TREE_READONLY (new_fndecl) = 1;
40656 return new_fndecl;
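/* For illustration of the mangling above: BUILT_IN_SINF with four SFmode
   lanes first becomes "vmlssinf", the trailing character is replaced by
   '4' giving "vmlssin4", and clearing bit 0x20 in name[4] capitalizes the
   first letter of the math function, yielding "vmlsSin4".  The
   double-precision BUILT_IN_SIN case yields "vmldSin2", while log and
   logf are special-cased above to "vmldLn2" and "vmlsLn4".  */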
40659 /* Handler for an ACML-style interface to
40660 a library with vectorized intrinsics. */
40662 static tree
40663 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40665 char name[20] = "__vr.._";
40666 tree fntype, new_fndecl, args;
40667 unsigned arity;
40668 const char *bname;
40669 machine_mode el_mode, in_mode;
40670 int n, in_n;
40672 /* The ACML is 64-bit only and suitable for unsafe math only, as
40673 it does not correctly support parts of IEEE with the required
40674 precision such as denormals. */
40675 if (!TARGET_64BIT
40676 || !flag_unsafe_math_optimizations)
40677 return NULL_TREE;
40679 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40680 n = TYPE_VECTOR_SUBPARTS (type_out);
40681 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40682 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40683 if (el_mode != in_mode
40684 || n != in_n)
40685 return NULL_TREE;
40687 switch (fn)
40689 case BUILT_IN_SIN:
40690 case BUILT_IN_COS:
40691 case BUILT_IN_EXP:
40692 case BUILT_IN_LOG:
40693 case BUILT_IN_LOG2:
40694 case BUILT_IN_LOG10:
40695 name[4] = 'd';
40696 name[5] = '2';
40697 if (el_mode != DFmode
40698 || n != 2)
40699 return NULL_TREE;
40700 break;
40702 case BUILT_IN_SINF:
40703 case BUILT_IN_COSF:
40704 case BUILT_IN_EXPF:
40705 case BUILT_IN_POWF:
40706 case BUILT_IN_LOGF:
40707 case BUILT_IN_LOG2F:
40708 case BUILT_IN_LOG10F:
40709 name[4] = 's';
40710 name[5] = '4';
40711 if (el_mode != SFmode
40712 || n != 4)
40713 return NULL_TREE;
40714 break;
40716 default:
40717 return NULL_TREE;
40720 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40721 sprintf (name + 7, "%s", bname+10);
40723 arity = 0;
40724 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40725 args;
40726 args = TREE_CHAIN (args))
40727 arity++;
40729 if (arity == 1)
40730 fntype = build_function_type_list (type_out, type_in, NULL);
40731 else
40732 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40734 /* Build a function declaration for the vectorized function. */
40735 new_fndecl = build_decl (BUILTINS_LOCATION,
40736 FUNCTION_DECL, get_identifier (name), fntype);
40737 TREE_PUBLIC (new_fndecl) = 1;
40738 DECL_EXTERNAL (new_fndecl) = 1;
40739 DECL_IS_NOVOPS (new_fndecl) = 1;
40740 TREE_READONLY (new_fndecl) = 1;
40742 return new_fndecl;
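/* For illustration of the mangling above: the "__vr.._" template is
   filled in as "__vrd2_" for the two-lane DFmode cases and "__vrs4_" for
   the four-lane SFmode cases, and the scalar builtin name minus its
   "__builtin_" prefix is appended, so BUILT_IN_SIN maps to "__vrd2_sin"
   and BUILT_IN_SINF to "__vrs4_sinf".  */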
40745 /* Returns a decl of a function that implements gather load with
40746 memory type MEM_VECTYPE, index type INDEX_TYPE and scale factor SCALE.
40747 Return NULL_TREE if it is not available. */
40749 static tree
40750 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40751 const_tree index_type, int scale)
40753 bool si;
40754 enum ix86_builtins code;
40756 if (! TARGET_AVX2)
40757 return NULL_TREE;
40759 if ((TREE_CODE (index_type) != INTEGER_TYPE
40760 && !POINTER_TYPE_P (index_type))
40761 || (TYPE_MODE (index_type) != SImode
40762 && TYPE_MODE (index_type) != DImode))
40763 return NULL_TREE;
40765 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40766 return NULL_TREE;
40768 /* v*gather* insn sign extends index to pointer mode. */
40769 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40770 && TYPE_UNSIGNED (index_type))
40771 return NULL_TREE;
40773 if (scale <= 0
40774 || scale > 8
40775 || (scale & (scale - 1)) != 0)
40776 return NULL_TREE;
40778 si = TYPE_MODE (index_type) == SImode;
40779 switch (TYPE_MODE (mem_vectype))
40781 case V2DFmode:
40782 if (TARGET_AVX512VL)
40783 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40784 else
40785 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40786 break;
40787 case V4DFmode:
40788 if (TARGET_AVX512VL)
40789 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40790 else
40791 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40792 break;
40793 case V2DImode:
40794 if (TARGET_AVX512VL)
40795 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40796 else
40797 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40798 break;
40799 case V4DImode:
40800 if (TARGET_AVX512VL)
40801 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40802 else
40803 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40804 break;
40805 case V4SFmode:
40806 if (TARGET_AVX512VL)
40807 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40808 else
40809 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40810 break;
40811 case V8SFmode:
40812 if (TARGET_AVX512VL)
40813 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40814 else
40815 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40816 break;
40817 case V4SImode:
40818 if (TARGET_AVX512VL)
40819 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40820 else
40821 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40822 break;
40823 case V8SImode:
40824 if (TARGET_AVX512VL)
40825 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40826 else
40827 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40828 break;
40829 case V8DFmode:
40830 if (TARGET_AVX512F)
40831 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40832 else
40833 return NULL_TREE;
40834 break;
40835 case V8DImode:
40836 if (TARGET_AVX512F)
40837 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40838 else
40839 return NULL_TREE;
40840 break;
40841 case V16SFmode:
40842 if (TARGET_AVX512F)
40843 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40844 else
40845 return NULL_TREE;
40846 break;
40847 case V16SImode:
40848 if (TARGET_AVX512F)
40849 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40850 else
40851 return NULL_TREE;
40852 break;
40853 default:
40854 return NULL_TREE;
40857 return ix86_get_builtin (code);
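/* For illustration: gathering a V4SF vector with an SImode index on an
   AVX2 target without AVX512VL selects IX86_BUILTIN_GATHERSIV4SF, while
   the same gather with a DImode index selects IX86_BUILTIN_GATHERDIV4SF;
   the 512-bit memory modes additionally require AVX512F.  */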
40860 /* Returns a decl for a target-specific builtin that implements the
40861 reciprocal of the function, or NULL_TREE if it is not available. */
40863 static tree
40864 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40866 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40867 && flag_finite_math_only && !flag_trapping_math
40868 && flag_unsafe_math_optimizations))
40869 return NULL_TREE;
40871 if (md_fn)
40872 /* Machine dependent builtins. */
40873 switch (fn)
40875 /* Vectorized version of sqrt to rsqrt conversion. */
40876 case IX86_BUILTIN_SQRTPS_NR:
40877 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40879 case IX86_BUILTIN_SQRTPS_NR256:
40880 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40882 default:
40883 return NULL_TREE;
40885 else
40886 /* Normal builtins. */
40887 switch (fn)
40889 /* Sqrt to rsqrt conversion. */
40890 case BUILT_IN_SQRTF:
40891 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40893 default:
40894 return NULL_TREE;
40898 /* Helper for avx_vpermilps256_operand et al. This is also used by
40899 the expansion functions to turn the parallel back into a mask.
40900 The return value is 0 for no match and the imm8+1 for a match. */
40903 avx_vpermilp_parallel (rtx par, machine_mode mode)
40905 unsigned i, nelt = GET_MODE_NUNITS (mode);
40906 unsigned mask = 0;
40907 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40909 if (XVECLEN (par, 0) != (int) nelt)
40910 return 0;
40912 /* Validate that all of the elements are constants, and not totally
40913 out of range. Copy the data into an integral array to make the
40914 subsequent checks easier. */
40915 for (i = 0; i < nelt; ++i)
40917 rtx er = XVECEXP (par, 0, i);
40918 unsigned HOST_WIDE_INT ei;
40920 if (!CONST_INT_P (er))
40921 return 0;
40922 ei = INTVAL (er);
40923 if (ei >= nelt)
40924 return 0;
40925 ipar[i] = ei;
40928 switch (mode)
40930 case V8DFmode:
40931 /* In the 512-bit DFmode case, we can only move elements within
40932 a 128-bit lane. First fill the second part of the mask,
40933 then fallthru. */
40934 for (i = 4; i < 6; ++i)
40936 if (ipar[i] < 4 || ipar[i] >= 6)
40937 return 0;
40938 mask |= (ipar[i] - 4) << i;
40940 for (i = 6; i < 8; ++i)
40942 if (ipar[i] < 6)
40943 return 0;
40944 mask |= (ipar[i] - 6) << i;
40946 /* FALLTHRU */
40948 case V4DFmode:
40949 /* In the 256-bit DFmode case, we can only move elements within
40950 a 128-bit lane. */
40951 for (i = 0; i < 2; ++i)
40953 if (ipar[i] >= 2)
40954 return 0;
40955 mask |= ipar[i] << i;
40957 for (i = 2; i < 4; ++i)
40959 if (ipar[i] < 2)
40960 return 0;
40961 mask |= (ipar[i] - 2) << i;
40963 break;
40965 case V16SFmode:
40966 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
40967 must mirror the permutation in the lower 256 bits. */
40968 for (i = 0; i < 8; ++i)
40969 if (ipar[i] + 8 != ipar[i + 8])
40970 return 0;
40971 /* FALLTHRU */
40973 case V8SFmode:
40974 /* In the 256-bit SFmode case, we have full freedom of
40975 movement within the low 128-bit lane, but the high 128-bit
40976 lane must mirror the exact same pattern. */
40977 for (i = 0; i < 4; ++i)
40978 if (ipar[i] + 4 != ipar[i + 4])
40979 return 0;
40980 nelt = 4;
40981 /* FALLTHRU */
40983 case V2DFmode:
40984 case V4SFmode:
40985 /* In the 128-bit case, we have full freedom in the placement of
40986 the elements from the source operand. */
40987 for (i = 0; i < nelt; ++i)
40988 mask |= ipar[i] << (i * (nelt / 2));
40989 break;
40991 default:
40992 gcc_unreachable ();
40995 /* Make sure success has a non-zero value by adding one. */
40996 return mask + 1;
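/* For illustration: for a V4SF parallel (3 2 1 0) each element
   contributes ipar[i] << (i * 2), so mask becomes
   3 | (2 << 2) | (1 << 4) | (0 << 6) = 0x1b and the function returns
   0x1c, i.e. the vpermilps immediate 0x1b plus one.  */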
40999 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41000 the expansion functions to turn the parallel back into a mask.
41001 The return value is 0 for no match and the imm8+1 for a match. */
41004 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41006 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41007 unsigned mask = 0;
41008 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41010 if (XVECLEN (par, 0) != (int) nelt)
41011 return 0;
41013 /* Validate that all of the elements are constants, and not totally
41014 out of range. Copy the data into an integral array to make the
41015 subsequent checks easier. */
41016 for (i = 0; i < nelt; ++i)
41018 rtx er = XVECEXP (par, 0, i);
41019 unsigned HOST_WIDE_INT ei;
41021 if (!CONST_INT_P (er))
41022 return 0;
41023 ei = INTVAL (er);
41024 if (ei >= 2 * nelt)
41025 return 0;
41026 ipar[i] = ei;
41029 /* Validate that each half of the permute selects consecutive elements. */
41030 for (i = 0; i < nelt2 - 1; ++i)
41031 if (ipar[i] + 1 != ipar[i + 1])
41032 return 0;
41033 for (i = nelt2; i < nelt - 1; ++i)
41034 if (ipar[i] + 1 != ipar[i + 1])
41035 return 0;
41037 /* Reconstruct the mask. */
41038 for (i = 0; i < 2; ++i)
41040 unsigned e = ipar[i * nelt2];
41041 if (e % nelt2)
41042 return 0;
41043 e /= nelt2;
41044 mask |= e << (i * 4);
41047 /* Make sure success has a non-zero value by adding one. */
41048 return mask + 1;
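/* For illustration: a V4DF parallel (2 3 4 5) selects the high lane of
   the first operand followed by the low lane of the second.  Both halves
   are consecutive, the lane numbers reconstruct to 1 and 2, and mask
   becomes 1 | (2 << 4) = 0x21, so the function returns 0x22 (the
   vperm2f128 immediate plus one).  */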
41051 /* Return a register priority for hard reg REGNO. */
41052 static int
41053 ix86_register_priority (int hard_regno)
41055 /* ebp and r13 as the base always want a displacement, and r12 as the
41056 base always wants an index. So discourage their use in an
41057 address. */
41058 if (hard_regno == R12_REG || hard_regno == R13_REG)
41059 return 0;
41060 if (hard_regno == BP_REG)
41061 return 1;
41062 /* New x86-64 int registers result in bigger code size. Discourage
41063 them. */
41064 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41065 return 2;
41066 /* New x86-64 SSE registers result in bigger code size. Discourage
41067 them. */
41068 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41069 return 2;
41070 /* Usage of AX register results in smaller code. Prefer it. */
41071 if (hard_regno == 0)
41072 return 4;
41073 return 3;
41076 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41078 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41079 QImode must go into class Q_REGS.
41080 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41081 movdf to do mem-to-mem moves through integer regs. */
41083 static reg_class_t
41084 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41086 machine_mode mode = GET_MODE (x);
41088 /* We're only allowed to return a subclass of CLASS. Many of the
41089 following checks fail for NO_REGS, so eliminate that early. */
41090 if (regclass == NO_REGS)
41091 return NO_REGS;
41093 /* All classes can load zeros. */
41094 if (x == CONST0_RTX (mode))
41095 return regclass;
41097 /* Force constants into memory if we are loading a (nonzero) constant into
41098 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41099 instructions to load from a constant. */
41100 if (CONSTANT_P (x)
41101 && (MAYBE_MMX_CLASS_P (regclass)
41102 || MAYBE_SSE_CLASS_P (regclass)
41103 || MAYBE_MASK_CLASS_P (regclass)))
41104 return NO_REGS;
41106 /* Prefer SSE regs only, if we can use them for math. */
41107 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41108 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41110 /* Floating-point constants need more complex checks. */
41111 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41113 /* General regs can load everything. */
41114 if (reg_class_subset_p (regclass, GENERAL_REGS))
41115 return regclass;
41117 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41118 zero above. We only want to wind up preferring 80387 registers if
41119 we plan on doing computation with them. */
41120 if (TARGET_80387
41121 && standard_80387_constant_p (x) > 0)
41123 /* Limit class to non-sse. */
41124 if (regclass == FLOAT_SSE_REGS)
41125 return FLOAT_REGS;
41126 if (regclass == FP_TOP_SSE_REGS)
41127 return FP_TOP_REG;
41128 if (regclass == FP_SECOND_SSE_REGS)
41129 return FP_SECOND_REG;
41130 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41131 return regclass;
41134 return NO_REGS;
41137 /* Generally when we see PLUS here, it's the function invariant
41138 (plus soft-fp const_int). Which can only be computed into general
41139 regs. */
41140 if (GET_CODE (x) == PLUS)
41141 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41143 /* QImode constants are easy to load, but non-constant QImode data
41144 must go into Q_REGS. */
41145 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41147 if (reg_class_subset_p (regclass, Q_REGS))
41148 return regclass;
41149 if (reg_class_subset_p (Q_REGS, regclass))
41150 return Q_REGS;
41151 return NO_REGS;
41154 return regclass;
41157 /* Discourage putting floating-point values in SSE registers unless
41158 SSE math is being used, and likewise for the 387 registers. */
41159 static reg_class_t
41160 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41162 machine_mode mode = GET_MODE (x);
41164 /* Restrict the output reload class to the register bank that we are doing
41165 math on. If we would like not to return a subset of CLASS, reject this
41166 alternative: if reload cannot do this, it will still use its choice. */
41167 mode = GET_MODE (x);
41168 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41169 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41171 if (X87_FLOAT_MODE_P (mode))
41173 if (regclass == FP_TOP_SSE_REGS)
41174 return FP_TOP_REG;
41175 else if (regclass == FP_SECOND_SSE_REGS)
41176 return FP_SECOND_REG;
41177 else
41178 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41181 return regclass;
41184 static reg_class_t
41185 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41186 machine_mode mode, secondary_reload_info *sri)
41188 /* Double-word spills from general registers to non-offsettable memory
41189 references (zero-extended addresses) require special handling. */
41190 if (TARGET_64BIT
41191 && MEM_P (x)
41192 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41193 && INTEGER_CLASS_P (rclass)
41194 && !offsettable_memref_p (x))
41196 sri->icode = (in_p
41197 ? CODE_FOR_reload_noff_load
41198 : CODE_FOR_reload_noff_store);
41199 /* Add the cost of moving address to a temporary. */
41200 sri->extra_cost = 1;
41202 return NO_REGS;
41205 /* QImode spills from non-QI registers require
41206 intermediate register on 32bit targets. */
41207 if (mode == QImode
41208 && (MAYBE_MASK_CLASS_P (rclass)
41209 || (!TARGET_64BIT && !in_p
41210 && INTEGER_CLASS_P (rclass)
41211 && MAYBE_NON_Q_CLASS_P (rclass))))
41213 int regno;
41215 if (REG_P (x))
41216 regno = REGNO (x);
41217 else
41218 regno = -1;
41220 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41221 regno = true_regnum (x);
41223 /* Return Q_REGS if the operand is in memory. */
41224 if (regno == -1)
41225 return Q_REGS;
41228 /* This condition handles the corner case where an expression involving
41229 pointers gets vectorized. We're trying to use the address of a
41230 stack slot as a vector initializer.
41232 (set (reg:V2DI 74 [ vect_cst_.2 ])
41233 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41235 Eventually frame gets turned into sp+offset like this:
41237 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41238 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41239 (const_int 392 [0x188]))))
41241 That later gets turned into:
41243 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41244 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41245 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41247 We'll have the following reload recorded:
41249 Reload 0: reload_in (DI) =
41250 (plus:DI (reg/f:DI 7 sp)
41251 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41252 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41253 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41254 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41255 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41256 reload_reg_rtx: (reg:V2DI 22 xmm1)
41258 Which isn't going to work since SSE instructions can't handle scalar
41259 additions. Returning GENERAL_REGS forces the addition into integer
41260 register and reload can handle subsequent reloads without problems. */
41262 if (in_p && GET_CODE (x) == PLUS
41263 && SSE_CLASS_P (rclass)
41264 && SCALAR_INT_MODE_P (mode))
41265 return GENERAL_REGS;
41267 return NO_REGS;
41270 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41272 static bool
41273 ix86_class_likely_spilled_p (reg_class_t rclass)
41275 switch (rclass)
41277 case AREG:
41278 case DREG:
41279 case CREG:
41280 case BREG:
41281 case AD_REGS:
41282 case SIREG:
41283 case DIREG:
41284 case SSE_FIRST_REG:
41285 case FP_TOP_REG:
41286 case FP_SECOND_REG:
41287 case BND_REGS:
41288 return true;
41290 default:
41291 break;
41294 return false;
41297 /* If we are copying between general and FP registers, we need a memory
41298 location. The same is true for SSE and MMX registers.
41300 To optimize register_move_cost performance, allow inline variant.
41302 The macro can't work reliably when one of the CLASSES is a class containing
41303 registers from multiple units (SSE, MMX, integer). We avoid this by never
41304 combining those units in single alternative in the machine description.
41305 Ensure that this constraint holds to avoid unexpected surprises.
41307 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41308 enforce these sanity checks. */
41310 static inline bool
41311 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41312 machine_mode mode, int strict)
41314 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41315 return false;
41316 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41317 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41318 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41319 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41320 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41321 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41323 gcc_assert (!strict || lra_in_progress);
41324 return true;
41327 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41328 return true;
41330 /* Between mask and general, we have moves no larger than word size. */
41331 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41332 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41333 return true;
41335 /* ??? This is a lie. We do have moves between mmx/general, and for
41336 mmx/sse2. But by saying we need secondary memory we discourage the
41337 register allocator from using the mmx registers unless needed. */
41338 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41339 return true;
41341 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41343 /* SSE1 doesn't have any direct moves from other classes. */
41344 if (!TARGET_SSE2)
41345 return true;
41347 /* If the target says that inter-unit moves are more expensive
41348 than moving through memory, then don't generate them. */
41349 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41350 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41351 return true;
41353 /* Between SSE and general, we have moves no larger than word size. */
41354 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41355 return true;
41358 return false;
41361 bool
41362 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41363 machine_mode mode, int strict)
41365 return inline_secondary_memory_needed (class1, class2, mode, strict);
41368 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41370 On the 80386, this is the size of MODE in words,
41371 except in the FP regs, where a single reg is always enough. */
41373 static unsigned char
41374 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41376 if (MAYBE_INTEGER_CLASS_P (rclass))
41378 if (mode == XFmode)
41379 return (TARGET_64BIT ? 2 : 3);
41380 else if (mode == XCmode)
41381 return (TARGET_64BIT ? 4 : 6);
41382 else
41383 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41385 else
41387 if (COMPLEX_MODE_P (mode))
41388 return 2;
41389 else
41390 return 1;
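/* For illustration: an XFmode value occupies three general registers with
   -m32 and two with -m64, an XCmode value twice that, while in the x87 or
   SSE classes a complex mode always counts as two registers and any other
   mode as one.  */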
41394 /* Return true if the registers in CLASS cannot represent the change from
41395 modes FROM to TO. */
41397 bool
41398 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41399 enum reg_class regclass)
41401 if (from == to)
41402 return false;
41404 /* x87 registers can't do subreg at all, as all values are reformatted
41405 to extended precision. */
41406 if (MAYBE_FLOAT_CLASS_P (regclass))
41407 return true;
41409 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41411 /* Vector registers do not support QI or HImode loads. If we don't
41412 disallow a change to these modes, reload will assume it's ok to
41413 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41414 the vec_dupv4hi pattern. */
41415 if (GET_MODE_SIZE (from) < 4)
41416 return true;
41419 return false;
41422 /* Return the cost of moving data of mode M between a
41423 register and memory. A value of 2 is the default; this cost is
41424 relative to those in `REGISTER_MOVE_COST'.
41426 This function is used extensively by register_move_cost that is used to
41427 build tables at startup. Make it inline in this case.
41428 When IN is 2, return maximum of in and out move cost.
41430 If moving between registers and memory is more expensive than
41431 between two registers, you should define this macro to express the
41432 relative cost.
41434 Also model the increased cost of moving QImode registers in non-
41435 Q_REGS classes. */
41437 static inline int
41438 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41439 int in)
41441 int cost;
41442 if (FLOAT_CLASS_P (regclass))
41444 int index;
41445 switch (mode)
41447 case SFmode:
41448 index = 0;
41449 break;
41450 case DFmode:
41451 index = 1;
41452 break;
41453 case XFmode:
41454 index = 2;
41455 break;
41456 default:
41457 return 100;
41459 if (in == 2)
41460 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41461 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41463 if (SSE_CLASS_P (regclass))
41465 int index;
41466 switch (GET_MODE_SIZE (mode))
41468 case 4:
41469 index = 0;
41470 break;
41471 case 8:
41472 index = 1;
41473 break;
41474 case 16:
41475 index = 2;
41476 break;
41477 default:
41478 return 100;
41480 if (in == 2)
41481 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41482 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41484 if (MMX_CLASS_P (regclass))
41486 int index;
41487 switch (GET_MODE_SIZE (mode))
41489 case 4:
41490 index = 0;
41491 break;
41492 case 8:
41493 index = 1;
41494 break;
41495 default:
41496 return 100;
41498 if (in)
41499 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41500 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41502 switch (GET_MODE_SIZE (mode))
41504 case 1:
41505 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41507 if (!in)
41508 return ix86_cost->int_store[0];
41509 if (TARGET_PARTIAL_REG_DEPENDENCY
41510 && optimize_function_for_speed_p (cfun))
41511 cost = ix86_cost->movzbl_load;
41512 else
41513 cost = ix86_cost->int_load[0];
41514 if (in == 2)
41515 return MAX (cost, ix86_cost->int_store[0]);
41516 return cost;
41518 else
41520 if (in == 2)
41521 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41522 if (in)
41523 return ix86_cost->movzbl_load;
41524 else
41525 return ix86_cost->int_store[0] + 4;
41527 break;
41528 case 2:
41529 if (in == 2)
41530 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41531 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41532 default:
41533 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41534 if (mode == TFmode)
41535 mode = XFmode;
41536 if (in == 2)
41537 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41538 else if (in)
41539 cost = ix86_cost->int_load[2];
41540 else
41541 cost = ix86_cost->int_store[2];
41542 return (cost * (((int) GET_MODE_SIZE (mode)
41543 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41547 static int
41548 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41549 bool in)
41551 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41555 /* Return the cost of moving data from a register in class CLASS1 to
41556 one in class CLASS2.
41558 It is not required that the cost always equal 2 when FROM is the same as TO;
41559 on some machines it is expensive to move between registers if they are not
41560 general registers. */
41562 static int
41563 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41564 reg_class_t class2_i)
41566 enum reg_class class1 = (enum reg_class) class1_i;
41567 enum reg_class class2 = (enum reg_class) class2_i;
41569 /* In case we require secondary memory, compute cost of the store followed
41570 by load. In order to avoid bad register allocation choices, we need
41571 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41573 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41575 int cost = 1;
41577 cost += inline_memory_move_cost (mode, class1, 2);
41578 cost += inline_memory_move_cost (mode, class2, 2);
41580 /* In case of copying from a general purpose register we may emit multiple
41581 stores followed by a single load, causing a memory size mismatch stall.
41582 Count this as an arbitrarily high cost of 20. */
41583 if (targetm.class_max_nregs (class1, mode)
41584 > targetm.class_max_nregs (class2, mode))
41585 cost += 20;
41587 /* In the case of FP/MMX moves, the registers actually overlap, and we
41588 have to switch modes in order to treat them differently. */
41589 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41590 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41591 cost += 20;
41593 return cost;
41596 /* Moves between SSE/MMX and integer unit are expensive. */
41597 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41598 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41600 /* ??? By keeping the returned value relatively high, we limit the number
41601 of moves between integer and MMX/SSE registers for all targets.
41602 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
41603 where integer modes in MMX/SSE registers are not tieable
41604 because of missing QImode and HImode moves to, from or between
41605 MMX/SSE registers. */
41606 return MAX (8, ix86_cost->mmxsse_to_integer);
41608 if (MAYBE_FLOAT_CLASS_P (class1))
41609 return ix86_cost->fp_move;
41610 if (MAYBE_SSE_CLASS_P (class1))
41611 return ix86_cost->sse_move;
41612 if (MAYBE_MMX_CLASS_P (class1))
41613 return ix86_cost->mmx_move;
41614 return 2;
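/* For illustration: copying a DFmode value between an x87 and an SSE
   register needs secondary memory, so it is costed as a store plus a load
   (1 plus the in/out memory move cost of each class); a copy between SSE
   and integer registers that does not need memory is still costed at no
   less than 8 via mmxsse_to_integer, to discourage cross-unit moves.  */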
41617 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41618 MODE. */
41620 bool
41621 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41623 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
41624 if (CC_REGNO_P (regno))
41625 return GET_MODE_CLASS (mode) == MODE_CC;
41626 if (GET_MODE_CLASS (mode) == MODE_CC
41627 || GET_MODE_CLASS (mode) == MODE_RANDOM
41628 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41629 return false;
41630 if (STACK_REGNO_P (regno))
41631 return VALID_FP_MODE_P (mode);
41632 if (MASK_REGNO_P (regno))
41633 return (VALID_MASK_REG_MODE (mode)
41634 || ((TARGET_AVX512BW || TARGET_AVX512VBMI)
41635 && VALID_MASK_AVX512BW_MODE (mode)));
41636 if (BND_REGNO_P (regno))
41637 return VALID_BND_REG_MODE (mode);
41638 if (SSE_REGNO_P (regno))
41640 /* We implement the move patterns for all vector modes into and
41641 out of SSE registers, even when no operation instructions
41642 are available. */
41644 /* For AVX-512 we allow, regardless of regno:
41645 - XI mode
41646 - any of 512-bit wide vector mode
41647 - any scalar mode. */
41648 if (TARGET_AVX512F
41649 && (mode == XImode
41650 || VALID_AVX512F_REG_MODE (mode)
41651 || VALID_AVX512F_SCALAR_MODE (mode)))
41652 return true;
41654 /* TODO check for QI/HI scalars. */
41655 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
41656 if (TARGET_AVX512VL
41657 && (mode == OImode
41658 || mode == TImode
41659 || VALID_AVX256_REG_MODE (mode)
41660 || VALID_AVX512VL_128_REG_MODE (mode)))
41661 return true;
41663 /* xmm16-xmm31 are only available for AVX-512. */
41664 if (EXT_REX_SSE_REGNO_P (regno))
41665 return false;
41667 /* OImode and AVX modes are available only when AVX is enabled. */
41668 return ((TARGET_AVX
41669 && VALID_AVX256_REG_OR_OI_MODE (mode))
41670 || VALID_SSE_REG_MODE (mode)
41671 || VALID_SSE2_REG_MODE (mode)
41672 || VALID_MMX_REG_MODE (mode)
41673 || VALID_MMX_REG_MODE_3DNOW (mode));
41675 if (MMX_REGNO_P (regno))
41677 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41678 so if the register is available at all, then we can move data of
41679 the given mode into or out of it. */
41680 return (VALID_MMX_REG_MODE (mode)
41681 || VALID_MMX_REG_MODE_3DNOW (mode));
41684 if (mode == QImode)
41686 /* Take care for QImode values - they can be in non-QI regs,
41687 but then they do cause partial register stalls. */
41688 if (ANY_QI_REGNO_P (regno))
41689 return true;
41690 if (!TARGET_PARTIAL_REG_STALL)
41691 return true;
41692 /* LRA checks if the hard register is OK for the given mode.
41693 QImode values can live in non-QI regs, so we allow all
41694 registers here. */
41695 if (lra_in_progress)
41696 return true;
41697 return !can_create_pseudo_p ();
41699 /* We handle both integer and floats in the general purpose registers. */
41700 else if (VALID_INT_MODE_P (mode))
41701 return true;
41702 else if (VALID_FP_MODE_P (mode))
41703 return true;
41704 else if (VALID_DFP_MODE_P (mode))
41705 return true;
41706 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41707 on to use that value in smaller contexts, this can easily force a
41708 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41709 supporting DImode, allow it. */
41710 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41711 return true;
41713 return false;
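/* For illustration: xmm16-xmm31 accept XImode, the 512-bit vector modes
   and the AVX-512 scalar modes whenever AVX512F is enabled, but a 128-bit
   or 256-bit mode such as V8SF is allowed in them only when AVX512VL is
   also enabled; without it the EXT_REX_SSE check above rejects the
   register.  */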
41716 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41717 tieable integer mode. */
41719 static bool
41720 ix86_tieable_integer_mode_p (machine_mode mode)
41722 switch (mode)
41724 case HImode:
41725 case SImode:
41726 return true;
41728 case QImode:
41729 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41731 case DImode:
41732 return TARGET_64BIT;
41734 default:
41735 return false;
41739 /* Return true if MODE1 is accessible in a register that can hold MODE2
41740 without copying. That is, all register classes that can hold MODE2
41741 can also hold MODE1. */
41743 bool
41744 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41746 if (mode1 == mode2)
41747 return true;
41749 if (ix86_tieable_integer_mode_p (mode1)
41750 && ix86_tieable_integer_mode_p (mode2))
41751 return true;
41753 /* MODE2 being XFmode implies fp stack or general regs, which means we
41754 can tie any smaller floating point modes to it. Note that we do not
41755 tie this with TFmode. */
41756 if (mode2 == XFmode)
41757 return mode1 == SFmode || mode1 == DFmode;
41759 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41760 that we can tie it with SFmode. */
41761 if (mode2 == DFmode)
41762 return mode1 == SFmode;
41764 /* If MODE2 is only appropriate for an SSE register, then tie with
41765 any other mode acceptable to SSE registers. */
41766 if (GET_MODE_SIZE (mode2) == 32
41767 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41768 return (GET_MODE_SIZE (mode1) == 32
41769 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41770 if (GET_MODE_SIZE (mode2) == 16
41771 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41772 return (GET_MODE_SIZE (mode1) == 16
41773 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41775 /* If MODE2 is appropriate for an MMX register, then tie
41776 with any other mode acceptable to MMX registers. */
41777 if (GET_MODE_SIZE (mode2) == 8
41778 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41779 return (GET_MODE_SIZE (mode1) == 8
41780 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41782 return false;
41785 /* Return the cost of moving between two registers of mode MODE. */
41787 static int
41788 ix86_set_reg_reg_cost (machine_mode mode)
41790 unsigned int units = UNITS_PER_WORD;
41792 switch (GET_MODE_CLASS (mode))
41794 default:
41795 break;
41797 case MODE_CC:
41798 units = GET_MODE_SIZE (CCmode);
41799 break;
41801 case MODE_FLOAT:
41802 if ((TARGET_SSE && mode == TFmode)
41803 || (TARGET_80387 && mode == XFmode)
41804 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41805 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41806 units = GET_MODE_SIZE (mode);
41807 break;
41809 case MODE_COMPLEX_FLOAT:
41810 if ((TARGET_SSE && mode == TCmode)
41811 || (TARGET_80387 && mode == XCmode)
41812 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41813 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41814 units = GET_MODE_SIZE (mode);
41815 break;
41817 case MODE_VECTOR_INT:
41818 case MODE_VECTOR_FLOAT:
41819 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41820 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41821 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41822 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41823 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41824 units = GET_MODE_SIZE (mode);
41827 /* Return the cost of moving between two registers of mode MODE,
41828 assuming that the move will be in pieces of at most UNITS bytes. */
41829 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
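/* For illustration: with -m32 a DImode register-to-register copy is
   costed as COSTS_N_INSNS (2), two word-sized pieces, while a DFmode copy
   costs only COSTS_N_INSNS (1) when the 387 or SSE2 can move the value as
   a single unit.  */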
41832 /* Compute a (partial) cost for rtx X. Return true if the complete
41833 cost has been computed, and false if subexpressions should be
41834 scanned. In either case, *TOTAL contains the cost result. */
41836 static bool
41837 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41838 bool speed)
41840 rtx mask;
41841 enum rtx_code code = (enum rtx_code) code_i;
41842 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41843 machine_mode mode = GET_MODE (x);
41844 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41846 switch (code)
41848 case SET:
41849 if (register_operand (SET_DEST (x), VOIDmode)
41850 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41852 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41853 return true;
41855 return false;
41857 case CONST_INT:
41858 case CONST:
41859 case LABEL_REF:
41860 case SYMBOL_REF:
41861 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41862 *total = 3;
41863 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41864 *total = 2;
41865 else if (flag_pic && SYMBOLIC_CONST (x)
41866 && !(TARGET_64BIT
41867 && (GET_CODE (x) == LABEL_REF
41868 || (GET_CODE (x) == SYMBOL_REF
41869 && SYMBOL_REF_LOCAL_P (x)))))
41870 *total = 1;
41871 else
41872 *total = 0;
41873 return true;
41875 case CONST_DOUBLE:
41876 if (mode == VOIDmode)
41878 *total = 0;
41879 return true;
41881 switch (standard_80387_constant_p (x))
41883 case 1: /* 0.0 */
41884 *total = 1;
41885 return true;
41886 default: /* Other constants */
41887 *total = 2;
41888 return true;
41889 case 0:
41890 case -1:
41891 break;
41893 if (SSE_FLOAT_MODE_P (mode))
41895 case CONST_VECTOR:
41896 switch (standard_sse_constant_p (x))
41898 case 0:
41899 break;
41900 case 1: /* 0: xor eliminates false dependency */
41901 *total = 0;
41902 return true;
41903 default: /* -1: cmp contains false dependency */
41904 *total = 1;
41905 return true;
41908 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41909 it'll probably end up. Add a penalty for size. */
41910 *total = (COSTS_N_INSNS (1)
41911 + (flag_pic != 0 && !TARGET_64BIT)
41912 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41913 return true;
41915 case ZERO_EXTEND:
41916 /* The zero extension is often completely free on x86_64, so make
41917 it as cheap as possible. */
41918 if (TARGET_64BIT && mode == DImode
41919 && GET_MODE (XEXP (x, 0)) == SImode)
41920 *total = 1;
41921 else if (TARGET_ZERO_EXTEND_WITH_AND)
41922 *total = cost->add;
41923 else
41924 *total = cost->movzx;
41925 return false;
41927 case SIGN_EXTEND:
41928 *total = cost->movsx;
41929 return false;
41931 case ASHIFT:
41932 if (SCALAR_INT_MODE_P (mode)
41933 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41934 && CONST_INT_P (XEXP (x, 1)))
41936 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41937 if (value == 1)
41939 *total = cost->add;
41940 return false;
41942 if ((value == 2 || value == 3)
41943 && cost->lea <= cost->shift_const)
41945 *total = cost->lea;
41946 return false;
41949 /* FALLTHRU */
41951 case ROTATE:
41952 case ASHIFTRT:
41953 case LSHIFTRT:
41954 case ROTATERT:
41955 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41957 /* ??? Should be SSE vector operation cost. */
41958 /* At least for published AMD latencies, this really is the same
41959 as the latency for a simple fpu operation like fabs. */
41960 /* V*QImode is emulated with 1-11 insns. */
41961 if (mode == V16QImode || mode == V32QImode)
41963 int count = 11;
41964 if (TARGET_XOP && mode == V16QImode)
41966 /* For XOP we use vpshab, which requires a broadcast of the
41967 value to the variable shift insn. For constants this
41968 means a V16Q const in mem; even when we can perform the
41969 shift with one insn set the cost to prefer paddb. */
41970 if (CONSTANT_P (XEXP (x, 1)))
41972 *total = (cost->fabs
41973 + rtx_cost (XEXP (x, 0), code, 0, speed)
41974 + (speed ? 2 : COSTS_N_BYTES (16)));
41975 return true;
41977 count = 3;
41979 else if (TARGET_SSSE3)
41980 count = 7;
41981 *total = cost->fabs * count;
41983 else
41984 *total = cost->fabs;
41986 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41988 if (CONST_INT_P (XEXP (x, 1)))
41990 if (INTVAL (XEXP (x, 1)) > 32)
41991 *total = cost->shift_const + COSTS_N_INSNS (2);
41992 else
41993 *total = cost->shift_const * 2;
41995 else
41997 if (GET_CODE (XEXP (x, 1)) == AND)
41998 *total = cost->shift_var * 2;
41999 else
42000 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42003 else
42005 if (CONST_INT_P (XEXP (x, 1)))
42006 *total = cost->shift_const;
42007 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42008 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42010 /* Return the cost after shift-and truncation. */
42011 *total = cost->shift_var;
42012 return true;
42014 else
42015 *total = cost->shift_var;
42017 return false;
42019 case FMA:
42021 rtx sub;
42023 gcc_assert (FLOAT_MODE_P (mode));
42024 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42026 /* ??? SSE scalar/vector cost should be used here. */
42027 /* ??? Bald assumption that fma has the same cost as fmul. */
42028 *total = cost->fmul;
42029 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42031 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42032 sub = XEXP (x, 0);
42033 if (GET_CODE (sub) == NEG)
42034 sub = XEXP (sub, 0);
42035 *total += rtx_cost (sub, FMA, 0, speed);
42037 sub = XEXP (x, 2);
42038 if (GET_CODE (sub) == NEG)
42039 sub = XEXP (sub, 0);
42040 *total += rtx_cost (sub, FMA, 2, speed);
42041 return true;
42044 case MULT:
42045 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42047 /* ??? SSE scalar cost should be used here. */
42048 *total = cost->fmul;
42049 return false;
42051 else if (X87_FLOAT_MODE_P (mode))
42053 *total = cost->fmul;
42054 return false;
42056 else if (FLOAT_MODE_P (mode))
42058 /* ??? SSE vector cost should be used here. */
42059 *total = cost->fmul;
42060 return false;
42062 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42064 /* V*QImode is emulated with 7-13 insns. */
42065 if (mode == V16QImode || mode == V32QImode)
42067 int extra = 11;
42068 if (TARGET_XOP && mode == V16QImode)
42069 extra = 5;
42070 else if (TARGET_SSSE3)
42071 extra = 6;
42072 *total = cost->fmul * 2 + cost->fabs * extra;
42074 /* V*DImode is emulated with 5-8 insns. */
42075 else if (mode == V2DImode || mode == V4DImode)
42077 if (TARGET_XOP && mode == V2DImode)
42078 *total = cost->fmul * 2 + cost->fabs * 3;
42079 else
42080 *total = cost->fmul * 3 + cost->fabs * 5;
42082 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42083 insns, including two PMULUDQ. */
42084 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42085 *total = cost->fmul * 2 + cost->fabs * 5;
42086 else
42087 *total = cost->fmul;
42088 return false;
42090 else
42092 rtx op0 = XEXP (x, 0);
42093 rtx op1 = XEXP (x, 1);
42094 int nbits;
42095 if (CONST_INT_P (XEXP (x, 1)))
42097 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42098 for (nbits = 0; value != 0; value &= value - 1)
42099 nbits++;
42101 else
42102 /* This is arbitrary. */
42103 nbits = 7;
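/* Worked example (illustrative): a multiply by 10 (binary 1010, two set
   bits) is costed below as mult_init[MODE_INDEX (mode)] + 2 * mult_bit
   plus the cost of the two operands.  */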
42105 /* Compute costs correctly for widening multiplication. */
42106 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42107 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42108 == GET_MODE_SIZE (mode))
42110 int is_mulwiden = 0;
42111 machine_mode inner_mode = GET_MODE (op0);
42113 if (GET_CODE (op0) == GET_CODE (op1))
42114 is_mulwiden = 1, op1 = XEXP (op1, 0);
42115 else if (CONST_INT_P (op1))
42117 if (GET_CODE (op0) == SIGN_EXTEND)
42118 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42119 == INTVAL (op1);
42120 else
42121 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42124 if (is_mulwiden)
42125 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42128 *total = (cost->mult_init[MODE_INDEX (mode)]
42129 + nbits * cost->mult_bit
42130 + rtx_cost (op0, outer_code, opno, speed)
42131 + rtx_cost (op1, outer_code, opno, speed));
42133 return true;
42136 case DIV:
42137 case UDIV:
42138 case MOD:
42139 case UMOD:
42140 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42141 /* ??? SSE cost should be used here. */
42142 *total = cost->fdiv;
42143 else if (X87_FLOAT_MODE_P (mode))
42144 *total = cost->fdiv;
42145 else if (FLOAT_MODE_P (mode))
42146 /* ??? SSE vector cost should be used here. */
42147 *total = cost->fdiv;
42148 else
42149 *total = cost->divide[MODE_INDEX (mode)];
42150 return false;
42152 case PLUS:
42153 if (GET_MODE_CLASS (mode) == MODE_INT
42154 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42156 if (GET_CODE (XEXP (x, 0)) == PLUS
42157 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42158 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42159 && CONSTANT_P (XEXP (x, 1)))
42161 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42162 if (val == 2 || val == 4 || val == 8)
42164 *total = cost->lea;
42165 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42166 outer_code, opno, speed);
42167 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42168 outer_code, opno, speed);
42169 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42170 return true;
42173 else if (GET_CODE (XEXP (x, 0)) == MULT
42174 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42176 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42177 if (val == 2 || val == 4 || val == 8)
42179 *total = cost->lea;
42180 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42181 outer_code, opno, speed);
42182 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42183 return true;
42186 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42188 *total = cost->lea;
42189 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42190 outer_code, opno, speed);
42191 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42192 outer_code, opno, speed);
42193 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42194 return true;
42197 /* FALLTHRU */
42199 case MINUS:
42200 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42202 /* ??? SSE cost should be used here. */
42203 *total = cost->fadd;
42204 return false;
42206 else if (X87_FLOAT_MODE_P (mode))
42208 *total = cost->fadd;
42209 return false;
42211 else if (FLOAT_MODE_P (mode))
42213 /* ??? SSE vector cost should be used here. */
42214 *total = cost->fadd;
42215 return false;
42217 /* FALLTHRU */
42219 case AND:
42220 case IOR:
42221 case XOR:
42222 if (GET_MODE_CLASS (mode) == MODE_INT
42223 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42225 *total = (cost->add * 2
42226 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42227 << (GET_MODE (XEXP (x, 0)) != DImode))
42228 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42229 << (GET_MODE (XEXP (x, 1)) != DImode)));
42230 return true;
42232 /* FALLTHRU */
42234 case NEG:
42235 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42237 /* ??? SSE cost should be used here. */
42238 *total = cost->fchs;
42239 return false;
42241 else if (X87_FLOAT_MODE_P (mode))
42243 *total = cost->fchs;
42244 return false;
42246 else if (FLOAT_MODE_P (mode))
42248 /* ??? SSE vector cost should be used here. */
42249 *total = cost->fchs;
42250 return false;
42252 /* FALLTHRU */
42254 case NOT:
42255 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42257 /* ??? Should be SSE vector operation cost. */
42258 /* At least for published AMD latencies, this really is the same
42259 as the latency for a simple fpu operation like fabs. */
42260 *total = cost->fabs;
42262 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42263 *total = cost->add * 2;
42264 else
42265 *total = cost->add;
42266 return false;
42268 case COMPARE:
42269 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42270 && XEXP (XEXP (x, 0), 1) == const1_rtx
42271 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42272 && XEXP (x, 1) == const0_rtx)
42274 /* This kind of construct is implemented using test[bwl].
42275 Treat it as if we had an AND. */
42276 *total = (cost->add
42277 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42278 + rtx_cost (const1_rtx, outer_code, opno, speed));
42279 return true;
42281 return false;
42283 case FLOAT_EXTEND:
42284 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42285 *total = 0;
42286 return false;
42288 case ABS:
42289 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42290 /* ??? SSE cost should be used here. */
42291 *total = cost->fabs;
42292 else if (X87_FLOAT_MODE_P (mode))
42293 *total = cost->fabs;
42294 else if (FLOAT_MODE_P (mode))
42295 /* ??? SSE vector cost should be used here. */
42296 *total = cost->fabs;
42297 return false;
42299 case SQRT:
42300 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42301 /* ??? SSE cost should be used here. */
42302 *total = cost->fsqrt;
42303 else if (X87_FLOAT_MODE_P (mode))
42304 *total = cost->fsqrt;
42305 else if (FLOAT_MODE_P (mode))
42306 /* ??? SSE vector cost should be used here. */
42307 *total = cost->fsqrt;
42308 return false;
42310 case UNSPEC:
42311 if (XINT (x, 1) == UNSPEC_TP)
42312 *total = 0;
42313 return false;
42315 case VEC_SELECT:
42316 case VEC_CONCAT:
42317 case VEC_DUPLICATE:
42318 /* ??? Assume all of these vector manipulation patterns are
42319 recognizable, in which case they all pretty much have the
42320 same cost. */
42321 *total = cost->fabs;
42322 return true;
42323 case VEC_MERGE:
42324 mask = XEXP (x, 2);
42325 /* This is a masked instruction; assume the same cost
42326 as the nonmasked variant. */
42327 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42328 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42329 else
42330 *total = cost->fabs;
42331 return true;
42333 default:
42334 return false;
42338 #if TARGET_MACHO
42340 static int current_machopic_label_num;
42342 /* Given a symbol name and its associated stub, write out the
42343 definition of the stub. */
42345 void
42346 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42348 unsigned int length;
42349 char *binder_name, *symbol_name, lazy_ptr_name[32];
42350 int label = ++current_machopic_label_num;
42352 /* For 64-bit we shouldn't get here. */
42353 gcc_assert (!TARGET_64BIT);
42355 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42356 symb = targetm.strip_name_encoding (symb);
42358 length = strlen (stub);
42359 binder_name = XALLOCAVEC (char, length + 32);
42360 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42362 length = strlen (symb);
42363 symbol_name = XALLOCAVEC (char, length + 32);
42364 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42366 sprintf (lazy_ptr_name, "L%d$lz", label);
42368 if (MACHOPIC_ATT_STUB)
42369 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42370 else if (MACHOPIC_PURE)
42371 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42372 else
42373 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42375 fprintf (file, "%s:\n", stub);
42376 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42378 if (MACHOPIC_ATT_STUB)
42380 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42382 else if (MACHOPIC_PURE)
42384 /* PIC stub. */
42385 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42386 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42387 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42388 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42389 label, lazy_ptr_name, label);
42390 fprintf (file, "\tjmp\t*%%ecx\n");
42392 else
42393 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42395 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42396 it needs no stub-binding-helper. */
42397 if (MACHOPIC_ATT_STUB)
42398 return;
42400 fprintf (file, "%s:\n", binder_name);
42402 if (MACHOPIC_PURE)
42404 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42405 fprintf (file, "\tpushl\t%%ecx\n");
42407 else
42408 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42410 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42412 /* N.B. Keep the correspondence of these
42413 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42414 old-pic/new-pic/non-pic stubs; altering this will break
42415 compatibility with existing dylibs. */
42416 if (MACHOPIC_PURE)
42418 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42419 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42421 else
42422 /* 16-byte -mdynamic-no-pic stub. */
42423 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42425 fprintf (file, "%s:\n", lazy_ptr_name);
42426 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42427 fprintf (file, ASM_LONG "%s\n", binder_name);
42429 #endif /* TARGET_MACHO */
42431 /* Order the registers for register allocator. */
42433 void
42434 x86_order_regs_for_local_alloc (void)
42436 int pos = 0;
42437 int i;
42439 /* First allocate the local general purpose registers. */
42440 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42441 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42442 reg_alloc_order [pos++] = i;
42444 /* Global general purpose registers. */
42445 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42446 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42447 reg_alloc_order [pos++] = i;
42449 /* x87 registers come first in case we are doing FP math
42450 using them. */
42451 if (!TARGET_SSE_MATH)
42452 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42453 reg_alloc_order [pos++] = i;
42455 /* SSE registers. */
42456 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42457 reg_alloc_order [pos++] = i;
42458 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42459 reg_alloc_order [pos++] = i;
42461 /* Extended REX SSE registers. */
42462 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42463 reg_alloc_order [pos++] = i;
42466 /* Mask registers. */
42466 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42467 reg_alloc_order [pos++] = i;
42469 /* MPX bound registers. */
42470 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42471 reg_alloc_order [pos++] = i;
42473 /* x87 registers. */
42474 if (TARGET_SSE_MATH)
42475 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42476 reg_alloc_order [pos++] = i;
42478 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42479 reg_alloc_order [pos++] = i;
42481 /* Initialize the rest of the array, as we do not allocate some
42482 registers at all. */
42483 while (pos < FIRST_PSEUDO_REGISTER)
42484 reg_alloc_order [pos++] = 0;
42487 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42488 in struct attribute_spec handler. */
42489 static tree
42490 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42491 tree args,
42492 int,
42493 bool *no_add_attrs)
42495 if (TREE_CODE (*node) != FUNCTION_TYPE
42496 && TREE_CODE (*node) != METHOD_TYPE
42497 && TREE_CODE (*node) != FIELD_DECL
42498 && TREE_CODE (*node) != TYPE_DECL)
42500 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42501 name);
42502 *no_add_attrs = true;
42503 return NULL_TREE;
42505 if (TARGET_64BIT)
42507 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42508 name);
42509 *no_add_attrs = true;
42510 return NULL_TREE;
42512 if (is_attribute_p ("callee_pop_aggregate_return", name))
42514 tree cst;
42516 cst = TREE_VALUE (args);
42517 if (TREE_CODE (cst) != INTEGER_CST)
42519 warning (OPT_Wattributes,
42520 "%qE attribute requires an integer constant argument",
42521 name);
42522 *no_add_attrs = true;
42524 else if (compare_tree_int (cst, 0) != 0
42525 && compare_tree_int (cst, 1) != 0)
42527 warning (OPT_Wattributes,
42528 "argument to %qE attribute is neither zero, nor one",
42529 name);
42530 *no_add_attrs = true;
42533 return NULL_TREE;
42536 return NULL_TREE;
42539 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42540 struct attribute_spec.handler. */
42541 static tree
42542 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42543 bool *no_add_attrs)
42545 if (TREE_CODE (*node) != FUNCTION_TYPE
42546 && TREE_CODE (*node) != METHOD_TYPE
42547 && TREE_CODE (*node) != FIELD_DECL
42548 && TREE_CODE (*node) != TYPE_DECL)
42550 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42551 name);
42552 *no_add_attrs = true;
42553 return NULL_TREE;
42556 /* Can combine regparm with all attributes but fastcall. */
42557 if (is_attribute_p ("ms_abi", name))
42559 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42561 error ("ms_abi and sysv_abi attributes are not compatible");
42564 return NULL_TREE;
42566 else if (is_attribute_p ("sysv_abi", name))
42568 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42570 error ("ms_abi and sysv_abi attributes are not compatible");
42573 return NULL_TREE;
42576 return NULL_TREE;
42579 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42580 struct attribute_spec.handler. */
42581 static tree
42582 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42583 bool *no_add_attrs)
42585 tree *type = NULL;
42586 if (DECL_P (*node))
42588 if (TREE_CODE (*node) == TYPE_DECL)
42589 type = &TREE_TYPE (*node);
42591 else
42592 type = node;
42594 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42596 warning (OPT_Wattributes, "%qE attribute ignored",
42597 name);
42598 *no_add_attrs = true;
42601 else if ((is_attribute_p ("ms_struct", name)
42602 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42603 || ((is_attribute_p ("gcc_struct", name)
42604 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42606 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42607 name);
42608 *no_add_attrs = true;
42611 return NULL_TREE;
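/* Handle an attribute that is only valid on function declarations;
   arguments as in struct attribute_spec.handler.  */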
42614 static tree
42615 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42616 bool *no_add_attrs)
42618 if (TREE_CODE (*node) != FUNCTION_DECL)
42620 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42621 name);
42622 *no_add_attrs = true;
42624 return NULL_TREE;
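/* Return true if RECORD_TYPE should use the MS bit-field layout rules:
   either -mms-bitfields is in effect and the type is not marked
   gcc_struct, or the type is explicitly marked ms_struct.  */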
42627 static bool
42628 ix86_ms_bitfield_layout_p (const_tree record_type)
42630 return ((TARGET_MS_BITFIELD_LAYOUT
42631 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42632 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42635 /* Returns an expression indicating where the this parameter is
42636 located on entry to the FUNCTION. */
42638 static rtx
42639 x86_this_parameter (tree function)
42641 tree type = TREE_TYPE (function);
42642 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42643 int nregs;
42645 if (TARGET_64BIT)
42647 const int *parm_regs;
42649 if (ix86_function_type_abi (type) == MS_ABI)
42650 parm_regs = x86_64_ms_abi_int_parameter_registers;
42651 else
42652 parm_regs = x86_64_int_parameter_registers;
42653 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42656 nregs = ix86_function_regparm (type, function);
42658 if (nregs > 0 && !stdarg_p (type))
42660 int regno;
42661 unsigned int ccvt = ix86_get_callcvt (type);
42663 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42664 regno = aggr ? DX_REG : CX_REG;
42665 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42667 regno = CX_REG;
42668 if (aggr)
42669 return gen_rtx_MEM (SImode,
42670 plus_constant (Pmode, stack_pointer_rtx, 4));
42672 else
42674 regno = AX_REG;
42675 if (aggr)
42677 regno = DX_REG;
42678 if (nregs == 1)
42679 return gen_rtx_MEM (SImode,
42680 plus_constant (Pmode,
42681 stack_pointer_rtx, 4));
42684 return gen_rtx_REG (SImode, regno);
42687 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42688 aggr ? 8 : 4));
42691 /* Determine whether x86_output_mi_thunk can succeed. */
42693 static bool
42694 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42695 const_tree function)
42697 /* 64-bit can handle anything. */
42698 if (TARGET_64BIT)
42699 return true;
42701 /* For 32-bit, everything's fine if we have one free register. */
42702 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42703 return true;
42705 /* Need a free register for vcall_offset. */
42706 if (vcall_offset)
42707 return false;
42709 /* Need a free register for GOT references. */
42710 if (flag_pic && !targetm.binds_local_p (function))
42711 return false;
42713 /* Otherwise ok. */
42714 return true;
42717 /* Output the assembler code for a thunk function. THUNK_DECL is the
42718 declaration for the thunk function itself, FUNCTION is the decl for
42719 the target function. DELTA is an immediate constant offset to be
42720 added to THIS. If VCALL_OFFSET is nonzero, the word at
42721 *(*this + vcall_offset) should be added to THIS. */
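/* Roughly, in pseudo code, the emitted thunk does:
     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;  */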
42723 static void
42724 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42725 HOST_WIDE_INT vcall_offset, tree function)
42727 rtx this_param = x86_this_parameter (function);
42728 rtx this_reg, tmp, fnaddr;
42729 unsigned int tmp_regno;
42730 rtx_insn *insn;
42732 if (TARGET_64BIT)
42733 tmp_regno = R10_REG;
42734 else
42736 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42737 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42738 tmp_regno = AX_REG;
42739 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42740 tmp_regno = DX_REG;
42741 else
42742 tmp_regno = CX_REG;
42745 emit_note (NOTE_INSN_PROLOGUE_END);
42747 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42748 pull it in now and let DELTA benefit. */
42749 if (REG_P (this_param))
42750 this_reg = this_param;
42751 else if (vcall_offset)
42753 /* Put the this parameter into %eax. */
42754 this_reg = gen_rtx_REG (Pmode, AX_REG);
42755 emit_move_insn (this_reg, this_param);
42757 else
42758 this_reg = NULL_RTX;
42760 /* Adjust the this parameter by a fixed constant. */
42761 if (delta)
42763 rtx delta_rtx = GEN_INT (delta);
42764 rtx delta_dst = this_reg ? this_reg : this_param;
42766 if (TARGET_64BIT)
42768 if (!x86_64_general_operand (delta_rtx, Pmode))
42770 tmp = gen_rtx_REG (Pmode, tmp_regno);
42771 emit_move_insn (tmp, delta_rtx);
42772 delta_rtx = tmp;
42776 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42779 /* Adjust the this parameter by a value stored in the vtable. */
42780 if (vcall_offset)
42782 rtx vcall_addr, vcall_mem, this_mem;
42784 tmp = gen_rtx_REG (Pmode, tmp_regno);
42786 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42787 if (Pmode != ptr_mode)
42788 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42789 emit_move_insn (tmp, this_mem);
42791 /* Adjust the this parameter. */
42792 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42793 if (TARGET_64BIT
42794 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42796 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42797 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42798 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42801 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42802 if (Pmode != ptr_mode)
42803 emit_insn (gen_addsi_1_zext (this_reg,
42804 gen_rtx_REG (ptr_mode,
42805 REGNO (this_reg)),
42806 vcall_mem));
42807 else
42808 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42811 /* If necessary, drop THIS back to its stack slot. */
42812 if (this_reg && this_reg != this_param)
42813 emit_move_insn (this_param, this_reg);
42815 fnaddr = XEXP (DECL_RTL (function), 0);
42816 if (TARGET_64BIT)
42818 if (!flag_pic || targetm.binds_local_p (function)
42819 || TARGET_PECOFF)
42821 else
42823 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42824 tmp = gen_rtx_CONST (Pmode, tmp);
42825 fnaddr = gen_const_mem (Pmode, tmp);
42828 else
42830 if (!flag_pic || targetm.binds_local_p (function))
42832 #if TARGET_MACHO
42833 else if (TARGET_MACHO)
42835 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42836 fnaddr = XEXP (fnaddr, 0);
42838 #endif /* TARGET_MACHO */
42839 else
42841 tmp = gen_rtx_REG (Pmode, CX_REG);
42842 output_set_got (tmp, NULL_RTX);
42844 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42845 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42846 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42847 fnaddr = gen_const_mem (Pmode, fnaddr);
42851 /* Our sibling call patterns do not allow memories, because we have no
42852 predicate that can distinguish between frame and non-frame memory.
42853 For our purposes here, we can get away with (ab)using a jump pattern,
42854 because we're going to do no optimization. */
42855 if (MEM_P (fnaddr))
42857 if (sibcall_insn_operand (fnaddr, word_mode))
42859 fnaddr = XEXP (DECL_RTL (function), 0);
42860 tmp = gen_rtx_MEM (QImode, fnaddr);
42861 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42862 tmp = emit_call_insn (tmp);
42863 SIBLING_CALL_P (tmp) = 1;
42865 else
42866 emit_jump_insn (gen_indirect_jump (fnaddr));
42868 else
42870 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42872 // CM_LARGE_PIC always uses a pseudo PIC register, which is
42873 // uninitialized. Since FUNCTION is local and calling it
42874 // doesn't go through the PLT, we use scratch register %r11 as
42875 // the PIC register and initialize it here.
42876 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42877 ix86_init_large_pic_reg (tmp_regno);
42878 fnaddr = legitimize_pic_address (fnaddr,
42879 gen_rtx_REG (Pmode, tmp_regno));
42882 if (!sibcall_insn_operand (fnaddr, word_mode))
42884 tmp = gen_rtx_REG (word_mode, tmp_regno);
42885 if (GET_MODE (fnaddr) != word_mode)
42886 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42887 emit_move_insn (tmp, fnaddr);
42888 fnaddr = tmp;
42891 tmp = gen_rtx_MEM (QImode, fnaddr);
42892 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42893 tmp = emit_call_insn (tmp);
42894 SIBLING_CALL_P (tmp) = 1;
42896 emit_barrier ();
42898 /* Emit just enough of rest_of_compilation to get the insns emitted.
42899 Note that use_thunk calls assemble_start_function et al. */
42900 insn = get_insns ();
42901 shorten_branches (insn);
42902 final_start_function (insn, file, 1);
42903 final (insn, file, 1);
42904 final_end_function ();
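/* Output assembler directives at the start of each output file: the
   .code16gcc marker for 16-bit code, the Darwin prologue, the .version
   and __fltused directives where required, and Intel syntax selection.  */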
42907 static void
42908 x86_file_start (void)
42910 default_file_start ();
42911 if (TARGET_16BIT)
42912 fputs ("\t.code16gcc\n", asm_out_file);
42913 #if TARGET_MACHO
42914 darwin_file_start ();
42915 #endif
42916 if (X86_FILE_START_VERSION_DIRECTIVE)
42917 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42918 if (X86_FILE_START_FLTUSED)
42919 fputs ("\t.global\t__fltused\n", asm_out_file);
42920 if (ix86_asm_dialect == ASM_INTEL)
42921 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
42924 int
42925 x86_field_alignment (tree field, int computed)
42927 machine_mode mode;
42928 tree type = TREE_TYPE (field);
42930 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
42931 return computed;
42932 mode = TYPE_MODE (strip_array_types (type));
42933 if (mode == DFmode || mode == DCmode
42934 || GET_MODE_CLASS (mode) == MODE_INT
42935 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42936 return MIN (32, computed);
42937 return computed;
42940 /* Print call to TARGET to FILE. */
42942 static void
42943 x86_print_call_or_nop (FILE *file, const char *target)
42945 if (flag_nop_mcount)
42946 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42947 else
42948 fprintf (file, "1:\tcall\t%s\n", target);
42951 /* Output assembler code to FILE to increment profiler label # LABELNO
42952 for profiling a function entry. */
42953 void
42954 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
42956 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
42957 : MCOUNT_NAME);
42958 if (TARGET_64BIT)
42960 #ifndef NO_PROFILE_COUNTERS
42961 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
42962 #endif
42964 if (!TARGET_PECOFF && flag_pic)
42965 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
42966 else
42967 x86_print_call_or_nop (file, mcount_name);
42969 else if (flag_pic)
42971 #ifndef NO_PROFILE_COUNTERS
42972 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
42973 LPREFIX, labelno);
42974 #endif
42975 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
42977 else
42979 #ifndef NO_PROFILE_COUNTERS
42980 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
42981 LPREFIX, labelno);
42982 #endif
42983 x86_print_call_or_nop (file, mcount_name);
42986 if (flag_record_mcount)
42988 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
42989 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
42990 fprintf (file, "\t.previous\n");
42994 /* We don't have exact information about the insn sizes, but we may assume
42995 quite safely that we are informed about all 1 byte insns and memory
42996 address sizes. This is enough to eliminate unnecessary padding in
42997 99% of cases. */
42999 static int
43000 min_insn_size (rtx_insn *insn)
43002 int l = 0, len;
43004 if (!INSN_P (insn) || !active_insn_p (insn))
43005 return 0;
43007 /* Discard alignments we've emitted, and jump instructions. */
43008 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43009 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43010 return 0;
43012 /* Important case - calls are always 5 bytes.
43013 It is common to have many calls in a row. */
43014 if (CALL_P (insn)
43015 && symbolic_reference_mentioned_p (PATTERN (insn))
43016 && !SIBLING_CALL_P (insn))
43017 return 5;
43018 len = get_attr_length (insn);
43019 if (len <= 1)
43020 return 1;
43022 /* For normal instructions we rely on get_attr_length being exact,
43023 with a few exceptions. */
43024 if (!JUMP_P (insn))
43026 enum attr_type type = get_attr_type (insn);
43028 switch (type)
43030 case TYPE_MULTI:
43031 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43032 || asm_noperands (PATTERN (insn)) >= 0)
43033 return 0;
43034 break;
43035 case TYPE_OTHER:
43036 case TYPE_FCMP:
43037 break;
43038 default:
43039 /* Otherwise trust get_attr_length. */
43040 return len;
43043 l = get_attr_length_address (insn);
43044 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43045 l = 4;
43047 if (l)
43048 return 1+l;
43049 else
43050 return 2;
43053 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43055 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43056 16 byte window. */
43058 static void
43059 ix86_avoid_jump_mispredicts (void)
43061 rtx_insn *insn, *start = get_insns ();
43062 int nbytes = 0, njumps = 0;
43063 int isjump = 0;
43065 /* Look for all minimal intervals of instructions containing 4 jumps.
43066 The intervals are bounded by START and INSN. NBYTES is the total
43067 size of instructions in the interval including INSN and not including
43068 START. When NBYTES is smaller than 16 bytes, it is possible
43069 that the end of START and the end of INSN fall in the same 16 byte page.
43071 The smallest offset in the page at which INSN can start is the case where
43072 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43073 We add a p2align to the 16 byte window with max_skip 15 - NBYTES + sizeof (INSN).
43075 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43076 have to, control transfer to label(s) can be performed through other
43077 means, and also we estimate minimum length of all asm stmts as 0. */
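/* Illustrative example: if the interval already contains three jumps,
   INSN is a fourth jump, NBYTES is 14 and min_insn_size (INSN) is 2,
   we emit a pad with max_skip 15 - 14 + 2 = 3 before INSN so that it
   cannot end up in the same 16 byte page as the earlier jumps.  */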
43078 for (insn = start; insn; insn = NEXT_INSN (insn))
43080 int min_size;
43082 if (LABEL_P (insn))
43084 int align = label_to_alignment (insn);
43085 int max_skip = label_to_max_skip (insn);
43087 if (max_skip > 15)
43088 max_skip = 15;
43089 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43090 already in the current 16 byte page, because otherwise
43091 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43092 bytes to reach 16 byte boundary. */
43093 if (align <= 0
43094 || (align <= 3 && max_skip != (1 << align) - 1))
43095 max_skip = 0;
43096 if (dump_file)
43097 fprintf (dump_file, "Label %i with max_skip %i\n",
43098 INSN_UID (insn), max_skip);
43099 if (max_skip)
43101 while (nbytes + max_skip >= 16)
43103 start = NEXT_INSN (start);
43104 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43105 || CALL_P (start))
43106 njumps--, isjump = 1;
43107 else
43108 isjump = 0;
43109 nbytes -= min_insn_size (start);
43112 continue;
43115 min_size = min_insn_size (insn);
43116 nbytes += min_size;
43117 if (dump_file)
43118 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43119 INSN_UID (insn), min_size);
43120 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43121 || CALL_P (insn))
43122 njumps++;
43123 else
43124 continue;
43126 while (njumps > 3)
43128 start = NEXT_INSN (start);
43129 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43130 || CALL_P (start))
43131 njumps--, isjump = 1;
43132 else
43133 isjump = 0;
43134 nbytes -= min_insn_size (start);
43136 gcc_assert (njumps >= 0);
43137 if (dump_file)
43138 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43139 INSN_UID (start), INSN_UID (insn), nbytes);
43141 if (njumps == 3 && isjump && nbytes < 16)
43143 int padsize = 15 - nbytes + min_insn_size (insn);
43145 if (dump_file)
43146 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43147 INSN_UID (insn), padsize);
43148 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43152 #endif
43154 /* The AMD Athlon works faster
43155 when RET is not the destination of a conditional jump or directly preceded
43156 by another jump instruction. We avoid the penalty by inserting a NOP just
43157 before the RET instruction in such cases. */
43158 static void
43159 ix86_pad_returns (void)
43161 edge e;
43162 edge_iterator ei;
43164 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43166 basic_block bb = e->src;
43167 rtx_insn *ret = BB_END (bb);
43168 rtx_insn *prev;
43169 bool replace = false;
43171 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43172 || optimize_bb_for_size_p (bb))
43173 continue;
43174 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43175 if (active_insn_p (prev) || LABEL_P (prev))
43176 break;
43177 if (prev && LABEL_P (prev))
43179 edge e;
43180 edge_iterator ei;
43182 FOR_EACH_EDGE (e, ei, bb->preds)
43183 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43184 && !(e->flags & EDGE_FALLTHRU))
43186 replace = true;
43187 break;
43190 if (!replace)
43192 prev = prev_active_insn (ret);
43193 if (prev
43194 && ((JUMP_P (prev) && any_condjump_p (prev))
43195 || CALL_P (prev)))
43196 replace = true;
43197 /* Empty functions get a branch mispredict even when
43198 the jump destination is not visible to us. */
43199 if (!prev && !optimize_function_for_size_p (cfun))
43200 replace = true;
43202 if (replace)
43204 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43205 delete_insn (ret);
43210 /* Count the minimum number of instructions in BB. Return 4 if the
43211 number of instructions >= 4. */
43213 static int
43214 ix86_count_insn_bb (basic_block bb)
43216 rtx_insn *insn;
43217 int insn_count = 0;
43219 /* Count number of instructions in this block. Return 4 if the number
43220 of instructions >= 4. */
43221 FOR_BB_INSNS (bb, insn)
43224 /* This only happens in exit blocks. */
43224 if (JUMP_P (insn)
43225 && ANY_RETURN_P (PATTERN (insn)))
43226 break;
43228 if (NONDEBUG_INSN_P (insn)
43229 && GET_CODE (PATTERN (insn)) != USE
43230 && GET_CODE (PATTERN (insn)) != CLOBBER)
43232 insn_count++;
43233 if (insn_count >= 4)
43234 return insn_count;
43238 return insn_count;
43242 /* Count the minimum number of instructions in a code path through BB.
43243 Return 4 if the number of instructions >= 4. */
43245 static int
43246 ix86_count_insn (basic_block bb)
43248 edge e;
43249 edge_iterator ei;
43250 int min_prev_count;
43252 /* Only bother counting instructions along paths with no
43253 more than 2 basic blocks between entry and exit. Given
43254 that BB has an edge to exit, determine if a predecessor
43255 of BB has an edge from entry. If so, compute the number
43256 of instructions in the predecessor block. If there
43257 happen to be multiple such blocks, compute the minimum. */
43258 min_prev_count = 4;
43259 FOR_EACH_EDGE (e, ei, bb->preds)
43261 edge prev_e;
43262 edge_iterator prev_ei;
43264 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43266 min_prev_count = 0;
43267 break;
43269 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43271 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43273 int count = ix86_count_insn_bb (e->src);
43274 if (count < min_prev_count)
43275 min_prev_count = count;
43276 break;
43281 if (min_prev_count < 4)
43282 min_prev_count += ix86_count_insn_bb (bb);
43284 return min_prev_count;
43289 /* Pad a short function to 4 instructions. */
43289 static void
43290 ix86_pad_short_function (void)
43292 edge e;
43293 edge_iterator ei;
43295 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43297 rtx_insn *ret = BB_END (e->src);
43298 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43300 int insn_count = ix86_count_insn (e->src);
43302 /* Pad short function. */
43303 if (insn_count < 4)
43305 rtx_insn *insn = ret;
43307 /* Find epilogue. */
43308 while (insn
43309 && (!NOTE_P (insn)
43310 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43311 insn = PREV_INSN (insn);
43313 if (!insn)
43314 insn = ret;
43316 /* Two NOPs count as one instruction. */
43317 insn_count = 2 * (4 - insn_count);
43318 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43324 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43325 the epilogue, the Windows system unwinder will apply epilogue logic and
43326 produce incorrect offsets. This can be avoided by adding a nop between
43327 the last insn that can throw and the first insn of the epilogue. */
43329 static void
43330 ix86_seh_fixup_eh_fallthru (void)
43332 edge e;
43333 edge_iterator ei;
43335 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43337 rtx_insn *insn, *next;
43339 /* Find the beginning of the epilogue. */
43340 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43341 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43342 break;
43343 if (insn == NULL)
43344 continue;
43346 /* We only care about preceding insns that can throw. */
43347 insn = prev_active_insn (insn);
43348 if (insn == NULL || !can_throw_internal (insn))
43349 continue;
43351 /* Do not separate calls from their debug information. */
43352 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43353 if (NOTE_P (next)
43354 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43355 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43356 insn = next;
43357 else
43358 break;
43360 emit_insn_after (gen_nops (const1_rtx), insn);
43364 /* Implement machine specific optimizations. We implement padding of returns
43365 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
43366 static void
43367 ix86_reorg (void)
43369 /* We are freeing block_for_insn in the toplev to keep compatibility
43370 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43371 compute_bb_for_insn ();
43373 if (TARGET_SEH && current_function_has_exception_handlers ())
43374 ix86_seh_fixup_eh_fallthru ();
43376 if (optimize && optimize_function_for_speed_p (cfun))
43378 if (TARGET_PAD_SHORT_FUNCTION)
43379 ix86_pad_short_function ();
43380 else if (TARGET_PAD_RETURNS)
43381 ix86_pad_returns ();
43382 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43383 if (TARGET_FOUR_JUMP_LIMIT)
43384 ix86_avoid_jump_mispredicts ();
43385 #endif
43389 /* Return nonzero when a QImode register that must be represented via a REX
43390 prefix is used. */
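/* For example, the low byte registers %sil, %dil, %bpl and %spl are only
   addressable with a REX prefix.  */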
43391 bool
43392 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43394 int i;
43395 extract_insn_cached (insn);
43396 for (i = 0; i < recog_data.n_operands; i++)
43397 if (GENERAL_REG_P (recog_data.operand[i])
43398 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43399 return true;
43400 return false;
43403 /* Return true when INSN mentions a register that must be encoded using a
43404 REX prefix. */
43405 bool
43406 x86_extended_reg_mentioned_p (rtx insn)
43408 subrtx_iterator::array_type array;
43409 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43411 const_rtx x = *iter;
43412 if (REG_P (x)
43413 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43414 return true;
43416 return false;
43419 /* If profitable, negate (without causing overflow) the integer constant
43420 of mode MODE at location LOC. Return true in this case. */
43421 bool
43422 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43424 HOST_WIDE_INT val;
43426 if (!CONST_INT_P (*loc))
43427 return false;
43429 switch (mode)
43431 case DImode:
43432 /* DImode x86_64 constants must fit in 32 bits. */
43433 gcc_assert (x86_64_immediate_operand (*loc, mode));
43435 mode = SImode;
43436 break;
43438 case SImode:
43439 case HImode:
43440 case QImode:
43441 break;
43443 default:
43444 gcc_unreachable ();
43447 /* Avoid overflows. */
43448 if (mode_signbit_p (mode, *loc))
43449 return false;
43451 val = INTVAL (*loc);
43453 /* Make things pretty by emitting `subl $4,%eax' rather than `addl $-4,%eax'.
43454 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
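/* For example (illustrative): "addl $128, %eax" needs a 32-bit immediate,
   while the negated "subl $-128, %eax" fits in a sign-extended 8-bit
   immediate, so 128 is negated; -128 already fits and is left alone.  */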
43455 if ((val < 0 && val != -128)
43456 || val == 128)
43458 *loc = GEN_INT (-val);
43459 return true;
43462 return false;
43465 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43466 optabs would emit if we didn't have TFmode patterns. */
43468 void
43469 x86_emit_floatuns (rtx operands[2])
43471 rtx_code_label *neglab, *donelab;
43472 rtx i0, i1, f0, in, out;
43473 machine_mode mode, inmode;
43475 inmode = GET_MODE (operands[1]);
43476 gcc_assert (inmode == SImode || inmode == DImode);
43478 out = operands[0];
43479 in = force_reg (inmode, operands[1]);
43480 mode = GET_MODE (out);
43481 neglab = gen_label_rtx ();
43482 donelab = gen_label_rtx ();
43483 f0 = gen_reg_rtx (mode);
43485 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43487 expand_float (out, in, 0);
43489 emit_jump_insn (gen_jump (donelab));
43490 emit_barrier ();
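/* The input has its sign bit set, so a direct signed conversion would be
   wrong.  Compute (in >> 1) | (in & 1) -- halving the value while
   preserving the rounding-relevant low bit -- convert that, and double
   the result below.  */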
43492 emit_label (neglab);
43494 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43495 1, OPTAB_DIRECT);
43496 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43497 1, OPTAB_DIRECT);
43498 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43500 expand_float (f0, i0, 0);
43502 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43504 emit_label (donelab);
43507 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43508 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43509 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43510 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43512 /* Get a vector mode of the same size as the original but with elements
43513 twice as wide. This is only guaranteed to apply to integral vectors. */
43515 static inline machine_mode
43516 get_mode_wider_vector (machine_mode o)
43518 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43519 machine_mode n = GET_MODE_WIDER_MODE (o);
43520 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43521 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43522 return n;
43525 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43526 fill target with val via vec_duplicate. */
43528 static bool
43529 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43531 bool ok;
43532 rtx_insn *insn;
43533 rtx dup;
43535 /* First attempt to recognize VAL as-is. */
43536 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43537 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43538 if (recog_memoized (insn) < 0)
43540 rtx_insn *seq;
43541 /* If that fails, force VAL into a register. */
43543 start_sequence ();
43544 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43545 seq = get_insns ();
43546 end_sequence ();
43547 if (seq)
43548 emit_insn_before (seq, insn);
43550 ok = recog_memoized (insn) >= 0;
43551 gcc_assert (ok);
43553 return true;
43556 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43557 with all elements equal to VAR. Return true if successful. */
43559 static bool
43560 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43561 rtx target, rtx val)
43563 bool ok;
43565 switch (mode)
43567 case V2SImode:
43568 case V2SFmode:
43569 if (!mmx_ok)
43570 return false;
43571 /* FALLTHRU */
43573 case V4DFmode:
43574 case V4DImode:
43575 case V8SFmode:
43576 case V8SImode:
43577 case V2DFmode:
43578 case V2DImode:
43579 case V4SFmode:
43580 case V4SImode:
43581 case V16SImode:
43582 case V8DImode:
43583 case V16SFmode:
43584 case V8DFmode:
43585 return ix86_vector_duplicate_value (mode, target, val);
43587 case V4HImode:
43588 if (!mmx_ok)
43589 return false;
43590 if (TARGET_SSE || TARGET_3DNOW_A)
43592 rtx x;
43594 val = gen_lowpart (SImode, val);
43595 x = gen_rtx_TRUNCATE (HImode, val);
43596 x = gen_rtx_VEC_DUPLICATE (mode, x);
43597 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43598 return true;
43600 goto widen;
43602 case V8QImode:
43603 if (!mmx_ok)
43604 return false;
43605 goto widen;
43607 case V8HImode:
43608 if (TARGET_AVX2)
43609 return ix86_vector_duplicate_value (mode, target, val);
43611 if (TARGET_SSE2)
43613 struct expand_vec_perm_d dperm;
43614 rtx tmp1, tmp2;
43616 permute:
43617 memset (&dperm, 0, sizeof (dperm));
43618 dperm.target = target;
43619 dperm.vmode = mode;
43620 dperm.nelt = GET_MODE_NUNITS (mode);
43621 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43622 dperm.one_operand_p = true;
43624 /* Extend to SImode using a paradoxical SUBREG. */
43625 tmp1 = gen_reg_rtx (SImode);
43626 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43628 /* Insert the SImode value as low element of a V4SImode vector. */
43629 tmp2 = gen_reg_rtx (V4SImode);
43630 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43631 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43633 ok = (expand_vec_perm_1 (&dperm)
43634 || expand_vec_perm_broadcast_1 (&dperm));
43635 gcc_assert (ok);
43636 return ok;
43638 goto widen;
43640 case V16QImode:
43641 if (TARGET_AVX2)
43642 return ix86_vector_duplicate_value (mode, target, val);
43644 if (TARGET_SSE2)
43645 goto permute;
43646 goto widen;
43648 widen:
43649 /* Replicate the value once into the next wider mode and recurse. */
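/* E.g. to splat a QImode value we first form the HImode value
   (val << 8) | val, recursively build the HImode vector with half as
   many elements, and reinterpret the result in the original mode.  */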
43651 machine_mode smode, wsmode, wvmode;
43652 rtx x;
43654 smode = GET_MODE_INNER (mode);
43655 wvmode = get_mode_wider_vector (mode);
43656 wsmode = GET_MODE_INNER (wvmode);
43658 val = convert_modes (wsmode, smode, val, true);
43659 x = expand_simple_binop (wsmode, ASHIFT, val,
43660 GEN_INT (GET_MODE_BITSIZE (smode)),
43661 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43662 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43664 x = gen_reg_rtx (wvmode);
43665 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43666 gcc_assert (ok);
43667 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43668 return ok;
43671 case V16HImode:
43672 case V32QImode:
43673 if (TARGET_AVX2)
43674 return ix86_vector_duplicate_value (mode, target, val);
43675 else
43677 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43678 rtx x = gen_reg_rtx (hvmode);
43680 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43681 gcc_assert (ok);
43683 x = gen_rtx_VEC_CONCAT (mode, x, x);
43684 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43686 return true;
43688 case V64QImode:
43689 case V32HImode:
43690 if (TARGET_AVX512BW)
43691 return ix86_vector_duplicate_value (mode, target, val);
43692 else
43694 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43695 rtx x = gen_reg_rtx (hvmode);
43697 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43698 gcc_assert (ok);
43700 x = gen_rtx_VEC_CONCAT (mode, x, x);
43701 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43703 return true;
43705 default:
43706 return false;
43710 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43711 whose ONE_VAR element is VAR, and other elements are zero. Return true
43712 if successful. */
43714 static bool
43715 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43716 rtx target, rtx var, int one_var)
43718 machine_mode vsimode;
43719 rtx new_target;
43720 rtx x, tmp;
43721 bool use_vector_set = false;
43723 switch (mode)
43725 case V2DImode:
43726 /* For SSE4.1, we normally use vector set. But if the second
43727 element is zero and inter-unit moves are OK, we use movq
43728 instead. */
43729 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43730 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43731 && one_var == 0));
43732 break;
43733 case V16QImode:
43734 case V4SImode:
43735 case V4SFmode:
43736 use_vector_set = TARGET_SSE4_1;
43737 break;
43738 case V8HImode:
43739 use_vector_set = TARGET_SSE2;
43740 break;
43741 case V4HImode:
43742 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43743 break;
43744 case V32QImode:
43745 case V16HImode:
43746 case V8SImode:
43747 case V8SFmode:
43748 case V4DFmode:
43749 use_vector_set = TARGET_AVX;
43750 break;
43751 case V4DImode:
43752 /* Use ix86_expand_vector_set in 64bit mode only. */
43753 use_vector_set = TARGET_AVX && TARGET_64BIT;
43754 break;
43755 default:
43756 break;
43759 if (use_vector_set)
43761 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43762 var = force_reg (GET_MODE_INNER (mode), var);
43763 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43764 return true;
43767 switch (mode)
43769 case V2SFmode:
43770 case V2SImode:
43771 if (!mmx_ok)
43772 return false;
43773 /* FALLTHRU */
43775 case V2DFmode:
43776 case V2DImode:
43777 if (one_var != 0)
43778 return false;
43779 var = force_reg (GET_MODE_INNER (mode), var);
43780 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43781 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43782 return true;
43784 case V4SFmode:
43785 case V4SImode:
43786 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43787 new_target = gen_reg_rtx (mode);
43788 else
43789 new_target = target;
43790 var = force_reg (GET_MODE_INNER (mode), var);
43791 x = gen_rtx_VEC_DUPLICATE (mode, var);
43792 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43793 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43794 if (one_var != 0)
43796 /* We need to shuffle the value to the correct position, so
43797 create a new pseudo to store the intermediate result. */
43799 /* With SSE2, we can use the integer shuffle insns. */
43800 if (mode != V4SFmode && TARGET_SSE2)
43802 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43803 const1_rtx,
43804 GEN_INT (one_var == 1 ? 0 : 1),
43805 GEN_INT (one_var == 2 ? 0 : 1),
43806 GEN_INT (one_var == 3 ? 0 : 1)));
43807 if (target != new_target)
43808 emit_move_insn (target, new_target);
43809 return true;
43812 /* Otherwise convert the intermediate result to V4SFmode and
43813 use the SSE1 shuffle instructions. */
43814 if (mode != V4SFmode)
43816 tmp = gen_reg_rtx (V4SFmode);
43817 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43819 else
43820 tmp = new_target;
43822 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43823 const1_rtx,
43824 GEN_INT (one_var == 1 ? 0 : 1),
43825 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43826 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43828 if (mode != V4SFmode)
43829 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43830 else if (tmp != target)
43831 emit_move_insn (target, tmp);
43833 else if (target != new_target)
43834 emit_move_insn (target, new_target);
43835 return true;
43837 case V8HImode:
43838 case V16QImode:
43839 vsimode = V4SImode;
43840 goto widen;
43841 case V4HImode:
43842 case V8QImode:
43843 if (!mmx_ok)
43844 return false;
43845 vsimode = V2SImode;
43846 goto widen;
43847 widen:
43848 if (one_var != 0)
43849 return false;
43851 /* Zero extend the variable element to SImode and recurse. */
43852 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43854 x = gen_reg_rtx (vsimode);
43855 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43856 var, one_var))
43857 gcc_unreachable ();
43859 emit_move_insn (target, gen_lowpart (mode, x));
43860 return true;
43862 default:
43863 return false;
43867 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43868 consisting of the values in VALS. It is known that all elements
43869 except ONE_VAR are constants. Return true if successful. */
43871 static bool
43872 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43873 rtx target, rtx vals, int one_var)
43875 rtx var = XVECEXP (vals, 0, one_var);
43876 machine_mode wmode;
43877 rtx const_vec, x;
43879 const_vec = copy_rtx (vals);
43880 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43881 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43883 switch (mode)
43885 case V2DFmode:
43886 case V2DImode:
43887 case V2SFmode:
43888 case V2SImode:
43889 /* For the two element vectors, it's just as easy to use
43890 the general case. */
43891 return false;
43893 case V4DImode:
43894 /* Use ix86_expand_vector_set in 64bit mode only. */
43895 if (!TARGET_64BIT)
43896 return false;
43897 case V4DFmode:
43898 case V8SFmode:
43899 case V8SImode:
43900 case V16HImode:
43901 case V32QImode:
43902 case V4SFmode:
43903 case V4SImode:
43904 case V8HImode:
43905 case V4HImode:
43906 break;
43908 case V16QImode:
43909 if (TARGET_SSE4_1)
43910 break;
43911 wmode = V8HImode;
43912 goto widen;
43913 case V8QImode:
43914 wmode = V4HImode;
43915 goto widen;
43916 widen:
43917 /* There's no way to set one QImode entry easily. Combine
43918 the variable value with its adjacent constant value, and
43919 promote to an HImode set. */
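/* For example (illustrative): to set byte element 3 of a V16QImode
   vector we build the HImode value (var << 8) | constant_byte_2 and
   store it as element 1 of the V8HImode view of the constant vector.  */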
43920 x = XVECEXP (vals, 0, one_var ^ 1);
43921 if (one_var & 1)
43923 var = convert_modes (HImode, QImode, var, true);
43924 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43925 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43926 x = GEN_INT (INTVAL (x) & 0xff);
43928 else
43930 var = convert_modes (HImode, QImode, var, true);
43931 x = gen_int_mode (INTVAL (x) << 8, HImode);
43933 if (x != const0_rtx)
43934 var = expand_simple_binop (HImode, IOR, var, x, var,
43935 1, OPTAB_LIB_WIDEN);
43937 x = gen_reg_rtx (wmode);
43938 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43939 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43941 emit_move_insn (target, gen_lowpart (mode, x));
43942 return true;
43944 default:
43945 return false;
43948 emit_move_insn (target, const_vec);
43949 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43950 return true;
43953 /* A subroutine of ix86_expand_vector_init_general. Use vector
43954 concatenate to handle the most general case: all values variable,
43955 and none identical. */
43957 static void
43958 ix86_expand_vector_init_concat (machine_mode mode,
43959 rtx target, rtx *ops, int n)
43961 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43962 rtx first[16], second[8], third[4];
43963 rtvec v;
43964 int i, j;
43966 switch (n)
43968 case 2:
43969 switch (mode)
43971 case V16SImode:
43972 cmode = V8SImode;
43973 break;
43974 case V16SFmode:
43975 cmode = V8SFmode;
43976 break;
43977 case V8DImode:
43978 cmode = V4DImode;
43979 break;
43980 case V8DFmode:
43981 cmode = V4DFmode;
43982 break;
43983 case V8SImode:
43984 cmode = V4SImode;
43985 break;
43986 case V8SFmode:
43987 cmode = V4SFmode;
43988 break;
43989 case V4DImode:
43990 cmode = V2DImode;
43991 break;
43992 case V4DFmode:
43993 cmode = V2DFmode;
43994 break;
43995 case V4SImode:
43996 cmode = V2SImode;
43997 break;
43998 case V4SFmode:
43999 cmode = V2SFmode;
44000 break;
44001 case V2DImode:
44002 cmode = DImode;
44003 break;
44004 case V2SImode:
44005 cmode = SImode;
44006 break;
44007 case V2DFmode:
44008 cmode = DFmode;
44009 break;
44010 case V2SFmode:
44011 cmode = SFmode;
44012 break;
44013 default:
44014 gcc_unreachable ();
44017 if (!register_operand (ops[1], cmode))
44018 ops[1] = force_reg (cmode, ops[1]);
44019 if (!register_operand (ops[0], cmode))
44020 ops[0] = force_reg (cmode, ops[0]);
44021 emit_insn (gen_rtx_SET (VOIDmode, target,
44022 gen_rtx_VEC_CONCAT (mode, ops[0],
44023 ops[1])));
44024 break;
44026 case 4:
44027 switch (mode)
44029 case V4DImode:
44030 cmode = V2DImode;
44031 break;
44032 case V4DFmode:
44033 cmode = V2DFmode;
44034 break;
44035 case V4SImode:
44036 cmode = V2SImode;
44037 break;
44038 case V4SFmode:
44039 cmode = V2SFmode;
44040 break;
44041 default:
44042 gcc_unreachable ();
44044 goto half;
44046 case 8:
44047 switch (mode)
44049 case V8DImode:
44050 cmode = V2DImode;
44051 hmode = V4DImode;
44052 break;
44053 case V8DFmode:
44054 cmode = V2DFmode;
44055 hmode = V4DFmode;
44056 break;
44057 case V8SImode:
44058 cmode = V2SImode;
44059 hmode = V4SImode;
44060 break;
44061 case V8SFmode:
44062 cmode = V2SFmode;
44063 hmode = V4SFmode;
44064 break;
44065 default:
44066 gcc_unreachable ();
44068 goto half;
44070 case 16:
44071 switch (mode)
44073 case V16SImode:
44074 cmode = V2SImode;
44075 hmode = V4SImode;
44076 gmode = V8SImode;
44077 break;
44078 case V16SFmode:
44079 cmode = V2SFmode;
44080 hmode = V4SFmode;
44081 gmode = V8SFmode;
44082 break;
44083 default:
44084 gcc_unreachable ();
44086 goto half;
44088 half:
44089 /* FIXME: We process inputs backward to help RA. PR 36222. */
44090 i = n - 1;
44091 j = (n >> 1) - 1;
44092 for (; i > 0; i -= 2, j--)
44094 first[j] = gen_reg_rtx (cmode);
44095 v = gen_rtvec (2, ops[i - 1], ops[i]);
44096 ix86_expand_vector_init (false, first[j],
44097 gen_rtx_PARALLEL (cmode, v));
44100 n >>= 1;
44101 if (n > 4)
44103 gcc_assert (hmode != VOIDmode);
44104 gcc_assert (gmode != VOIDmode);
44105 for (i = j = 0; i < n; i += 2, j++)
44107 second[j] = gen_reg_rtx (hmode);
44108 ix86_expand_vector_init_concat (hmode, second [j],
44109 &first [i], 2);
44111 n >>= 1;
44112 for (i = j = 0; i < n; i += 2, j++)
44114 third[j] = gen_reg_rtx (gmode);
44115 ix86_expand_vector_init_concat (gmode, third[j],
44116 &second[i], 2);
44118 n >>= 1;
44119 ix86_expand_vector_init_concat (mode, target, third, n);
44121 else if (n > 2)
44123 gcc_assert (hmode != VOIDmode);
44124 for (i = j = 0; i < n; i += 2, j++)
44126 second[j] = gen_reg_rtx (hmode);
44127 ix86_expand_vector_init_concat (hmode, second [j],
44128 &first [i], 2);
44130 n >>= 1;
44131 ix86_expand_vector_init_concat (mode, target, second, n);
44133 else
44134 ix86_expand_vector_init_concat (mode, target, first, n);
44135 break;
44137 default:
44138 gcc_unreachable ();
44142 /* A subroutine of ix86_expand_vector_init_general. Use vector
44143 interleave to handle the most general case: all values variable,
44144 and none identical. */
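/* In outline: each pair of scalar inputs is packed into the low bits
   of its own vector (the first element goes in through an SImode move
   into lane 0, the second through a vec_set into element 1), and
   successive "interleave low" steps -- V8HImode -> V4SImode -> V2DImode
   for the QImode case -- then gather all the pairs into one register.  */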
44146 static void
44147 ix86_expand_vector_init_interleave (machine_mode mode,
44148 rtx target, rtx *ops, int n)
44150 machine_mode first_imode, second_imode, third_imode, inner_mode;
44151 int i, j;
44152 rtx op0, op1;
44153 rtx (*gen_load_even) (rtx, rtx, rtx);
44154 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44155 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44157 switch (mode)
44159 case V8HImode:
44160 gen_load_even = gen_vec_setv8hi;
44161 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44162 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44163 inner_mode = HImode;
44164 first_imode = V4SImode;
44165 second_imode = V2DImode;
44166 third_imode = VOIDmode;
44167 break;
44168 case V16QImode:
44169 gen_load_even = gen_vec_setv16qi;
44170 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44171 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44172 inner_mode = QImode;
44173 first_imode = V8HImode;
44174 second_imode = V4SImode;
44175 third_imode = V2DImode;
44176 break;
44177 default:
44178 gcc_unreachable ();
44181 for (i = 0; i < n; i++)
44183 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44184 op0 = gen_reg_rtx (SImode);
44185 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44187 /* Insert the SImode value as low element of V4SImode vector. */
44188 op1 = gen_reg_rtx (V4SImode);
44189 op0 = gen_rtx_VEC_MERGE (V4SImode,
44190 gen_rtx_VEC_DUPLICATE (V4SImode,
44191 op0),
44192 CONST0_RTX (V4SImode),
44193 const1_rtx);
44194 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44196 /* Cast the V4SImode vector back to a vector in original mode. */
44197 op0 = gen_reg_rtx (mode);
44198 emit_move_insn (op0, gen_lowpart (mode, op1));
44200 /* Load even elements into the second position. */
44201 emit_insn (gen_load_even (op0,
44202 force_reg (inner_mode,
44203 ops [i + i + 1]),
44204 const1_rtx));
44206 /* Cast vector to FIRST_IMODE vector. */
44207 ops[i] = gen_reg_rtx (first_imode);
44208 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44211 /* Interleave low FIRST_IMODE vectors. */
44212 for (i = j = 0; i < n; i += 2, j++)
44214 op0 = gen_reg_rtx (first_imode);
44215 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44217 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44218 ops[j] = gen_reg_rtx (second_imode);
44219 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44222 /* Interleave low SECOND_IMODE vectors. */
44223 switch (second_imode)
44225 case V4SImode:
44226 for (i = j = 0; i < n / 2; i += 2, j++)
44228 op0 = gen_reg_rtx (second_imode);
44229 emit_insn (gen_interleave_second_low (op0, ops[i],
44230 ops[i + 1]));
44232 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44233 vector. */
44234 ops[j] = gen_reg_rtx (third_imode);
44235 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44237 second_imode = V2DImode;
44238 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44239 /* FALLTHRU */
44241 case V2DImode:
44242 op0 = gen_reg_rtx (second_imode);
44243 emit_insn (gen_interleave_second_low (op0, ops[0],
44244 ops[1]));
44246 /* Cast the SECOND_IMODE vector back to a vector in the original
44247 mode. */
44248 emit_insn (gen_rtx_SET (VOIDmode, target,
44249 gen_lowpart (mode, op0)));
44250 break;
44252 default:
44253 gcc_unreachable ();
44257 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44258 all values variable, and none identical. */
44260 static void
44261 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44262 rtx target, rtx vals)
44264 rtx ops[64], op0, op1, op2, op3, op4, op5;
44265 machine_mode half_mode = VOIDmode;
44266 machine_mode quarter_mode = VOIDmode;
44267 int n, i;
44269 switch (mode)
44271 case V2SFmode:
44272 case V2SImode:
44273 if (!mmx_ok && !TARGET_SSE)
44274 break;
44275 /* FALLTHRU */
44277 case V16SImode:
44278 case V16SFmode:
44279 case V8DFmode:
44280 case V8DImode:
44281 case V8SFmode:
44282 case V8SImode:
44283 case V4DFmode:
44284 case V4DImode:
44285 case V4SFmode:
44286 case V4SImode:
44287 case V2DFmode:
44288 case V2DImode:
44289 n = GET_MODE_NUNITS (mode);
44290 for (i = 0; i < n; i++)
44291 ops[i] = XVECEXP (vals, 0, i);
44292 ix86_expand_vector_init_concat (mode, target, ops, n);
44293 return;
44295 case V32QImode:
44296 half_mode = V16QImode;
44297 goto half;
44299 case V16HImode:
44300 half_mode = V8HImode;
44301 goto half;
44303 half:
44304 n = GET_MODE_NUNITS (mode);
44305 for (i = 0; i < n; i++)
44306 ops[i] = XVECEXP (vals, 0, i);
44307 op0 = gen_reg_rtx (half_mode);
44308 op1 = gen_reg_rtx (half_mode);
44309 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44310 n >> 2);
44311 ix86_expand_vector_init_interleave (half_mode, op1,
44312 &ops [n >> 1], n >> 2);
44313 emit_insn (gen_rtx_SET (VOIDmode, target,
44314 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44315 return;
44317 case V64QImode:
44318 quarter_mode = V16QImode;
44319 half_mode = V32QImode;
44320 goto quarter;
44322 case V32HImode:
44323 quarter_mode = V8HImode;
44324 half_mode = V16HImode;
44325 goto quarter;
44327 quarter:
44328 n = GET_MODE_NUNITS (mode);
44329 for (i = 0; i < n; i++)
44330 ops[i] = XVECEXP (vals, 0, i);
44331 op0 = gen_reg_rtx (quarter_mode);
44332 op1 = gen_reg_rtx (quarter_mode);
44333 op2 = gen_reg_rtx (quarter_mode);
44334 op3 = gen_reg_rtx (quarter_mode);
44335 op4 = gen_reg_rtx (half_mode);
44336 op5 = gen_reg_rtx (half_mode);
44337 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44338 n >> 3);
44339 ix86_expand_vector_init_interleave (quarter_mode, op1,
44340 &ops [n >> 2], n >> 3);
44341 ix86_expand_vector_init_interleave (quarter_mode, op2,
44342 &ops [n >> 1], n >> 3);
44343 ix86_expand_vector_init_interleave (quarter_mode, op3,
44344 &ops [(n >> 1) | (n >> 2)], n >> 3);
44345 emit_insn (gen_rtx_SET (VOIDmode, op4,
44346 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44347 emit_insn (gen_rtx_SET (VOIDmode, op5,
44348 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44349 emit_insn (gen_rtx_SET (VOIDmode, target,
44350 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44351 return;
44353 case V16QImode:
44354 if (!TARGET_SSE4_1)
44355 break;
44356 /* FALLTHRU */
44358 case V8HImode:
44359 if (!TARGET_SSE2)
44360 break;
44362 /* Don't use ix86_expand_vector_init_interleave if we can't
44363 move from GPR to SSE register directly. */
44364 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44365 break;
44367 n = GET_MODE_NUNITS (mode);
44368 for (i = 0; i < n; i++)
44369 ops[i] = XVECEXP (vals, 0, i);
44370 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44371 return;
44373 case V4HImode:
44374 case V8QImode:
44375 break;
44377 default:
44378 gcc_unreachable ();
44382 int i, j, n_elts, n_words, n_elt_per_word;
44383 machine_mode inner_mode;
44384 rtx words[4], shift;
44386 inner_mode = GET_MODE_INNER (mode);
44387 n_elts = GET_MODE_NUNITS (mode);
44388 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44389 n_elt_per_word = n_elts / n_words;
44390 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
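/* As an illustration, for V4HImode on a 32-bit target n_words is 2 and
   n_elt_per_word is 2, and the loop below builds
   word[i] = (elt[2*i + 1] << 16) | elt[2*i], i.e. higher-numbered
   elements end up in the higher bits of each word.  */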
44392 for (i = 0; i < n_words; ++i)
44394 rtx word = NULL_RTX;
44396 for (j = 0; j < n_elt_per_word; ++j)
44398 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44399 elt = convert_modes (word_mode, inner_mode, elt, true);
44401 if (j == 0)
44402 word = elt;
44403 else
44405 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44406 word, 1, OPTAB_LIB_WIDEN);
44407 word = expand_simple_binop (word_mode, IOR, word, elt,
44408 word, 1, OPTAB_LIB_WIDEN);
44412 words[i] = word;
44415 if (n_words == 1)
44416 emit_move_insn (target, gen_lowpart (mode, words[0]));
44417 else if (n_words == 2)
44419 rtx tmp = gen_reg_rtx (mode);
44420 emit_clobber (tmp);
44421 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44422 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44423 emit_move_insn (target, tmp);
44425 else if (n_words == 4)
44427 rtx tmp = gen_reg_rtx (V4SImode);
44428 gcc_assert (word_mode == SImode);
44429 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44430 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44431 emit_move_insn (target, gen_lowpart (mode, tmp));
44433 else
44434 gcc_unreachable ();
44438 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44439 instructions unless MMX_OK is true. */
44441 void
44442 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44444 machine_mode mode = GET_MODE (target);
44445 machine_mode inner_mode = GET_MODE_INNER (mode);
44446 int n_elts = GET_MODE_NUNITS (mode);
44447 int n_var = 0, one_var = -1;
44448 bool all_same = true, all_const_zero = true;
44449 int i;
44450 rtx x;
44452 for (i = 0; i < n_elts; ++i)
44454 x = XVECEXP (vals, 0, i);
44455 if (!(CONST_INT_P (x)
44456 || GET_CODE (x) == CONST_DOUBLE
44457 || GET_CODE (x) == CONST_FIXED))
44458 n_var++, one_var = i;
44459 else if (x != CONST0_RTX (inner_mode))
44460 all_const_zero = false;
44461 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44462 all_same = false;
44465 /* Constants are best loaded from the constant pool. */
44466 if (n_var == 0)
44468 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44469 return;
44472 /* If all values are identical, broadcast the value. */
44473 if (all_same
44474 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44475 XVECEXP (vals, 0, 0)))
44476 return;
44478 /* Values where only one field is non-constant are best loaded from
44479 the pool and overwritten via move later. */
44480 if (n_var == 1)
44482 if (all_const_zero
44483 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44484 XVECEXP (vals, 0, one_var),
44485 one_var))
44486 return;
44488 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44489 return;
44492 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44495 void
44496 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44498 machine_mode mode = GET_MODE (target);
44499 machine_mode inner_mode = GET_MODE_INNER (mode);
44500 machine_mode half_mode;
44501 bool use_vec_merge = false;
44502 rtx tmp;
44503 static rtx (*gen_extract[6][2]) (rtx, rtx)
44505 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44506 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44507 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44508 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44509 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44510 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44512 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44514 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44515 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44516 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44517 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44518 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44519 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44521 int i, j, n;
44523 switch (mode)
44525 case V2SFmode:
44526 case V2SImode:
44527 if (mmx_ok)
44529 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44530 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44531 if (elt == 0)
44532 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44533 else
44534 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44535 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44536 return;
44538 break;
44540 case V2DImode:
44541 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44542 if (use_vec_merge)
44543 break;
44545 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44546 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44547 if (elt == 0)
44548 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44549 else
44550 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44551 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44552 return;
44554 case V2DFmode:
44556 rtx op0, op1;
44558 /* For the two element vectors, we implement a VEC_CONCAT with
44559 the extraction of the other element. */
44561 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44562 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44564 if (elt == 0)
44565 op0 = val, op1 = tmp;
44566 else
44567 op0 = tmp, op1 = val;
44569 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44570 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44572 return;
44574 case V4SFmode:
44575 use_vec_merge = TARGET_SSE4_1;
44576 if (use_vec_merge)
44577 break;
44579 switch (elt)
44581 case 0:
44582 use_vec_merge = true;
44583 break;
44585 case 1:
44586 /* tmp = target = A B C D */
44587 tmp = copy_to_reg (target);
44588 /* target = A A B B */
44589 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44590 /* target = X A B B */
44591 ix86_expand_vector_set (false, target, val, 0);
44592 /* target = A X C D */
44593 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44594 const1_rtx, const0_rtx,
44595 GEN_INT (2+4), GEN_INT (3+4)));
44596 return;
44598 case 2:
44599 /* tmp = target = A B C D */
44600 tmp = copy_to_reg (target);
44601 /* tmp = X B C D */
44602 ix86_expand_vector_set (false, tmp, val, 0);
44603 /* target = A B X D */
44604 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44605 const0_rtx, const1_rtx,
44606 GEN_INT (0+4), GEN_INT (3+4)));
44607 return;
44609 case 3:
44610 /* tmp = target = A B C D */
44611 tmp = copy_to_reg (target);
44612 /* tmp = X B C D */
44613 ix86_expand_vector_set (false, tmp, val, 0);
44614 /* target = A B C X */
44615 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44616 const0_rtx, const1_rtx,
44617 GEN_INT (2+4), GEN_INT (0+4)));
44618 return;
44620 default:
44621 gcc_unreachable ();
44623 break;
44625 case V4SImode:
44626 use_vec_merge = TARGET_SSE4_1;
44627 if (use_vec_merge)
44628 break;
44630 /* Element 0 handled by vec_merge below. */
44631 if (elt == 0)
44633 use_vec_merge = true;
44634 break;
44637 if (TARGET_SSE2)
44639 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44640 store into element 0, then shuffle them back. */
44642 rtx order[4];
44644 order[0] = GEN_INT (elt);
44645 order[1] = const1_rtx;
44646 order[2] = const2_rtx;
44647 order[3] = GEN_INT (3);
44648 order[elt] = const0_rtx;
44650 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44651 order[1], order[2], order[3]));
44653 ix86_expand_vector_set (false, target, val, 0);
44655 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44656 order[1], order[2], order[3]));
44658 else
44660 /* For SSE1, we have to reuse the V4SF code. */
44661 rtx t = gen_reg_rtx (V4SFmode);
44662 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44663 emit_move_insn (target, gen_lowpart (mode, t));
44665 return;
44667 case V8HImode:
44668 use_vec_merge = TARGET_SSE2;
44669 break;
44670 case V4HImode:
44671 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44672 break;
44674 case V16QImode:
44675 use_vec_merge = TARGET_SSE4_1;
44676 break;
44678 case V8QImode:
44679 break;
44681 case V32QImode:
44682 half_mode = V16QImode;
44683 j = 0;
44684 n = 16;
44685 goto half;
44687 case V16HImode:
44688 half_mode = V8HImode;
44689 j = 1;
44690 n = 8;
44691 goto half;
44693 case V8SImode:
44694 half_mode = V4SImode;
44695 j = 2;
44696 n = 4;
44697 goto half;
44699 case V4DImode:
44700 half_mode = V2DImode;
44701 j = 3;
44702 n = 2;
44703 goto half;
44705 case V8SFmode:
44706 half_mode = V4SFmode;
44707 j = 4;
44708 n = 4;
44709 goto half;
44711 case V4DFmode:
44712 half_mode = V2DFmode;
44713 j = 5;
44714 n = 2;
44715 goto half;
44717 half:
44718 /* Compute offset. */
44719 i = elt / n;
44720 elt %= n;
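/* E.g. for V8SImode and elt == 5: i == 1 selects the high V4SImode
   half and elt == 1 is the position within that half.  */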
44722 gcc_assert (i <= 1);
44724 /* Extract the half. */
44725 tmp = gen_reg_rtx (half_mode);
44726 emit_insn (gen_extract[j][i] (tmp, target));
44728 /* Put val in tmp at elt. */
44729 ix86_expand_vector_set (false, tmp, val, elt);
44731 /* Put it back. */
44732 emit_insn (gen_insert[j][i] (target, target, tmp));
44733 return;
44735 case V8DFmode:
44736 if (TARGET_AVX512F)
44738 tmp = gen_reg_rtx (mode);
44739 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44740 gen_rtx_VEC_DUPLICATE (mode, val)));
44741 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44742 force_reg (QImode, GEN_INT (1 << elt))));
44743 return;
44745 else
44746 break;
44747 case V8DImode:
44748 if (TARGET_AVX512F)
44750 tmp = gen_reg_rtx (mode);
44751 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44752 gen_rtx_VEC_DUPLICATE (mode, val)));
44753 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44754 force_reg (QImode, GEN_INT (1 << elt))));
44755 return;
44757 else
44758 break;
44759 case V16SFmode:
44760 if (TARGET_AVX512F)
44762 tmp = gen_reg_rtx (mode);
44763 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44764 gen_rtx_VEC_DUPLICATE (mode, val)));
44765 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44766 force_reg (HImode, GEN_INT (1 << elt))));
44767 return;
44769 else
44770 break;
44771 case V16SImode:
44772 if (TARGET_AVX512F)
44774 tmp = gen_reg_rtx (mode);
44775 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44776 gen_rtx_VEC_DUPLICATE (mode, val)));
44777 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44778 force_reg (HImode, GEN_INT (1 << elt))));
44779 return;
44781 else
44782 break;
44783 case V32HImode:
44784 if (TARGET_AVX512F && TARGET_AVX512BW)
44786 tmp = gen_reg_rtx (mode);
44787 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44788 gen_rtx_VEC_DUPLICATE (mode, val)));
44789 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44790 force_reg (SImode, GEN_INT (1 << elt))));
44791 return;
44793 else
44794 break;
44795 case V64QImode:
44796 if (TARGET_AVX512F && TARGET_AVX512BW)
44798 tmp = gen_reg_rtx (mode);
44799 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44800 gen_rtx_VEC_DUPLICATE (mode, val)));
44801 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44802 force_reg (DImode, GEN_INT (1 << elt))));
44803 return;
44805 else
44806 break;
44808 default:
44809 break;
44812 if (use_vec_merge)
44814 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44815 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44816 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44818 else
44820 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44822 emit_move_insn (mem, target);
44824 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44825 emit_move_insn (tmp, val);
44827 emit_move_insn (target, mem);
44831 void
44832 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44834 machine_mode mode = GET_MODE (vec);
44835 machine_mode inner_mode = GET_MODE_INNER (mode);
44836 bool use_vec_extr = false;
44837 rtx tmp;
44839 switch (mode)
44841 case V2SImode:
44842 case V2SFmode:
44843 if (!mmx_ok)
44844 break;
44845 /* FALLTHRU */
44847 case V2DFmode:
44848 case V2DImode:
44849 use_vec_extr = true;
44850 break;
44852 case V4SFmode:
44853 use_vec_extr = TARGET_SSE4_1;
44854 if (use_vec_extr)
44855 break;
44857 switch (elt)
44859 case 0:
44860 tmp = vec;
44861 break;
44863 case 1:
44864 case 3:
44865 tmp = gen_reg_rtx (mode);
44866 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44867 GEN_INT (elt), GEN_INT (elt),
44868 GEN_INT (elt+4), GEN_INT (elt+4)));
44869 break;
44871 case 2:
44872 tmp = gen_reg_rtx (mode);
44873 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44874 break;
44876 default:
44877 gcc_unreachable ();
44879 vec = tmp;
44880 use_vec_extr = true;
44881 elt = 0;
44882 break;
44884 case V4SImode:
44885 use_vec_extr = TARGET_SSE4_1;
44886 if (use_vec_extr)
44887 break;
44889 if (TARGET_SSE2)
44891 switch (elt)
44893 case 0:
44894 tmp = vec;
44895 break;
44897 case 1:
44898 case 3:
44899 tmp = gen_reg_rtx (mode);
44900 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44901 GEN_INT (elt), GEN_INT (elt),
44902 GEN_INT (elt), GEN_INT (elt)));
44903 break;
44905 case 2:
44906 tmp = gen_reg_rtx (mode);
44907 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44908 break;
44910 default:
44911 gcc_unreachable ();
44913 vec = tmp;
44914 use_vec_extr = true;
44915 elt = 0;
44917 else
44919 /* For SSE1, we have to reuse the V4SF code. */
44920 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44921 gen_lowpart (V4SFmode, vec), elt);
44922 return;
44924 break;
44926 case V8HImode:
44927 use_vec_extr = TARGET_SSE2;
44928 break;
44929 case V4HImode:
44930 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44931 break;
44933 case V16QImode:
44934 use_vec_extr = TARGET_SSE4_1;
44935 break;
44937 case V8SFmode:
44938 if (TARGET_AVX)
44940 tmp = gen_reg_rtx (V4SFmode);
44941 if (elt < 4)
44942 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44943 else
44944 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44945 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44946 return;
44948 break;
44950 case V4DFmode:
44951 if (TARGET_AVX)
44953 tmp = gen_reg_rtx (V2DFmode);
44954 if (elt < 2)
44955 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44956 else
44957 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44958 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44959 return;
44961 break;
44963 case V32QImode:
44964 if (TARGET_AVX)
44966 tmp = gen_reg_rtx (V16QImode);
44967 if (elt < 16)
44968 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44969 else
44970 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44971 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44972 return;
44974 break;
44976 case V16HImode:
44977 if (TARGET_AVX)
44979 tmp = gen_reg_rtx (V8HImode);
44980 if (elt < 8)
44981 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44982 else
44983 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44984 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44985 return;
44987 break;
44989 case V8SImode:
44990 if (TARGET_AVX)
44992 tmp = gen_reg_rtx (V4SImode);
44993 if (elt < 4)
44994 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44995 else
44996 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44997 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44998 return;
45000 break;
45002 case V4DImode:
45003 if (TARGET_AVX)
45005 tmp = gen_reg_rtx (V2DImode);
45006 if (elt < 2)
45007 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45008 else
45009 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45010 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45011 return;
45013 break;
45015 case V32HImode:
45016 if (TARGET_AVX512BW)
45018 tmp = gen_reg_rtx (V16HImode);
45019 if (elt < 16)
45020 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45021 else
45022 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45023 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45024 return;
45026 break;
45028 case V64QImode:
45029 if (TARGET_AVX512BW)
45031 tmp = gen_reg_rtx (V32QImode);
45032 if (elt < 32)
45033 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45034 else
45035 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45036 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45037 return;
45039 break;
45041 case V16SFmode:
45042 tmp = gen_reg_rtx (V8SFmode);
45043 if (elt < 8)
45044 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45045 else
45046 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45047 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45048 return;
45050 case V8DFmode:
45051 tmp = gen_reg_rtx (V4DFmode);
45052 if (elt < 4)
45053 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45054 else
45055 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45056 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45057 return;
45059 case V16SImode:
45060 tmp = gen_reg_rtx (V8SImode);
45061 if (elt < 8)
45062 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45063 else
45064 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45065 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45066 return;
45068 case V8DImode:
45069 tmp = gen_reg_rtx (V4DImode);
45070 if (elt < 4)
45071 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45072 else
45073 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45074 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45075 return;
45077 case V8QImode:
45078 /* ??? Could extract the appropriate HImode element and shift. */
45079 default:
45080 break;
45083 if (use_vec_extr)
45085 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45086 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45088 /* Let the rtl optimizers know about the zero extension performed. */
45089 if (inner_mode == QImode || inner_mode == HImode)
45091 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45092 target = gen_lowpart (SImode, target);
45095 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45097 else
45099 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45101 emit_move_insn (mem, vec);
45103 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45104 emit_move_insn (target, tmp);
45108 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45109 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45110 The upper bits of DEST are undefined, though they shouldn't cause
45111 exceptions (some bits from src or all zeros are ok). */
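/* Roughly, the copy uses whatever operation reaches across the required
   distance: a shuffle (movhlps/shufps) or byte shift (psrldq) within a
   128-bit vector, vperm2f128/vpermq (lane swap) or an in-lane shuffle or
   shift for 256-bit vectors, and shuf_i32x4/pshufd for the 512-bit
   modes.  */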
45113 static void
45114 emit_reduc_half (rtx dest, rtx src, int i)
45116 rtx tem, d = dest;
45117 switch (GET_MODE (src))
45119 case V4SFmode:
45120 if (i == 128)
45121 tem = gen_sse_movhlps (dest, src, src);
45122 else
45123 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45124 GEN_INT (1 + 4), GEN_INT (1 + 4));
45125 break;
45126 case V2DFmode:
45127 tem = gen_vec_interleave_highv2df (dest, src, src);
45128 break;
45129 case V16QImode:
45130 case V8HImode:
45131 case V4SImode:
45132 case V2DImode:
45133 d = gen_reg_rtx (V1TImode);
45134 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45135 GEN_INT (i / 2));
45136 break;
45137 case V8SFmode:
45138 if (i == 256)
45139 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45140 else
45141 tem = gen_avx_shufps256 (dest, src, src,
45142 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45143 break;
45144 case V4DFmode:
45145 if (i == 256)
45146 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45147 else
45148 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45149 break;
45150 case V32QImode:
45151 case V16HImode:
45152 case V8SImode:
45153 case V4DImode:
45154 if (i == 256)
45156 if (GET_MODE (dest) != V4DImode)
45157 d = gen_reg_rtx (V4DImode);
45158 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45159 gen_lowpart (V4DImode, src),
45160 const1_rtx);
45162 else
45164 d = gen_reg_rtx (V2TImode);
45165 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45166 GEN_INT (i / 2));
45168 break;
45169 case V64QImode:
45170 case V32HImode:
45171 case V16SImode:
45172 case V16SFmode:
45173 case V8DImode:
45174 case V8DFmode:
45175 if (i > 128)
45176 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45177 gen_lowpart (V16SImode, src),
45178 gen_lowpart (V16SImode, src),
45179 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45180 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45181 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45182 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45183 GEN_INT (0xC), GEN_INT (0xD),
45184 GEN_INT (0xE), GEN_INT (0xF),
45185 GEN_INT (0x10), GEN_INT (0x11),
45186 GEN_INT (0x12), GEN_INT (0x13),
45187 GEN_INT (0x14), GEN_INT (0x15),
45188 GEN_INT (0x16), GEN_INT (0x17));
45189 else
45190 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45191 gen_lowpart (V16SImode, src),
45192 GEN_INT (i == 128 ? 0x2 : 0x1),
45193 GEN_INT (0x3),
45194 GEN_INT (0x3),
45195 GEN_INT (0x3),
45196 GEN_INT (i == 128 ? 0x6 : 0x5),
45197 GEN_INT (0x7),
45198 GEN_INT (0x7),
45199 GEN_INT (0x7),
45200 GEN_INT (i == 128 ? 0xA : 0x9),
45201 GEN_INT (0xB),
45202 GEN_INT (0xB),
45203 GEN_INT (0xB),
45204 GEN_INT (i == 128 ? 0xE : 0xD),
45205 GEN_INT (0xF),
45206 GEN_INT (0xF),
45207 GEN_INT (0xF));
45208 break;
45209 default:
45210 gcc_unreachable ();
45212 emit_insn (tem);
45213 if (d != dest)
45214 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45217 /* Expand a vector reduction. FN is the binary pattern to reduce;
45218 DEST is the destination; IN is the input vector. */
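/* For illustration, a V4SImode PLUS reduction of {a,b,c,d} takes two
   steps: the first emit_reduc_half brings {c,d,...} down and the
   pairwise op gives {a+c,b+d,...}; the second brings element 1 down
   and the final op leaves a+b+c+d in element 0 of DEST.  */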
45220 void
45221 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45223 rtx half, dst, vec = in;
45224 machine_mode mode = GET_MODE (in);
45225 int i;
45227 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45228 if (TARGET_SSE4_1
45229 && mode == V8HImode
45230 && fn == gen_uminv8hi3)
45232 emit_insn (gen_sse4_1_phminposuw (dest, in));
45233 return;
45236 for (i = GET_MODE_BITSIZE (mode);
45237 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45238 i >>= 1)
45240 half = gen_reg_rtx (mode);
45241 emit_reduc_half (half, vec, i);
45242 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45243 dst = dest;
45244 else
45245 dst = gen_reg_rtx (mode);
45246 emit_insn (fn (dst, half, vec));
45247 vec = dst;
45251 /* Target hook for scalar_mode_supported_p. */
45252 static bool
45253 ix86_scalar_mode_supported_p (machine_mode mode)
45255 if (DECIMAL_FLOAT_MODE_P (mode))
45256 return default_decimal_float_supported_p ();
45257 else if (mode == TFmode)
45258 return true;
45259 else
45260 return default_scalar_mode_supported_p (mode);
45263 /* Implements target hook vector_mode_supported_p. */
45264 static bool
45265 ix86_vector_mode_supported_p (machine_mode mode)
45267 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45268 return true;
45269 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45270 return true;
45271 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45272 return true;
45273 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45274 return true;
45275 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45276 return true;
45277 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45278 return true;
45279 return false;
45282 /* Implement target hook libgcc_floating_mode_supported_p. */
45283 static bool
45284 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45286 switch (mode)
45288 case SFmode:
45289 case DFmode:
45290 case XFmode:
45291 return true;
45293 case TFmode:
45294 #ifdef IX86_NO_LIBGCC_TFMODE
45295 return false;
45296 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45297 return TARGET_LONG_DOUBLE_128;
45298 #else
45299 return true;
45300 #endif
45302 default:
45303 return false;
45307 /* Target hook for c_mode_for_suffix. */
45308 static machine_mode
45309 ix86_c_mode_for_suffix (char suffix)
45311 if (suffix == 'q')
45312 return TFmode;
45313 if (suffix == 'w')
45314 return XFmode;
45316 return VOIDmode;
45319 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45321 We do this in the new i386 backend to maintain source compatibility
45322 with the old cc0-based compiler. */
45324 static tree
45325 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45327 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45328 clobbers);
45329 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45330 clobbers);
45331 return clobbers;
45334 /* Implements target vector targetm.asm.encode_section_info. */
45336 static void ATTRIBUTE_UNUSED
45337 ix86_encode_section_info (tree decl, rtx rtl, int first)
45339 default_encode_section_info (decl, rtl, first);
45341 if (ix86_in_large_data_p (decl))
45342 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45345 /* Worker function for REVERSE_CONDITION. */
45347 enum rtx_code
45348 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45350 return (mode != CCFPmode && mode != CCFPUmode
45351 ? reverse_condition (code)
45352 : reverse_condition_maybe_unordered (code));
45355 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45356 to OPERANDS[0]. */
45358 const char *
45359 output_387_reg_move (rtx insn, rtx *operands)
45361 if (REG_P (operands[0]))
45363 if (REG_P (operands[1])
45364 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45366 if (REGNO (operands[0]) == FIRST_STACK_REG)
45367 return output_387_ffreep (operands, 0);
45368 return "fstp\t%y0";
45370 if (STACK_TOP_P (operands[0]))
45371 return "fld%Z1\t%y1";
45372 return "fst\t%y0";
45374 else if (MEM_P (operands[0]))
45376 gcc_assert (REG_P (operands[1]));
45377 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45378 return "fstp%Z0\t%y0";
45379 else
45381 /* There is no non-popping store to memory for XFmode.
45382 So if we need one, follow the store with a load. */
45383 if (GET_MODE (operands[0]) == XFmode)
45384 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45385 else
45386 return "fst%Z0\t%y0";
45389 else
45390 gcc_unreachable();
45393 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
45394 the FP status register is set. */
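/* After fnstsw the FPU status word is in a 16-bit register; C2 is bit
   10, i.e. mask 0x04 within the high byte.  With SAHF that high byte is
   loaded into EFLAGS, where C2 lands in PF, so an UNORDERED test can be
   used; without SAHF the same bit is tested directly with a 0x04 mask
   on the high byte.  */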
45396 void
45397 ix86_emit_fp_unordered_jump (rtx label)
45399 rtx reg = gen_reg_rtx (HImode);
45400 rtx temp;
45402 emit_insn (gen_x86_fnstsw_1 (reg));
45404 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45406 emit_insn (gen_x86_sahf_1 (reg));
45408 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45409 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45411 else
45413 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45415 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45416 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45419 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45420 gen_rtx_LABEL_REF (VOIDmode, label),
45421 pc_rtx);
45422 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45424 emit_jump_insn (temp);
45425 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45428 /* Output code to perform a log1p XFmode calculation. */
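/* The threshold used below is 1 - sqrt(2)/2 ~= 0.29289..., the limit of
   the argument range for which fyl2xp1 is specified.  For |op1| below
   it, fyl2xp1 with y = ln(2) (fldln2) computes ln(2) * log2(1 + op1)
   = log1p(op1) directly; for larger arguments the code falls back to
   fyl2x on 1 + op1.  */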
45430 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45432 rtx_code_label *label1 = gen_label_rtx ();
45433 rtx_code_label *label2 = gen_label_rtx ();
45435 rtx tmp = gen_reg_rtx (XFmode);
45436 rtx tmp2 = gen_reg_rtx (XFmode);
45437 rtx test;
45439 emit_insn (gen_absxf2 (tmp, op1));
45440 test = gen_rtx_GE (VOIDmode, tmp,
45441 CONST_DOUBLE_FROM_REAL_VALUE (
45442 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45443 XFmode));
45444 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45446 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45447 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45448 emit_jump (label2);
45450 emit_label (label1);
45451 emit_move_insn (tmp, CONST1_RTX (XFmode));
45452 emit_insn (gen_addxf3 (tmp, op1, tmp));
45453 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45454 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45456 emit_label (label2);
45459 /* Emit code for round calculation. */
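/* The expansion follows round(a) = sgn(a) * floor(fabs(a) + 0.5): fxam
   records the sign, the rounding is done on fabs(a), and the result is
   negated afterwards if the sign bit was set.  Halfway cases therefore
   round away from zero, e.g. round(-2.5) == -3.  */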
45460 void ix86_emit_i387_round (rtx op0, rtx op1)
45462 machine_mode inmode = GET_MODE (op1);
45463 machine_mode outmode = GET_MODE (op0);
45464 rtx e1, e2, res, tmp, tmp1, half;
45465 rtx scratch = gen_reg_rtx (HImode);
45466 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45467 rtx_code_label *jump_label = gen_label_rtx ();
45468 rtx insn;
45469 rtx (*gen_abs) (rtx, rtx);
45470 rtx (*gen_neg) (rtx, rtx);
45472 switch (inmode)
45474 case SFmode:
45475 gen_abs = gen_abssf2;
45476 break;
45477 case DFmode:
45478 gen_abs = gen_absdf2;
45479 break;
45480 case XFmode:
45481 gen_abs = gen_absxf2;
45482 break;
45483 default:
45484 gcc_unreachable ();
45487 switch (outmode)
45489 case SFmode:
45490 gen_neg = gen_negsf2;
45491 break;
45492 case DFmode:
45493 gen_neg = gen_negdf2;
45494 break;
45495 case XFmode:
45496 gen_neg = gen_negxf2;
45497 break;
45498 case HImode:
45499 gen_neg = gen_neghi2;
45500 break;
45501 case SImode:
45502 gen_neg = gen_negsi2;
45503 break;
45504 case DImode:
45505 gen_neg = gen_negdi2;
45506 break;
45507 default:
45508 gcc_unreachable ();
45511 e1 = gen_reg_rtx (inmode);
45512 e2 = gen_reg_rtx (inmode);
45513 res = gen_reg_rtx (outmode);
45515 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45517 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45519 /* scratch = fxam(op1) */
45520 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45521 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45522 UNSPEC_FXAM)));
45523 /* e1 = fabs(op1) */
45524 emit_insn (gen_abs (e1, op1));
45526 /* e2 = e1 + 0.5 */
45527 half = force_reg (inmode, half);
45528 emit_insn (gen_rtx_SET (VOIDmode, e2,
45529 gen_rtx_PLUS (inmode, e1, half)));
45531 /* res = floor(e2) */
45532 if (inmode != XFmode)
45534 tmp1 = gen_reg_rtx (XFmode);
45536 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45537 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45539 else
45540 tmp1 = e2;
45542 switch (outmode)
45544 case SFmode:
45545 case DFmode:
45547 rtx tmp0 = gen_reg_rtx (XFmode);
45549 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45551 emit_insn (gen_rtx_SET (VOIDmode, res,
45552 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45553 UNSPEC_TRUNC_NOOP)));
45555 break;
45556 case XFmode:
45557 emit_insn (gen_frndintxf2_floor (res, tmp1));
45558 break;
45559 case HImode:
45560 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45561 break;
45562 case SImode:
45563 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45564 break;
45565 case DImode:
45566 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45567 break;
45568 default:
45569 gcc_unreachable ();
45572 /* flags = signbit(a) */
45573 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45575 /* if (flags) then res = -res */
45576 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45577 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45578 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45579 pc_rtx);
45580 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45581 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45582 JUMP_LABEL (insn) = jump_label;
45584 emit_insn (gen_neg (res, res));
45586 emit_label (jump_label);
45587 LABEL_NUSES (jump_label) = 1;
45589 emit_move_insn (op0, res);
45592 /* Output code to perform a Newton-Raphson approximation of a single precision
45593 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
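/* One Newton-Raphson step for 1/b starting from x0 ~= rcp(b) is
   x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), which is exactly
   e1 - e0 below; multiplying by a then gives a/b.  Each step roughly
   doubles the number of correct bits of the ~12-bit rcpps (or ~14-bit
   rcp14) estimate.  */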
45595 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45597 rtx x0, x1, e0, e1;
45599 x0 = gen_reg_rtx (mode);
45600 e0 = gen_reg_rtx (mode);
45601 e1 = gen_reg_rtx (mode);
45602 x1 = gen_reg_rtx (mode);
45604 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45606 b = force_reg (mode, b);
45608 /* x0 = rcp(b) estimate */
45609 if (mode == V16SFmode || mode == V8DFmode)
45610 emit_insn (gen_rtx_SET (VOIDmode, x0,
45611 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45612 UNSPEC_RCP14)));
45613 else
45614 emit_insn (gen_rtx_SET (VOIDmode, x0,
45615 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45616 UNSPEC_RCP)));
45618 /* e0 = x0 * b */
45619 emit_insn (gen_rtx_SET (VOIDmode, e0,
45620 gen_rtx_MULT (mode, x0, b)));
45622 /* e0 = x0 * e0 */
45623 emit_insn (gen_rtx_SET (VOIDmode, e0,
45624 gen_rtx_MULT (mode, x0, e0)));
45626 /* e1 = x0 + x0 */
45627 emit_insn (gen_rtx_SET (VOIDmode, e1,
45628 gen_rtx_PLUS (mode, x0, x0)));
45630 /* x1 = e1 - e0 */
45631 emit_insn (gen_rtx_SET (VOIDmode, x1,
45632 gen_rtx_MINUS (mode, e1, e0)));
45634 /* res = a * x1 */
45635 emit_insn (gen_rtx_SET (VOIDmode, res,
45636 gen_rtx_MULT (mode, a, x1)));
45639 /* Output code to perform a Newton-Raphson approximation of a
45640 single precision floating point [reciprocal] square root. */
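/* One Newton-Raphson step for 1/sqrt(a) starting from x0 ~= rsqrt(a) is
   x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3),
   which matches e2 * e3 below with e3 = -0.5 * x0; using e3 = -0.5 * e0
   (i.e. multiplying the whole expression by a) yields sqrt(a) instead.  */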
45642 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45643 bool recip)
45645 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45646 REAL_VALUE_TYPE r;
45647 int unspec;
45649 x0 = gen_reg_rtx (mode);
45650 e0 = gen_reg_rtx (mode);
45651 e1 = gen_reg_rtx (mode);
45652 e2 = gen_reg_rtx (mode);
45653 e3 = gen_reg_rtx (mode);
45655 real_from_integer (&r, VOIDmode, -3, SIGNED);
45656 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45658 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45659 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45660 unspec = UNSPEC_RSQRT;
45662 if (VECTOR_MODE_P (mode))
45664 mthree = ix86_build_const_vector (mode, true, mthree);
45665 mhalf = ix86_build_const_vector (mode, true, mhalf);
45666 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45667 if (GET_MODE_SIZE (mode) == 64)
45668 unspec = UNSPEC_RSQRT14;
45671 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45672 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45674 a = force_reg (mode, a);
45676 /* x0 = rsqrt(a) estimate */
45677 emit_insn (gen_rtx_SET (VOIDmode, x0,
45678 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45679 unspec)));
45681 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
45682 if (!recip)
45684 rtx zero, mask;
45686 zero = gen_reg_rtx (mode);
45687 mask = gen_reg_rtx (mode);
45689 zero = force_reg (mode, CONST0_RTX(mode));
45691 /* Handle masked compare. */
45692 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45694 mask = gen_reg_rtx (HImode);
45695 /* Imm value 0x4 corresponds to not-equal comparison. */
45696 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45697 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45699 else
45701 emit_insn (gen_rtx_SET (VOIDmode, mask,
45702 gen_rtx_NE (mode, zero, a)));
45704 emit_insn (gen_rtx_SET (VOIDmode, x0,
45705 gen_rtx_AND (mode, x0, mask)));
45709 /* e0 = x0 * a */
45710 emit_insn (gen_rtx_SET (VOIDmode, e0,
45711 gen_rtx_MULT (mode, x0, a)));
45712 /* e1 = e0 * x0 */
45713 emit_insn (gen_rtx_SET (VOIDmode, e1,
45714 gen_rtx_MULT (mode, e0, x0)));
45716 /* e2 = e1 - 3. */
45717 mthree = force_reg (mode, mthree);
45718 emit_insn (gen_rtx_SET (VOIDmode, e2,
45719 gen_rtx_PLUS (mode, e1, mthree)));
45721 mhalf = force_reg (mode, mhalf);
45722 if (recip)
45723 /* e3 = -.5 * x0 */
45724 emit_insn (gen_rtx_SET (VOIDmode, e3,
45725 gen_rtx_MULT (mode, x0, mhalf)));
45726 else
45727 /* e3 = -.5 * e0 */
45728 emit_insn (gen_rtx_SET (VOIDmode, e3,
45729 gen_rtx_MULT (mode, e0, mhalf)));
45730 /* ret = e2 * e3 */
45731 emit_insn (gen_rtx_SET (VOIDmode, res,
45732 gen_rtx_MULT (mode, e2, e3)));
45735 #ifdef TARGET_SOLARIS
45736 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45738 static void
45739 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45740 tree decl)
45742 /* With Binutils 2.15, the "@unwind" marker must be specified on
45743 every occurrence of the ".eh_frame" section, not just the first
45744 one. */
45745 if (TARGET_64BIT
45746 && strcmp (name, ".eh_frame") == 0)
45748 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45749 flags & SECTION_WRITE ? "aw" : "a");
45750 return;
45753 #ifndef USE_GAS
45754 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45756 solaris_elf_asm_comdat_section (name, flags, decl);
45757 return;
45759 #endif
45761 default_elf_asm_named_section (name, flags, decl);
45763 #endif /* TARGET_SOLARIS */
45765 /* Return the mangling of TYPE if it is an extended fundamental type. */
45767 static const char *
45768 ix86_mangle_type (const_tree type)
45770 type = TYPE_MAIN_VARIANT (type);
45772 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45773 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45774 return NULL;
45776 switch (TYPE_MODE (type))
45778 case TFmode:
45779 /* __float128 is "g". */
45780 return "g";
45781 case XFmode:
45782 /* "long double" or __float80 is "e". */
45783 return "e";
45784 default:
45785 return NULL;
45789 /* For 32-bit code we can save PIC register setup by using
45790 __stack_chk_fail_local hidden function instead of calling
45791 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45792 register, so it is better to call __stack_chk_fail directly. */
45794 static tree ATTRIBUTE_UNUSED
45795 ix86_stack_protect_fail (void)
45797 return TARGET_64BIT
45798 ? default_external_stack_protect_fail ()
45799 : default_hidden_stack_protect_fail ();
45802 /* Select a format to encode pointers in exception handling data. CODE
45803 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45804 true if the symbol may be affected by dynamic relocations.
45806 ??? All x86 object file formats are capable of representing this.
45807 After all, the relocation needed is the same as for the call insn.
45808 Whether or not a particular assembler allows us to enter such, I
45809 guess we'll have to see. */
45810 int
45811 asm_preferred_eh_data_format (int code, int global)
45813 if (flag_pic)
45815 int type = DW_EH_PE_sdata8;
45816 if (!TARGET_64BIT
45817 || ix86_cmodel == CM_SMALL_PIC
45818 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45819 type = DW_EH_PE_sdata4;
45820 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45822 if (ix86_cmodel == CM_SMALL
45823 || (ix86_cmodel == CM_MEDIUM && code))
45824 return DW_EH_PE_udata4;
45825 return DW_EH_PE_absptr;
45828 /* Expand copysign from SIGN to the positive value ABS_VALUE
45829 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45830 the sign-bit. */
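/* The sign transfer is done with plain bit operations:
     sgn = sign & sign_bit_mask  (or sign & ~mask if the caller passed
                                  an fabs-style mask)
     result = abs_value | sgn
   relying on ABS_VALUE already having a clear sign bit.  */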
45831 static void
45832 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45834 machine_mode mode = GET_MODE (sign);
45835 rtx sgn = gen_reg_rtx (mode);
45836 if (mask == NULL_RTX)
45838 machine_mode vmode;
45840 if (mode == SFmode)
45841 vmode = V4SFmode;
45842 else if (mode == DFmode)
45843 vmode = V2DFmode;
45844 else
45845 vmode = mode;
45847 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45848 if (!VECTOR_MODE_P (mode))
45850 /* We need to generate a scalar mode mask in this case. */
45851 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45852 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45853 mask = gen_reg_rtx (mode);
45854 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45857 else
45858 mask = gen_rtx_NOT (mode, mask);
45859 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45860 gen_rtx_AND (mode, mask, sign)));
45861 emit_insn (gen_rtx_SET (VOIDmode, result,
45862 gen_rtx_IOR (mode, abs_value, sgn)));
45865 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45866 mask for masking out the sign-bit is stored in *SMASK, if that is
45867 non-null. */
45868 static rtx
45869 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45871 machine_mode vmode, mode = GET_MODE (op0);
45872 rtx xa, mask;
45874 xa = gen_reg_rtx (mode);
45875 if (mode == SFmode)
45876 vmode = V4SFmode;
45877 else if (mode == DFmode)
45878 vmode = V2DFmode;
45879 else
45880 vmode = mode;
45881 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45882 if (!VECTOR_MODE_P (mode))
45884 /* We need to generate a scalar mode mask in this case. */
45885 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45886 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45887 mask = gen_reg_rtx (mode);
45888 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45890 emit_insn (gen_rtx_SET (VOIDmode, xa,
45891 gen_rtx_AND (mode, op0, mask)));
45893 if (smask)
45894 *smask = mask;
45896 return xa;
45899 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45900 swapping the operands if SWAP_OPERANDS is true. The expanded
45901 code is a forward jump to a newly created label in case the
45902 comparison is true. The generated label rtx is returned. */
45903 static rtx_code_label *
45904 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45905 bool swap_operands)
45907 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45908 rtx_code_label *label;
45909 rtx tmp;
45911 if (swap_operands)
45912 std::swap (op0, op1);
45914 label = gen_label_rtx ();
45915 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45916 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45917 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45918 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45919 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45920 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45921 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45922 JUMP_LABEL (tmp) = label;
45924 return label;
45927 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45928 using comparison code CODE. Operands are swapped for the comparison if
45929 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45930 static rtx
45931 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45932 bool swap_operands)
45934 rtx (*insn)(rtx, rtx, rtx, rtx);
45935 machine_mode mode = GET_MODE (op0);
45936 rtx mask = gen_reg_rtx (mode);
45938 if (swap_operands)
45939 std::swap (op0, op1);
45941 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45943 emit_insn (insn (mask, op0, op1,
45944 gen_rtx_fmt_ee (code, mode, op0, op1)));
45945 return mask;
45948 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45949 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
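/* The callers use 2**52 (2**23 for SFmode) for the usual add-and-subtract
   rounding trick: once a non-negative double has 2**52 added to it, its
   unit in the last place is 1.0, so the addition itself rounds away the
   fraction bits; subtracting 2**52 again leaves the value rounded to an
   integer, e.g. (3.7 + 0x1p52) - 0x1p52 == 4.0.  */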
45950 static rtx
45951 ix86_gen_TWO52 (machine_mode mode)
45953 REAL_VALUE_TYPE TWO52r;
45954 rtx TWO52;
45956 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45957 TWO52 = const_double_from_real_value (TWO52r, mode);
45958 TWO52 = force_reg (mode, TWO52);
45960 return TWO52;
45963 /* Expand SSE sequence for computing lround from OP1 storing
45964 into OP0. */
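/* Adding a plain 0.5 and truncating would misround values just below a
   halfway point (the addition can round up to the next integer), so the
   constant actually added is nextafter (0.5, 0.0), the largest double
   below 0.5, with the sign of op1 copied onto it.  */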
45965 void
45966 ix86_expand_lround (rtx op0, rtx op1)
45968 /* C code for the stuff we're doing below:
45969 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45970 return (long)tmp;
45972 machine_mode mode = GET_MODE (op1);
45973 const struct real_format *fmt;
45974 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45975 rtx adj;
45977 /* load nextafter (0.5, 0.0) */
45978 fmt = REAL_MODE_FORMAT (mode);
45979 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45980 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45982 /* adj = copysign (0.5, op1) */
45983 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45984 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45986 /* adj = op1 + adj */
45987 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45989 /* op0 = (imode)adj */
45990 expand_fix (op0, adj, 0);
45993 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
45994 into OPERAND0. */
45995 void
45996 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45998 /* C code for the stuff we're doing below (for do_floor):
45999 xi = (long)op1;
46000 xi -= (double)xi > op1 ? 1 : 0;
46001 return xi;
46003 machine_mode fmode = GET_MODE (op1);
46004 machine_mode imode = GET_MODE (op0);
46005 rtx ireg, freg, tmp;
46006 rtx_code_label *label;
46008 /* reg = (long)op1 */
46009 ireg = gen_reg_rtx (imode);
46010 expand_fix (ireg, op1, 0);
46012 /* freg = (double)reg */
46013 freg = gen_reg_rtx (fmode);
46014 expand_float (freg, ireg, 0);
46016 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46017 label = ix86_expand_sse_compare_and_jump (UNLE,
46018 freg, op1, !do_floor);
46019 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46020 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46021 emit_move_insn (ireg, tmp);
46023 emit_label (label);
46024 LABEL_NUSES (label) = 1;
46026 emit_move_insn (op0, ireg);
46029 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46030 result in OPERAND0. */
46031 void
46032 ix86_expand_rint (rtx operand0, rtx operand1)
46034 /* C code for the stuff we're doing below:
46035 xa = fabs (operand1);
46036 if (!isless (xa, 2**52))
46037 return operand1;
46038 xa = xa + 2**52 - 2**52;
46039 return copysign (xa, operand1);
46041 machine_mode mode = GET_MODE (operand0);
46042 rtx res, xa, TWO52, mask;
46043 rtx_code_label *label;
46045 res = gen_reg_rtx (mode);
46046 emit_move_insn (res, operand1);
46048 /* xa = abs (operand1) */
46049 xa = ix86_expand_sse_fabs (res, &mask);
46051 /* if (!isless (xa, TWO52)) goto label; */
46052 TWO52 = ix86_gen_TWO52 (mode);
46053 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46055 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46056 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46058 ix86_sse_copysign_to_positive (res, xa, res, mask);
46060 emit_label (label);
46061 LABEL_NUSES (label) = 1;
46063 emit_move_insn (operand0, res);
46066 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46067 into OPERAND0. */
46068 void
46069 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46071 /* C code for the stuff we expand below.
46072 double xa = fabs (x), x2;
46073 if (!isless (xa, TWO52))
46074 return x;
46075 xa = xa + TWO52 - TWO52;
46076 x2 = copysign (xa, x);
46077 Compensate. Floor:
46078 if (x2 > x)
46079 x2 -= 1;
46080 Compensate. Ceil:
46081 if (x2 < x)
46082 x2 -= -1;
46083 return x2;
46085 machine_mode mode = GET_MODE (operand0);
46086 rtx xa, TWO52, tmp, one, res, mask;
46087 rtx_code_label *label;
46089 TWO52 = ix86_gen_TWO52 (mode);
46091 /* Temporary for holding the result, initialized to the input
46092 operand to ease control flow. */
46093 res = gen_reg_rtx (mode);
46094 emit_move_insn (res, operand1);
46096 /* xa = abs (operand1) */
46097 xa = ix86_expand_sse_fabs (res, &mask);
46099 /* if (!isless (xa, TWO52)) goto label; */
46100 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46102 /* xa = xa + TWO52 - TWO52; */
46103 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46104 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46106 /* xa = copysign (xa, operand1) */
46107 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46109 /* generate 1.0 or -1.0 */
46110 one = force_reg (mode,
46111 const_double_from_real_value (do_floor
46112 ? dconst1 : dconstm1, mode));
46114 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46115 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46116 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46117 gen_rtx_AND (mode, one, tmp)));
46118 /* We always need to subtract here to preserve signed zero. */
46119 tmp = expand_simple_binop (mode, MINUS,
46120 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46121 emit_move_insn (res, tmp);
46123 emit_label (label);
46124 LABEL_NUSES (label) = 1;
46126 emit_move_insn (operand0, res);
46129 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46130 into OPERAND0. */
46131 void
46132 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46134 /* C code for the stuff we expand below.
46135 double xa = fabs (x), x2;
46136 if (!isless (xa, TWO52))
46137 return x;
46138 x2 = (double)(long)x;
46139 Compensate. Floor:
46140 if (x2 > x)
46141 x2 -= 1;
46142 Compensate. Ceil:
46143 if (x2 < x)
46144 x2 += 1;
46145 if (HONOR_SIGNED_ZEROS (mode))
46146 return copysign (x2, x);
46147 return x2;
46149 machine_mode mode = GET_MODE (operand0);
46150 rtx xa, xi, TWO52, tmp, one, res, mask;
46151 rtx_code_label *label;
46153 TWO52 = ix86_gen_TWO52 (mode);
46155 /* Temporary for holding the result, initialized to the input
46156 operand to ease control flow. */
46157 res = gen_reg_rtx (mode);
46158 emit_move_insn (res, operand1);
46160 /* xa = abs (operand1) */
46161 xa = ix86_expand_sse_fabs (res, &mask);
46163 /* if (!isless (xa, TWO52)) goto label; */
46164 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46166 /* xa = (double)(long)x */
46167 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46168 expand_fix (xi, res, 0);
46169 expand_float (xa, xi, 0);
46171 /* generate 1.0 */
46172 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46174 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46175 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46176 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46177 gen_rtx_AND (mode, one, tmp)));
46178 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46179 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46180 emit_move_insn (res, tmp);
46182 if (HONOR_SIGNED_ZEROS (mode))
46183 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46185 emit_label (label);
46186 LABEL_NUSES (label) = 1;
46188 emit_move_insn (operand0, res);
46191 /* Expand SSE sequence for computing round from OPERAND1 storing
46192 into OPERAND0. A sequence that works without relying on DImode truncation
46193 via cvttsd2siq, which is only available on 64-bit targets. */
46194 void
46195 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46197 /* C code for the stuff we expand below.
46198 double xa = fabs (x), xa2, x2;
46199 if (!isless (xa, TWO52))
46200 return x;
46201 Using the absolute value and copying back sign makes
46202 -0.0 -> -0.0 correct.
46203 xa2 = xa + TWO52 - TWO52;
46204 Compensate.
46205 dxa = xa2 - xa;
46206 if (dxa <= -0.5)
46207 xa2 += 1;
46208 else if (dxa > 0.5)
46209 xa2 -= 1;
46210 x2 = copysign (xa2, x);
46211 return x2;
46213 machine_mode mode = GET_MODE (operand0);
46214 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46215 rtx_code_label *label;
46217 TWO52 = ix86_gen_TWO52 (mode);
46219 /* Temporary for holding the result, initialized to the input
46220 operand to ease control flow. */
46221 res = gen_reg_rtx (mode);
46222 emit_move_insn (res, operand1);
46224 /* xa = abs (operand1) */
46225 xa = ix86_expand_sse_fabs (res, &mask);
46227 /* if (!isless (xa, TWO52)) goto label; */
46228 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46230 /* xa2 = xa + TWO52 - TWO52; */
46231 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46232 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46234 /* dxa = xa2 - xa; */
46235 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46237 /* generate 0.5, 1.0 and -0.5 */
46238 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46239 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46240 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46241 0, OPTAB_DIRECT);
46243 /* Compensate. */
46244 tmp = gen_reg_rtx (mode);
46245 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46246 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46247 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46248 gen_rtx_AND (mode, one, tmp)));
46249 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46250 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46251 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46252 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46253 gen_rtx_AND (mode, one, tmp)));
46254 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
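/* Rough illustration of the compensation above: dxa is the amount by which
   the TWO52 trick moved xa, so we subtract 1.0 if it rounded up by more
   than 0.5 and add 1.0 if it rounded down by 0.5 or more; the net effect
   is round-half-away-from-zero on |x|.  E.g. xa = 2.5: xa2 = 2.0
   (round-to-even), dxa = -0.5 <= -0.5, so xa2 becomes 3.0, matching
   round (2.5) == 3.0.  */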
46256 /* res = copysign (xa2, operand1) */
46257 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46259 emit_label (label);
46260 LABEL_NUSES (label) = 1;
46262 emit_move_insn (operand0, res);
46265 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46266 into OPERAND0. */
46267 void
46268 ix86_expand_trunc (rtx operand0, rtx operand1)
46270 /* C code for SSE variant we expand below.
46271 double xa = fabs (x), x2;
46272 if (!isless (xa, TWO52))
46273 return x;
46274 x2 = (double)(long)x;
46275 if (HONOR_SIGNED_ZEROS (mode))
46276 return copysign (x2, x);
46277 return x2;
46279 machine_mode mode = GET_MODE (operand0);
46280 rtx xa, xi, TWO52, res, mask;
46281 rtx_code_label *label;
46283 TWO52 = ix86_gen_TWO52 (mode);
46285 /* Temporary for holding the result, initialized to the input
46286 operand to ease control flow. */
46287 res = gen_reg_rtx (mode);
46288 emit_move_insn (res, operand1);
46290 /* xa = abs (operand1) */
46291 xa = ix86_expand_sse_fabs (res, &mask);
46293 /* if (!isless (xa, TWO52)) goto label; */
46294 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46296 /* x = (double)(long)x */
46297 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46298 expand_fix (xi, res, 0);
46299 expand_float (res, xi, 0);
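/* No compensation is needed here: the truncating conversion already
   implements trunc for values below TWO52.  Only the sign of zero can be
   lost (e.g. trunc (-0.7) is -0.0 but the round trip yields +0.0), which
   the copysign below restores when signed zeros matter.  */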
46301 if (HONOR_SIGNED_ZEROS (mode))
46302 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46304 emit_label (label);
46305 LABEL_NUSES (label) = 1;
46307 emit_move_insn (operand0, res);
46310 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46311 into OPERAND0. */
46312 void
46313 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46315 machine_mode mode = GET_MODE (operand0);
46316 rtx xa, mask, TWO52, one, res, smask, tmp;
46317 rtx_code_label *label;
46319 /* C code for SSE variant we expand below.
46320 double xa = fabs (x), x2;
46321 if (!isless (xa, TWO52))
46322 return x;
46323 xa2 = xa + TWO52 - TWO52;
46324 Compensate:
46325 if (xa2 > xa)
46326 xa2 -= 1.0;
46327 x2 = copysign (xa2, x);
46328 return x2;
46331 TWO52 = ix86_gen_TWO52 (mode);
46333 /* Temporary for holding the result, initialized to the input
46334 operand to ease control flow. */
46335 res = gen_reg_rtx (mode);
46336 emit_move_insn (res, operand1);
46338 /* xa = abs (operand1) */
46339 xa = ix86_expand_sse_fabs (res, &smask);
46341 /* if (!isless (xa, TWO52)) goto label; */
46342 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46344 /* res = xa + TWO52 - TWO52; */
46345 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46346 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46347 emit_move_insn (res, tmp);
46349 /* generate 1.0 */
46350 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46352 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46353 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46354 emit_insn (gen_rtx_SET (VOIDmode, mask,
46355 gen_rtx_AND (mode, mask, one)));
46356 tmp = expand_simple_binop (mode, MINUS,
46357 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46358 emit_move_insn (res, tmp);
46360 /* res = copysign (res, operand1) */
46361 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46363 emit_label (label);
46364 LABEL_NUSES (label) = 1;
46366 emit_move_insn (operand0, res);
46369 /* Expand SSE sequence for computing round from OPERAND1 storing
46370 into OPERAND0. */
46371 void
46372 ix86_expand_round (rtx operand0, rtx operand1)
46374 /* C code for the stuff we're doing below:
46375 double xa = fabs (x);
46376 if (!isless (xa, TWO52))
46377 return x;
46378 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46379 return copysign (xa, x);
46381 machine_mode mode = GET_MODE (operand0);
46382 rtx res, TWO52, xa, xi, half, mask;
46383 rtx_code_label *label;
46384 const struct real_format *fmt;
46385 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46387 /* Temporary for holding the result, initialized to the input
46388 operand to ease control flow. */
46389 res = gen_reg_rtx (mode);
46390 emit_move_insn (res, operand1);
46392 TWO52 = ix86_gen_TWO52 (mode);
46393 xa = ix86_expand_sse_fabs (res, &mask);
46394 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46396 /* load nextafter (0.5, 0.0) */
46397 fmt = REAL_MODE_FORMAT (mode);
46398 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46399 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
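/* Why nextafter (0.5, 0.0) = 0.5 - 2**(-p-1) rather than plain 0.5: for
   instance, for the largest double below 0.5 (0.5 - 2**-54), adding a full
   0.5 would round the sum up to 1.0 and the truncation below would return
   1.0 instead of 0.0; with the slightly smaller constant the sum is
   1 - 2**-53, which truncates to 0.0 as desired.  */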
46401 /* xa = xa + 0.5 */
46402 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46403 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46405 /* xa = (double)(int64_t)xa */
46406 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46407 expand_fix (xi, xa, 0);
46408 expand_float (xa, xi, 0);
46410 /* res = copysign (xa, operand1) */
46411 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46413 emit_label (label);
46414 LABEL_NUSES (label) = 1;
46416 emit_move_insn (operand0, res);
46419 /* Expand SSE sequence for computing round
46420 from OP1 storing into OP0 using sse4 round insn. */
46421 void
46422 ix86_expand_round_sse4 (rtx op0, rtx op1)
46424 machine_mode mode = GET_MODE (op0);
46425 rtx e1, e2, res, half;
46426 const struct real_format *fmt;
46427 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46428 rtx (*gen_copysign) (rtx, rtx, rtx);
46429 rtx (*gen_round) (rtx, rtx, rtx);
46431 switch (mode)
46433 case SFmode:
46434 gen_copysign = gen_copysignsf3;
46435 gen_round = gen_sse4_1_roundsf2;
46436 break;
46437 case DFmode:
46438 gen_copysign = gen_copysigndf3;
46439 gen_round = gen_sse4_1_rounddf2;
46440 break;
46441 default:
46442 gcc_unreachable ();
46445 /* round (a) = trunc (a + copysign (0.5, a)) */
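/* E.g. round (-2.5): trunc (-2.5 + copysign (0.5, -2.5)) = trunc (-3.0)
   = -3.0, i.e. halfway cases are rounded away from zero as round requires.
   The constant actually loaded below is nextafter (0.5, 0.0), for the same
   reason as in ix86_expand_round above.  */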
46447 /* load nextafter (0.5, 0.0) */
46448 fmt = REAL_MODE_FORMAT (mode);
46449 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46450 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46451 half = const_double_from_real_value (pred_half, mode);
46453 /* e1 = copysign (0.5, op1) */
46454 e1 = gen_reg_rtx (mode);
46455 emit_insn (gen_copysign (e1, half, op1));
46457 /* e2 = op1 + e1 */
46458 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46460 /* res = trunc (e2) */
46461 res = gen_reg_rtx (mode);
46462 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46464 emit_move_insn (op0, res);
46468 /* Table of valid machine attributes. */
46469 static const struct attribute_spec ix86_attribute_table[] =
46471 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46472 affects_type_identity } */
46473 /* Stdcall attribute says callee is responsible for popping arguments
46474 if they are not variable. */
46475 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46476 true },
46477 /* Fastcall attribute says callee is responsible for popping arguments
46478 if they are not variable. */
46479 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46480 true },
46481 /* Thiscall attribute says callee is responsible for popping arguments
46482 if they are not variable. */
46483 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46484 true },
46485 /* Cdecl attribute says the callee is a normal C declaration */
46486 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46487 true },
46488 /* Regparm attribute specifies how many integer arguments are to be
46489 passed in registers. */
46490 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46491 true },
46492 /* Sseregparm attribute says we are using x86_64 calling conventions
46493 for FP arguments. */
46494 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46495 true },
46496 /* The transactional memory builtins are implicitly regparm or fastcall
46497 depending on the ABI. Override the generic do-nothing attribute that
46498 these builtins were declared with. */
46499 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46500 true },
46501 /* force_align_arg_pointer says this function realigns the stack at entry. */
46502 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46503 false, true, true, ix86_handle_cconv_attribute, false },
46504 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46505 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46506 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46507 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46508 false },
46509 #endif
46510 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46511 false },
46512 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46513 false },
46514 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46515 SUBTARGET_ATTRIBUTE_TABLE,
46516 #endif
46517 /* ms_abi and sysv_abi calling convention function attributes. */
46518 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46519 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46520 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46521 false },
46522 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46523 ix86_handle_callee_pop_aggregate_return, true },
46524 /* End element. */
46525 { NULL, 0, 0, false, false, false, NULL, false }
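/* For illustration only (not an exhaustive list), source-level uses of the
   attributes above look like
     int __attribute__ ((regparm (2))) f (int, int);
     int __attribute__ ((fastcall)) g (int, int);
     struct __attribute__ ((ms_struct)) s { char c; int i; };
   the first two are routed to ix86_handle_cconv_attribute, the last one to
   ix86_handle_struct_attribute.  */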
46528 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46529 static int
46530 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46531 tree vectype, int)
46533 unsigned elements;
46535 switch (type_of_cost)
46537 case scalar_stmt:
46538 return ix86_cost->scalar_stmt_cost;
46540 case scalar_load:
46541 return ix86_cost->scalar_load_cost;
46543 case scalar_store:
46544 return ix86_cost->scalar_store_cost;
46546 case vector_stmt:
46547 return ix86_cost->vec_stmt_cost;
46549 case vector_load:
46550 return ix86_cost->vec_align_load_cost;
46552 case vector_store:
46553 return ix86_cost->vec_store_cost;
46555 case vec_to_scalar:
46556 return ix86_cost->vec_to_scalar_cost;
46558 case scalar_to_vec:
46559 return ix86_cost->scalar_to_vec_cost;
46561 case unaligned_load:
46562 case unaligned_store:
46563 return ix86_cost->vec_unalign_load_cost;
46565 case cond_branch_taken:
46566 return ix86_cost->cond_taken_branch_cost;
46568 case cond_branch_not_taken:
46569 return ix86_cost->cond_not_taken_branch_cost;
46571 case vec_perm:
46572 case vec_promote_demote:
46573 return ix86_cost->vec_stmt_cost;
46575 case vec_construct:
46576 elements = TYPE_VECTOR_SUBPARTS (vectype);
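/* Cost sketch: building a vector from scalars is charged roughly one unit
   per pair of elements plus one, e.g. 3 for a 4-element vector and 5 for
   an 8-element vector.  */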
46577 return elements / 2 + 1;
46579 default:
46580 gcc_unreachable ();
46584 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46585 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46586 insn every time. */
46588 static GTY(()) rtx_insn *vselect_insn;
46590 /* Initialize vselect_insn. */
46592 static void
46593 init_vselect_insn (void)
46595 unsigned i;
46596 rtx x;
46598 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46599 for (i = 0; i < MAX_VECT_LEN; ++i)
46600 XVECEXP (x, 0, i) = const0_rtx;
46601 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46602 const0_rtx), x);
46603 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46604 start_sequence ();
46605 vselect_insn = emit_insn (x);
46606 end_sequence ();
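/* The dummy insn above is built once, inside a sequence that is thrown
   away, and is afterwards mutated in place by expand_vselect and
   expand_vselect_vconcat: real operands and modes are swapped in,
   recog_memoized tests whether the result matches an enabled pattern, and
   the placeholders are then restored.  This avoids allocating fresh RTL
   for every query.  */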
46609 /* Construct (set target (vec_select op0 (parallel perm))) and
46610 return true if that's a valid instruction in the active ISA. */
46612 static bool
46613 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46614 unsigned nelt, bool testing_p)
46616 unsigned int i;
46617 rtx x, save_vconcat;
46618 int icode;
46620 if (vselect_insn == NULL_RTX)
46621 init_vselect_insn ();
46623 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46624 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46625 for (i = 0; i < nelt; ++i)
46626 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46627 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46628 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46629 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46630 SET_DEST (PATTERN (vselect_insn)) = target;
46631 icode = recog_memoized (vselect_insn);
46633 if (icode >= 0 && !testing_p)
46634 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46636 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46637 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46638 INSN_CODE (vselect_insn) = -1;
46640 return icode >= 0;
46643 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46645 static bool
46646 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46647 const unsigned char *perm, unsigned nelt,
46648 bool testing_p)
46650 machine_mode v2mode;
46651 rtx x;
46652 bool ok;
46654 if (vselect_insn == NULL_RTX)
46655 init_vselect_insn ();
46657 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46658 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46659 PUT_MODE (x, v2mode);
46660 XEXP (x, 0) = op0;
46661 XEXP (x, 1) = op1;
46662 ok = expand_vselect (target, x, perm, nelt, testing_p);
46663 XEXP (x, 0) = const0_rtx;
46664 XEXP (x, 1) = const0_rtx;
46665 return ok;
46668 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46669 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46671 static bool
46672 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46674 machine_mode vmode = d->vmode;
46675 unsigned i, mask, nelt = d->nelt;
46676 rtx target, op0, op1, x;
46677 rtx rperm[32], vperm;
46679 if (d->one_operand_p)
46680 return false;
46681 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46682 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46684 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46686 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46688 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46690 else
46691 return false;
46693 /* This is a blend, not a permute. Elements must stay in their
46694 respective lanes. */
46695 for (i = 0; i < nelt; ++i)
46697 unsigned e = d->perm[i];
46698 if (!(e == i || e == i + nelt))
46699 return false;
46702 if (d->testing_p)
46703 return true;
46705 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46706 decision should be extracted elsewhere, so that we only try that
46707 sequence once all budget==3 options have been tried. */
46708 target = d->target;
46709 op0 = d->op0;
46710 op1 = d->op1;
46711 mask = 0;
46713 switch (vmode)
46715 case V8DFmode:
46716 case V16SFmode:
46717 case V4DFmode:
46718 case V8SFmode:
46719 case V2DFmode:
46720 case V4SFmode:
46721 case V8HImode:
46722 case V8SImode:
46723 case V32HImode:
46724 case V64QImode:
46725 case V16SImode:
46726 case V8DImode:
46727 for (i = 0; i < nelt; ++i)
46728 mask |= (d->perm[i] >= nelt) << i;
46729 break;
46731 case V2DImode:
46732 for (i = 0; i < 2; ++i)
46733 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46734 vmode = V8HImode;
46735 goto do_subreg;
46737 case V4SImode:
46738 for (i = 0; i < 4; ++i)
46739 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
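/* For example (illustrative only), the V4SI blend selecting
   { op0[0], op1[1], op0[2], op1[3] } has d->perm = { 0, 5, 2, 7 };
   elements 1 and 3 come from op1, and since each 32-bit element covers
   two 16-bit pblendw fields the mask becomes 0b11001100 == 0xcc.  */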
46740 vmode = V8HImode;
46741 goto do_subreg;
46743 case V16QImode:
46744 /* See if bytes move in pairs so we can use pblendw with
46745 an immediate argument, rather than pblendvb with a vector
46746 argument. */
46747 for (i = 0; i < 16; i += 2)
46748 if (d->perm[i] + 1 != d->perm[i + 1])
46750 use_pblendvb:
46751 for (i = 0; i < nelt; ++i)
46752 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46754 finish_pblendvb:
46755 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46756 vperm = force_reg (vmode, vperm);
46758 if (GET_MODE_SIZE (vmode) == 16)
46759 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46760 else
46761 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46762 if (target != d->target)
46763 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46764 return true;
46767 for (i = 0; i < 8; ++i)
46768 mask |= (d->perm[i * 2] >= 16) << i;
46769 vmode = V8HImode;
46770 /* FALLTHRU */
46772 do_subreg:
46773 target = gen_reg_rtx (vmode);
46774 op0 = gen_lowpart (vmode, op0);
46775 op1 = gen_lowpart (vmode, op1);
46776 break;
46778 case V32QImode:
46779 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46780 for (i = 0; i < 32; i += 2)
46781 if (d->perm[i] + 1 != d->perm[i + 1])
46782 goto use_pblendvb;
46783 /* See if bytes move in quadruplets. If yes, vpblendd
46784 with immediate can be used. */
46785 for (i = 0; i < 32; i += 4)
46786 if (d->perm[i] + 2 != d->perm[i + 2])
46787 break;
46788 if (i < 32)
46790 /* See if bytes move the same in both lanes. If yes,
46791 vpblendw with immediate can be used. */
46792 for (i = 0; i < 16; i += 2)
46793 if (d->perm[i] + 16 != d->perm[i + 16])
46794 goto use_pblendvb;
46796 /* Use vpblendw. */
46797 for (i = 0; i < 16; ++i)
46798 mask |= (d->perm[i * 2] >= 32) << i;
46799 vmode = V16HImode;
46800 goto do_subreg;
46803 /* Use vpblendd. */
46804 for (i = 0; i < 8; ++i)
46805 mask |= (d->perm[i * 4] >= 32) << i;
46806 vmode = V8SImode;
46807 goto do_subreg;
46809 case V16HImode:
46810 /* See if words move in pairs. If yes, vpblendd can be used. */
46811 for (i = 0; i < 16; i += 2)
46812 if (d->perm[i] + 1 != d->perm[i + 1])
46813 break;
46814 if (i < 16)
46816 /* See if words move the same in both lanes. If not,
46817 vpblendvb must be used. */
46818 for (i = 0; i < 8; i++)
46819 if (d->perm[i] + 8 != d->perm[i + 8])
46821 /* Use vpblendvb. */
46822 for (i = 0; i < 32; ++i)
46823 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46825 vmode = V32QImode;
46826 nelt = 32;
46827 target = gen_reg_rtx (vmode);
46828 op0 = gen_lowpart (vmode, op0);
46829 op1 = gen_lowpart (vmode, op1);
46830 goto finish_pblendvb;
46833 /* Use vpblendw. */
46834 for (i = 0; i < 16; ++i)
46835 mask |= (d->perm[i] >= 16) << i;
46836 break;
46839 /* Use vpblendd. */
46840 for (i = 0; i < 8; ++i)
46841 mask |= (d->perm[i * 2] >= 16) << i;
46842 vmode = V8SImode;
46843 goto do_subreg;
46845 case V4DImode:
46846 /* Use vpblendd. */
46847 for (i = 0; i < 4; ++i)
46848 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46849 vmode = V8SImode;
46850 goto do_subreg;
46852 default:
46853 gcc_unreachable ();
46856 /* This matches five different patterns with the different modes. */
46857 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46858 x = gen_rtx_SET (VOIDmode, target, x);
46859 emit_insn (x);
46860 if (target != d->target)
46861 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46863 return true;
46866 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46867 in terms of the variable form of vpermilps.
46869 Note that we will have already failed the immediate input vpermilps,
46870 which requires that the high and low part shuffle be identical; the
46871 variable form doesn't require that. */
46873 static bool
46874 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46876 rtx rperm[8], vperm;
46877 unsigned i;
46879 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46880 return false;
46882 /* We can only permute within the 128-bit lane. */
46883 for (i = 0; i < 8; ++i)
46885 unsigned e = d->perm[i];
46886 if (i < 4 ? e >= 4 : e < 4)
46887 return false;
46890 if (d->testing_p)
46891 return true;
46893 for (i = 0; i < 8; ++i)
46895 unsigned e = d->perm[i];
46897 /* Within each 128-bit lane, the elements of op0 are numbered
46898 from 0 and the elements of op1 are numbered from 4. */
46899 if (e >= 8 + 4)
46900 e -= 8;
46901 else if (e >= 4)
46902 e -= 4;
46904 rperm[i] = GEN_INT (e);
46907 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46908 vperm = force_reg (V8SImode, vperm);
46909 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46911 return true;
46914 /* Return true if permutation D can be performed as a VMODE permutation
46915 instead. */
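/* For instance, the V16QImode permutation
     { 4 5 6 7  0 1 2 3  12 13 14 15  8 9 10 11 }
   moves whole 4-byte chunks that start on 4-byte boundaries, so it can be
   performed as the V4SImode permutation { 1 0 3 2 }; a byte permutation
   whose chunks are not aligned or not consecutive cannot.  */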
46917 static bool
46918 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46920 unsigned int i, j, chunk;
46922 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46923 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46924 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46925 return false;
46927 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46928 return true;
46930 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46931 for (i = 0; i < d->nelt; i += chunk)
46932 if (d->perm[i] & (chunk - 1))
46933 return false;
46934 else
46935 for (j = 1; j < chunk; ++j)
46936 if (d->perm[i] + j != d->perm[i + j])
46937 return false;
46939 return true;
46942 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46943 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46945 static bool
46946 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46948 unsigned i, nelt, eltsz, mask;
46949 unsigned char perm[64];
46950 machine_mode vmode = V16QImode;
46951 rtx rperm[64], vperm, target, op0, op1;
46953 nelt = d->nelt;
46955 if (!d->one_operand_p)
46957 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46959 if (TARGET_AVX2
46960 && valid_perm_using_mode_p (V2TImode, d))
46962 if (d->testing_p)
46963 return true;
46965 /* Use vperm2i128 insn. The pattern uses
46966 V4DImode instead of V2TImode. */
46967 target = d->target;
46968 if (d->vmode != V4DImode)
46969 target = gen_reg_rtx (V4DImode);
46970 op0 = gen_lowpart (V4DImode, d->op0);
46971 op1 = gen_lowpart (V4DImode, d->op1);
46972 rperm[0]
46973 = GEN_INT ((d->perm[0] / (nelt / 2))
46974 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
46975 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46976 if (target != d->target)
46977 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46978 return true;
46980 return false;
46983 else
46985 if (GET_MODE_SIZE (d->vmode) == 16)
46987 if (!TARGET_SSSE3)
46988 return false;
46990 else if (GET_MODE_SIZE (d->vmode) == 32)
46992 if (!TARGET_AVX2)
46993 return false;
46995 /* V4DImode should already have been handled through
46996 expand_vselect by the vpermq instruction. */
46997 gcc_assert (d->vmode != V4DImode);
46999 vmode = V32QImode;
47000 if (d->vmode == V8SImode
47001 || d->vmode == V16HImode
47002 || d->vmode == V32QImode)
47004 /* First see if vpermq can be used for
47005 V8SImode/V16HImode/V32QImode. */
47006 if (valid_perm_using_mode_p (V4DImode, d))
47008 for (i = 0; i < 4; i++)
47009 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47010 if (d->testing_p)
47011 return true;
47012 target = gen_reg_rtx (V4DImode);
47013 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47014 perm, 4, false))
47016 emit_move_insn (d->target,
47017 gen_lowpart (d->vmode, target));
47018 return true;
47020 return false;
47023 /* Next see if vpermd can be used. */
47024 if (valid_perm_using_mode_p (V8SImode, d))
47025 vmode = V8SImode;
47027 /* Or if vpermps can be used. */
47028 else if (d->vmode == V8SFmode)
47029 vmode = V8SImode;
47031 if (vmode == V32QImode)
47033 /* vpshufb only works within 128-bit lanes; it is not
47034 possible to shuffle bytes between the lanes. */
47035 for (i = 0; i < nelt; ++i)
47036 if ((d->perm[i] ^ i) & (nelt / 2))
47037 return false;
47040 else if (GET_MODE_SIZE (d->vmode) == 64)
47042 if (!TARGET_AVX512BW)
47043 return false;
47045 /* If vpermq didn't work, vpshufb won't work either. */
47046 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47047 return false;
47049 vmode = V64QImode;
47050 if (d->vmode == V16SImode
47051 || d->vmode == V32HImode
47052 || d->vmode == V64QImode)
47054 /* First see if vpermq can be used for
47055 V16SImode/V32HImode/V64QImode. */
47056 if (valid_perm_using_mode_p (V8DImode, d))
47058 for (i = 0; i < 8; i++)
47059 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47060 if (d->testing_p)
47061 return true;
47062 target = gen_reg_rtx (V8DImode);
47063 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47064 perm, 8, false))
47066 emit_move_insn (d->target,
47067 gen_lowpart (d->vmode, target));
47068 return true;
47070 return false;
47073 /* Next see if vpermd can be used. */
47074 if (valid_perm_using_mode_p (V16SImode, d))
47075 vmode = V16SImode;
47077 /* Or if vpermps can be used. */
47078 else if (d->vmode == V16SFmode)
47079 vmode = V16SImode;
47080 if (vmode == V64QImode)
47082 /* vpshufb only works within 128-bit lanes; it is not
47083 possible to shuffle bytes between the lanes. */
47084 for (i = 0; i < nelt; ++i)
47085 if ((d->perm[i] ^ i) & (nelt / 4))
47086 return false;
47089 else
47090 return false;
47093 if (d->testing_p)
47094 return true;
47096 if (vmode == V8SImode)
47097 for (i = 0; i < 8; ++i)
47098 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47099 else if (vmode == V16SImode)
47100 for (i = 0; i < 16; ++i)
47101 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47102 else
47104 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47105 if (!d->one_operand_p)
47106 mask = 2 * nelt - 1;
47107 else if (vmode == V16QImode)
47108 mask = nelt - 1;
47109 else if (vmode == V64QImode)
47110 mask = nelt / 4 - 1;
47111 else
47112 mask = nelt / 2 - 1;
47114 for (i = 0; i < nelt; ++i)
47116 unsigned j, e = d->perm[i] & mask;
47117 for (j = 0; j < eltsz; ++j)
47118 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47122 vperm = gen_rtx_CONST_VECTOR (vmode,
47123 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47124 vperm = force_reg (vmode, vperm);
47126 target = d->target;
47127 if (d->vmode != vmode)
47128 target = gen_reg_rtx (vmode);
47129 op0 = gen_lowpart (vmode, d->op0);
47130 if (d->one_operand_p)
47132 if (vmode == V16QImode)
47133 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47134 else if (vmode == V32QImode)
47135 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47136 else if (vmode == V64QImode)
47137 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47138 else if (vmode == V8SFmode)
47139 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47140 else if (vmode == V8SImode)
47141 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47142 else if (vmode == V16SFmode)
47143 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47144 else if (vmode == V16SImode)
47145 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47146 else
47147 gcc_unreachable ();
47149 else
47151 op1 = gen_lowpart (vmode, d->op1);
47152 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47154 if (target != d->target)
47155 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47157 return true;
47160 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47161 in a single instruction. */
47163 static bool
47164 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47166 unsigned i, nelt = d->nelt;
47167 unsigned char perm2[MAX_VECT_LEN];
47169 /* Check plain VEC_SELECT first, because AVX has instructions that could
47170 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47171 input where SEL+CONCAT may not. */
47172 if (d->one_operand_p)
47174 int mask = nelt - 1;
47175 bool identity_perm = true;
47176 bool broadcast_perm = true;
47178 for (i = 0; i < nelt; i++)
47180 perm2[i] = d->perm[i] & mask;
47181 if (perm2[i] != i)
47182 identity_perm = false;
47183 if (perm2[i])
47184 broadcast_perm = false;
47187 if (identity_perm)
47189 if (!d->testing_p)
47190 emit_move_insn (d->target, d->op0);
47191 return true;
47193 else if (broadcast_perm && TARGET_AVX2)
47195 /* Use vpbroadcast{b,w,d}. */
47196 rtx (*gen) (rtx, rtx) = NULL;
47197 switch (d->vmode)
47199 case V64QImode:
47200 if (TARGET_AVX512BW)
47201 gen = gen_avx512bw_vec_dupv64qi_1;
47202 break;
47203 case V32QImode:
47204 gen = gen_avx2_pbroadcastv32qi_1;
47205 break;
47206 case V32HImode:
47207 if (TARGET_AVX512BW)
47208 gen = gen_avx512bw_vec_dupv32hi_1;
47209 break;
47210 case V16HImode:
47211 gen = gen_avx2_pbroadcastv16hi_1;
47212 break;
47213 case V16SImode:
47214 if (TARGET_AVX512F)
47215 gen = gen_avx512f_vec_dupv16si_1;
47216 break;
47217 case V8SImode:
47218 gen = gen_avx2_pbroadcastv8si_1;
47219 break;
47220 case V16QImode:
47221 gen = gen_avx2_pbroadcastv16qi;
47222 break;
47223 case V8HImode:
47224 gen = gen_avx2_pbroadcastv8hi;
47225 break;
47226 case V16SFmode:
47227 if (TARGET_AVX512F)
47228 gen = gen_avx512f_vec_dupv16sf_1;
47229 break;
47230 case V8SFmode:
47231 gen = gen_avx2_vec_dupv8sf_1;
47232 break;
47233 case V8DFmode:
47234 if (TARGET_AVX512F)
47235 gen = gen_avx512f_vec_dupv8df_1;
47236 break;
47237 case V8DImode:
47238 if (TARGET_AVX512F)
47239 gen = gen_avx512f_vec_dupv8di_1;
47240 break;
47241 /* For other modes, prefer the other shuffles this function creates. */
47242 default: break;
47244 if (gen != NULL)
47246 if (!d->testing_p)
47247 emit_insn (gen (d->target, d->op0));
47248 return true;
47252 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47253 return true;
47255 /* There are plenty of patterns in sse.md that are written for
47256 SEL+CONCAT and are not replicated for a single op. Perhaps
47257 that should be changed, to avoid the nastiness here. */
47259 /* Recognize interleave style patterns, which means incrementing
47260 every other permutation operand. */
47261 for (i = 0; i < nelt; i += 2)
47263 perm2[i] = d->perm[i] & mask;
47264 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47266 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47267 d->testing_p))
47268 return true;
47270 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47271 if (nelt >= 4)
47273 for (i = 0; i < nelt; i += 4)
47275 perm2[i + 0] = d->perm[i + 0] & mask;
47276 perm2[i + 1] = d->perm[i + 1] & mask;
47277 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47278 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47281 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47282 d->testing_p))
47283 return true;
47287 /* Finally, try the fully general two operand permute. */
47288 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47289 d->testing_p))
47290 return true;
47292 /* Recognize interleave style patterns with reversed operands. */
47293 if (!d->one_operand_p)
47295 for (i = 0; i < nelt; ++i)
47297 unsigned e = d->perm[i];
47298 if (e >= nelt)
47299 e -= nelt;
47300 else
47301 e += nelt;
47302 perm2[i] = e;
47305 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47306 d->testing_p))
47307 return true;
47310 /* Try the SSE4.1 blend variable merge instructions. */
47311 if (expand_vec_perm_blend (d))
47312 return true;
47314 /* Try one of the AVX vpermil variable permutations. */
47315 if (expand_vec_perm_vpermil (d))
47316 return true;
47318 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47319 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47320 if (expand_vec_perm_pshufb (d))
47321 return true;
47323 /* Try the AVX2 vpalignr instruction. */
47324 if (expand_vec_perm_palignr (d, true))
47325 return true;
47327 /* Try the AVX512F vpermi2 instructions. */
47328 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47329 return true;
47331 return false;
47334 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47335 in terms of a pair of pshuflw + pshufhw instructions. */
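/* For example, the V8HImode permutation { 2 0 3 1  5 7 4 6 } keeps the
   first four indices within 0..3 and the last four within 4..7, so it can
   be done as a pshuflw that reorders the low quadword followed by a
   pshufhw that reorders the high quadword.  */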
47337 static bool
47338 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47340 unsigned char perm2[MAX_VECT_LEN];
47341 unsigned i;
47342 bool ok;
47344 if (d->vmode != V8HImode || !d->one_operand_p)
47345 return false;
47347 /* The two permutations only operate in 64-bit lanes. */
47348 for (i = 0; i < 4; ++i)
47349 if (d->perm[i] >= 4)
47350 return false;
47351 for (i = 4; i < 8; ++i)
47352 if (d->perm[i] < 4)
47353 return false;
47355 if (d->testing_p)
47356 return true;
47358 /* Emit the pshuflw. */
47359 memcpy (perm2, d->perm, 4);
47360 for (i = 4; i < 8; ++i)
47361 perm2[i] = i;
47362 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47363 gcc_assert (ok);
47365 /* Emit the pshufhw. */
47366 memcpy (perm2 + 4, d->perm + 4, 4);
47367 for (i = 0; i < 4; ++i)
47368 perm2[i] = i;
47369 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47370 gcc_assert (ok);
47372 return true;
47375 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47376 the permutation using the SSSE3 palignr instruction. This succeeds
47377 when all of the elements in PERM fit within one vector and we merely
47378 need to shift them down so that a single vector permutation has a
47379 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47380 the vpalignr instruction itself can perform the requested permutation. */
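/* For instance, the two-operand V16QImode permutation { 2 3 ... 17 }
   selects a contiguous window starting at element 2 of the 32-byte
   concatenation; a palignr by 2 bytes produces it directly (the degenerate
   in_order case below), while windows whose elements still need reordering
   are handed to a single-operand shuffle afterwards.  */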
47382 static bool
47383 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47385 unsigned i, nelt = d->nelt;
47386 unsigned min, max, minswap, maxswap;
47387 bool in_order, ok, swap = false;
47388 rtx shift, target;
47389 struct expand_vec_perm_d dcopy;
47391 /* Even with AVX, palignr only operates on 128-bit vectors;
47392 with AVX2, palignr operates on both 128-bit lanes. */
47393 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47394 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47395 return false;
47397 min = 2 * nelt;
47398 max = 0;
47399 minswap = 2 * nelt;
47400 maxswap = 0;
47401 for (i = 0; i < nelt; ++i)
47403 unsigned e = d->perm[i];
47404 unsigned eswap = d->perm[i] ^ nelt;
47405 if (GET_MODE_SIZE (d->vmode) == 32)
47407 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47408 eswap = e ^ (nelt / 2);
47410 if (e < min)
47411 min = e;
47412 if (e > max)
47413 max = e;
47414 if (eswap < minswap)
47415 minswap = eswap;
47416 if (eswap > maxswap)
47417 maxswap = eswap;
47419 if (min == 0
47420 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47422 if (d->one_operand_p
47423 || minswap == 0
47424 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47425 ? nelt / 2 : nelt))
47426 return false;
47427 swap = true;
47428 min = minswap;
47429 max = maxswap;
47432 /* Given that we have SSSE3, we know we'll be able to implement the
47433 single operand permutation after the palignr with pshufb for
47434 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47435 first. */
47436 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47437 return true;
47439 dcopy = *d;
47440 if (swap)
47442 dcopy.op0 = d->op1;
47443 dcopy.op1 = d->op0;
47444 for (i = 0; i < nelt; ++i)
47445 dcopy.perm[i] ^= nelt;
47448 in_order = true;
47449 for (i = 0; i < nelt; ++i)
47451 unsigned e = dcopy.perm[i];
47452 if (GET_MODE_SIZE (d->vmode) == 32
47453 && e >= nelt
47454 && (e & (nelt / 2 - 1)) < min)
47455 e = e - min - (nelt / 2);
47456 else
47457 e = e - min;
47458 if (e != i)
47459 in_order = false;
47460 dcopy.perm[i] = e;
47462 dcopy.one_operand_p = true;
47464 if (single_insn_only_p && !in_order)
47465 return false;
47467 /* For AVX2, test whether we can permute the result in one instruction. */
47468 if (d->testing_p)
47470 if (in_order)
47471 return true;
47472 dcopy.op1 = dcopy.op0;
47473 return expand_vec_perm_1 (&dcopy);
47476 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47477 if (GET_MODE_SIZE (d->vmode) == 16)
47479 target = gen_reg_rtx (TImode);
47480 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47481 gen_lowpart (TImode, dcopy.op0), shift));
47483 else
47485 target = gen_reg_rtx (V2TImode);
47486 emit_insn (gen_avx2_palignrv2ti (target,
47487 gen_lowpart (V2TImode, dcopy.op1),
47488 gen_lowpart (V2TImode, dcopy.op0),
47489 shift));
47492 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47494 /* Test for the degenerate case where the alignment by itself
47495 produces the desired permutation. */
47496 if (in_order)
47498 emit_move_insn (d->target, dcopy.op0);
47499 return true;
47502 ok = expand_vec_perm_1 (&dcopy);
47503 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47505 return ok;
47508 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47509 the permutation using the SSE4_1 pblendv instruction. Potentially
47510 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and 1 pblendv. */
47512 static bool
47513 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47515 unsigned i, which, nelt = d->nelt;
47516 struct expand_vec_perm_d dcopy, dcopy1;
47517 machine_mode vmode = d->vmode;
47518 bool ok;
47520 /* Use the same checks as in expand_vec_perm_blend. */
47521 if (d->one_operand_p)
47522 return false;
47523 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47525 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47527 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47529 else
47530 return false;
47532 /* Figure out which permutation elements are not in their
47533 respective lanes. */
47534 for (i = 0, which = 0; i < nelt; ++i)
47536 unsigned e = d->perm[i];
47537 if (e != i)
47538 which |= (e < nelt ? 1 : 2);
47540 /* We can pblend the part where elements are not in their
47541 respective lanes only when these elements all come from one
47542 half of the permutation.
47543 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not at their respective
47544 lanes, but both are >= 8.
47545 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not at their
47546 respective lanes, and 8 >= 8 but 2 is not. */
47547 if (which != 1 && which != 2)
47548 return false;
47549 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47550 return true;
47552 /* First we apply a one-operand permutation to the part where
47553 elements are not in their respective lanes. */
47554 dcopy = *d;
47555 if (which == 2)
47556 dcopy.op0 = dcopy.op1 = d->op1;
47557 else
47558 dcopy.op0 = dcopy.op1 = d->op0;
47559 dcopy.one_operand_p = true;
47561 for (i = 0; i < nelt; ++i)
47562 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47564 ok = expand_vec_perm_1 (&dcopy);
47565 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47566 return false;
47567 else
47568 gcc_assert (ok);
47569 if (d->testing_p)
47570 return true;
47572 /* Next we put permuted elements into their positions. */
47573 dcopy1 = *d;
47574 if (which == 2)
47575 dcopy1.op1 = dcopy.target;
47576 else
47577 dcopy1.op0 = dcopy.target;
47579 for (i = 0; i < nelt; ++i)
47580 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47582 ok = expand_vec_perm_blend (&dcopy1);
47583 gcc_assert (ok);
47585 return true;
47588 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47590 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47591 a two vector permutation into a single vector permutation by using
47592 an interleave operation to merge the vectors. */
47594 static bool
47595 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47597 struct expand_vec_perm_d dremap, dfinal;
47598 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47599 unsigned HOST_WIDE_INT contents;
47600 unsigned char remap[2 * MAX_VECT_LEN];
47601 rtx_insn *seq;
47602 bool ok, same_halves = false;
47604 if (GET_MODE_SIZE (d->vmode) == 16)
47606 if (d->one_operand_p)
47607 return false;
47609 else if (GET_MODE_SIZE (d->vmode) == 32)
47611 if (!TARGET_AVX)
47612 return false;
47613 /* For 32-byte modes allow this even when d->one_operand_p.
47614 The lack of cross-lane shuffling in some instructions
47615 might prevent a single insn shuffle. */
47616 dfinal = *d;
47617 dfinal.testing_p = true;
47618 /* If expand_vec_perm_interleave3 can expand this into
47619 a 3 insn sequence, give up and let it be expanded as
47620 a 3 insn sequence. While that is one insn longer,
47621 it doesn't need a memory operand, and in the common
47622 case where the interleave low and interleave high permutations
47623 with the same operands are adjacent, both together need only
47624 4 insns after CSE. */
47625 if (expand_vec_perm_interleave3 (&dfinal))
47626 return false;
47628 else
47629 return false;
47631 /* Examine from whence the elements come. */
47632 contents = 0;
47633 for (i = 0; i < nelt; ++i)
47634 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47636 memset (remap, 0xff, sizeof (remap));
47637 dremap = *d;
47639 if (GET_MODE_SIZE (d->vmode) == 16)
47641 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47643 /* Split the two input vectors into 4 halves. */
47644 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47645 h2 = h1 << nelt2;
47646 h3 = h2 << nelt2;
47647 h4 = h3 << nelt2;
47649 /* If all elements come from the low halves, use interleave low; similarly
47650 use interleave high for the high halves. If the elements come from
47651 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
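/* E.g. for V4SImode with all selected elements in the low halves
   ({ 0, 1 } of op0 and { 4, 5 } of op1), dremap becomes the interleave-low
   permutation { 0, 4, 1, 5 } (punpckldq) and remap redirects
   0->0, 1->2, 4->1, 5->3 into that intermediate vector.  */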
47652 if ((contents & (h1 | h3)) == contents)
47654 /* punpckl* */
47655 for (i = 0; i < nelt2; ++i)
47657 remap[i] = i * 2;
47658 remap[i + nelt] = i * 2 + 1;
47659 dremap.perm[i * 2] = i;
47660 dremap.perm[i * 2 + 1] = i + nelt;
47662 if (!TARGET_SSE2 && d->vmode == V4SImode)
47663 dremap.vmode = V4SFmode;
47665 else if ((contents & (h2 | h4)) == contents)
47667 /* punpckh* */
47668 for (i = 0; i < nelt2; ++i)
47670 remap[i + nelt2] = i * 2;
47671 remap[i + nelt + nelt2] = i * 2 + 1;
47672 dremap.perm[i * 2] = i + nelt2;
47673 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47675 if (!TARGET_SSE2 && d->vmode == V4SImode)
47676 dremap.vmode = V4SFmode;
47678 else if ((contents & (h1 | h4)) == contents)
47680 /* shufps */
47681 for (i = 0; i < nelt2; ++i)
47683 remap[i] = i;
47684 remap[i + nelt + nelt2] = i + nelt2;
47685 dremap.perm[i] = i;
47686 dremap.perm[i + nelt2] = i + nelt + nelt2;
47688 if (nelt != 4)
47690 /* shufpd */
47691 dremap.vmode = V2DImode;
47692 dremap.nelt = 2;
47693 dremap.perm[0] = 0;
47694 dremap.perm[1] = 3;
47697 else if ((contents & (h2 | h3)) == contents)
47699 /* shufps */
47700 for (i = 0; i < nelt2; ++i)
47702 remap[i + nelt2] = i;
47703 remap[i + nelt] = i + nelt2;
47704 dremap.perm[i] = i + nelt2;
47705 dremap.perm[i + nelt2] = i + nelt;
47707 if (nelt != 4)
47709 /* shufpd */
47710 dremap.vmode = V2DImode;
47711 dremap.nelt = 2;
47712 dremap.perm[0] = 1;
47713 dremap.perm[1] = 2;
47716 else
47717 return false;
47719 else
47721 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47722 unsigned HOST_WIDE_INT q[8];
47723 unsigned int nonzero_halves[4];
47725 /* Split the two input vectors into 8 quarters. */
47726 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47727 for (i = 1; i < 8; ++i)
47728 q[i] = q[0] << (nelt4 * i);
47729 for (i = 0; i < 4; ++i)
47730 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47732 nonzero_halves[nzcnt] = i;
47733 ++nzcnt;
47736 if (nzcnt == 1)
47738 gcc_assert (d->one_operand_p);
47739 nonzero_halves[1] = nonzero_halves[0];
47740 same_halves = true;
47742 else if (d->one_operand_p)
47744 gcc_assert (nonzero_halves[0] == 0);
47745 gcc_assert (nonzero_halves[1] == 1);
47748 if (nzcnt <= 2)
47750 if (d->perm[0] / nelt2 == nonzero_halves[1])
47752 /* Attempt to increase the likelihood that dfinal
47753 shuffle will be intra-lane. */
47754 char tmph = nonzero_halves[0];
47755 nonzero_halves[0] = nonzero_halves[1];
47756 nonzero_halves[1] = tmph;
47759 /* vperm2f128 or vperm2i128. */
47760 for (i = 0; i < nelt2; ++i)
47762 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47763 remap[i + nonzero_halves[0] * nelt2] = i;
47764 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47765 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47768 if (d->vmode != V8SFmode
47769 && d->vmode != V4DFmode
47770 && d->vmode != V8SImode)
47772 dremap.vmode = V8SImode;
47773 dremap.nelt = 8;
47774 for (i = 0; i < 4; ++i)
47776 dremap.perm[i] = i + nonzero_halves[0] * 4;
47777 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47781 else if (d->one_operand_p)
47782 return false;
47783 else if (TARGET_AVX2
47784 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47786 /* vpunpckl* */
47787 for (i = 0; i < nelt4; ++i)
47789 remap[i] = i * 2;
47790 remap[i + nelt] = i * 2 + 1;
47791 remap[i + nelt2] = i * 2 + nelt2;
47792 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47793 dremap.perm[i * 2] = i;
47794 dremap.perm[i * 2 + 1] = i + nelt;
47795 dremap.perm[i * 2 + nelt2] = i + nelt2;
47796 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47799 else if (TARGET_AVX2
47800 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47802 /* vpunpckh* */
47803 for (i = 0; i < nelt4; ++i)
47805 remap[i + nelt4] = i * 2;
47806 remap[i + nelt + nelt4] = i * 2 + 1;
47807 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47808 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47809 dremap.perm[i * 2] = i + nelt4;
47810 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47811 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47812 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47815 else
47816 return false;
47819 /* Use the remapping array set up above to move the elements from their
47820 swizzled locations into their final destinations. */
47821 dfinal = *d;
47822 for (i = 0; i < nelt; ++i)
47824 unsigned e = remap[d->perm[i]];
47825 gcc_assert (e < nelt);
47826 /* If same_halves is true, both halves of the remapped vector are the
47827 same. Avoid cross-lane accesses if possible. */
47828 if (same_halves && i >= nelt2)
47830 gcc_assert (e < nelt2);
47831 dfinal.perm[i] = e + nelt2;
47833 else
47834 dfinal.perm[i] = e;
47836 if (!d->testing_p)
47838 dremap.target = gen_reg_rtx (dremap.vmode);
47839 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47841 dfinal.op1 = dfinal.op0;
47842 dfinal.one_operand_p = true;
47844 /* Test if the final remap can be done with a single insn. For V4SFmode or
47845 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47846 start_sequence ();
47847 ok = expand_vec_perm_1 (&dfinal);
47848 seq = get_insns ();
47849 end_sequence ();
47851 if (!ok)
47852 return false;
47854 if (d->testing_p)
47855 return true;
47857 if (dremap.vmode != dfinal.vmode)
47859 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47860 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47863 ok = expand_vec_perm_1 (&dremap);
47864 gcc_assert (ok);
47866 emit_insn (seq);
47867 return true;
47870 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47871 a single vector cross-lane permutation into vpermq followed
47872 by any of the single insn permutations. */
47874 static bool
47875 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47877 struct expand_vec_perm_d dremap, dfinal;
47878 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47879 unsigned contents[2];
47880 bool ok;
47882 if (!(TARGET_AVX2
47883 && (d->vmode == V32QImode || d->vmode == V16HImode)
47884 && d->one_operand_p))
47885 return false;
47887 contents[0] = 0;
47888 contents[1] = 0;
47889 for (i = 0; i < nelt2; ++i)
47891 contents[0] |= 1u << (d->perm[i] / nelt4);
47892 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47895 for (i = 0; i < 2; ++i)
47897 unsigned int cnt = 0;
47898 for (j = 0; j < 4; ++j)
47899 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47900 return false;
47903 if (d->testing_p)
47904 return true;
47906 dremap = *d;
47907 dremap.vmode = V4DImode;
47908 dremap.nelt = 4;
47909 dremap.target = gen_reg_rtx (V4DImode);
47910 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47911 dremap.op1 = dremap.op0;
47912 dremap.one_operand_p = true;
47913 for (i = 0; i < 2; ++i)
47915 unsigned int cnt = 0;
47916 for (j = 0; j < 4; ++j)
47917 if ((contents[i] & (1u << j)) != 0)
47918 dremap.perm[2 * i + cnt++] = j;
47919 for (; cnt < 2; ++cnt)
47920 dremap.perm[2 * i + cnt] = 0;
47923 dfinal = *d;
47924 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47925 dfinal.op1 = dfinal.op0;
47926 dfinal.one_operand_p = true;
47927 for (i = 0, j = 0; i < nelt; ++i)
47929 if (i == nelt2)
47930 j = 2;
47931 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47932 if ((d->perm[i] / nelt4) == dremap.perm[j])
47934 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47935 dfinal.perm[i] |= nelt4;
47936 else
47937 gcc_unreachable ();
47940 ok = expand_vec_perm_1 (&dremap);
47941 gcc_assert (ok);
47943 ok = expand_vec_perm_1 (&dfinal);
47944 gcc_assert (ok);
47946 return true;
47949 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47950 a vector permutation using two instructions, vperm2f128 (or
47951 vperm2i128) followed by any single in-lane permutation. */
47953 static bool
47954 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47956 struct expand_vec_perm_d dfirst, dsecond;
47957 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47958 bool ok;
47960 if (!TARGET_AVX
47961 || GET_MODE_SIZE (d->vmode) != 32
47962 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47963 return false;
47965 dsecond = *d;
47966 dsecond.one_operand_p = false;
47967 dsecond.testing_p = true;
47969 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
47970 immediate. For perm < 16 the second permutation uses
47971 d->op0 as first operand, for perm >= 16 it uses d->op1
47972 as first operand. The second operand is the result of
47973 vperm2[fi]128. */
47974 for (perm = 0; perm < 32; perm++)
47976 /* Ignore permutations which do not move anything cross-lane. */
47977 if (perm < 16)
47979 /* The second shuffle for e.g. V4DFmode has
47980 0123 and ABCD operands.
47981 Ignore AB23, as 23 is already in the second lane
47982 of the first operand. */
47983 if ((perm & 0xc) == (1 << 2)) continue;
47984 /* And 01CD, as 01 is in the first lane of the first
47985 operand. */
47986 if ((perm & 3) == 0) continue;
47987 /* And 4567, as then the vperm2[fi]128 doesn't change
47988 anything on the original 4567 second operand. */
47989 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47991 else
47993 /* The second shuffle for e.g. V4DFmode has
47994 4567 and ABCD operands.
47995 Ignore AB67, as 67 is already in the second lane
47996 of the first operand. */
47997 if ((perm & 0xc) == (3 << 2)) continue;
47998 /* And 45CD, as 45 is in the first lane of the first
47999 operand. */
48000 if ((perm & 3) == 2) continue;
48001 /* And 0123, as then the vperm2[fi]128 doesn't change
48002 anything on the original 0123 first operand. */
48003 if ((perm & 0xf) == (1 << 2)) continue;
48006 for (i = 0; i < nelt; i++)
48008 j = d->perm[i] / nelt2;
48009 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48010 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48011 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48012 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48013 else
48014 break;
48017 if (i == nelt)
48019 start_sequence ();
48020 ok = expand_vec_perm_1 (&dsecond);
48021 end_sequence ();
48023 else
48024 ok = false;
48026 if (ok)
48028 if (d->testing_p)
48029 return true;
48031 /* Found a usable second shuffle. dfirst will be
48032 vperm2f128 on d->op0 and d->op1. */
48033 dsecond.testing_p = false;
48034 dfirst = *d;
48035 dfirst.target = gen_reg_rtx (d->vmode);
48036 for (i = 0; i < nelt; i++)
48037 dfirst.perm[i] = (i & (nelt2 - 1))
48038 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48040 canonicalize_perm (&dfirst);
48041 ok = expand_vec_perm_1 (&dfirst);
48042 gcc_assert (ok);
48044 /* And dsecond is some single insn shuffle, taking
48045 d->op0 and result of vperm2f128 (if perm < 16) or
48046 d->op1 and result of vperm2f128 (otherwise). */
48047 if (perm >= 16)
48048 dsecond.op0 = dsecond.op1;
48049 dsecond.op1 = dfirst.target;
48051 ok = expand_vec_perm_1 (&dsecond);
48052 gcc_assert (ok);
48054 return true;
48057 /* For one operand, the only useful vperm2f128 permutation is 0x01
48058 i.e. a lane swap. */
48059 if (d->one_operand_p)
48060 return false;
48063 return false;
48066 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48067 a two vector permutation using 2 intra-lane interleave insns
48068 and cross-lane shuffle for 32-byte vectors. */
48070 static bool
48071 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48073 unsigned i, nelt;
48074 rtx (*gen) (rtx, rtx, rtx);
48076 if (d->one_operand_p)
48077 return false;
48078 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48080 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48082 else
48083 return false;
48085 nelt = d->nelt;
48086 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48087 return false;
48088 for (i = 0; i < nelt; i += 2)
48089 if (d->perm[i] != d->perm[0] + i / 2
48090 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48091 return false;
48093 if (d->testing_p)
48094 return true;
48096 switch (d->vmode)
48098 case V32QImode:
48099 if (d->perm[0])
48100 gen = gen_vec_interleave_highv32qi;
48101 else
48102 gen = gen_vec_interleave_lowv32qi;
48103 break;
48104 case V16HImode:
48105 if (d->perm[0])
48106 gen = gen_vec_interleave_highv16hi;
48107 else
48108 gen = gen_vec_interleave_lowv16hi;
48109 break;
48110 case V8SImode:
48111 if (d->perm[0])
48112 gen = gen_vec_interleave_highv8si;
48113 else
48114 gen = gen_vec_interleave_lowv8si;
48115 break;
48116 case V4DImode:
48117 if (d->perm[0])
48118 gen = gen_vec_interleave_highv4di;
48119 else
48120 gen = gen_vec_interleave_lowv4di;
48121 break;
48122 case V8SFmode:
48123 if (d->perm[0])
48124 gen = gen_vec_interleave_highv8sf;
48125 else
48126 gen = gen_vec_interleave_lowv8sf;
48127 break;
48128 case V4DFmode:
48129 if (d->perm[0])
48130 gen = gen_vec_interleave_highv4df;
48131 else
48132 gen = gen_vec_interleave_lowv4df;
48133 break;
48134 default:
48135 gcc_unreachable ();
48138 emit_insn (gen (d->target, d->op0, d->op1));
48139 return true;
48142 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48143 a single vector permutation using a single intra-lane vector
48144 permutation, a vperm2f128 swapping the lanes, and a vblend* insn blending
48145 the non-swapped and swapped vectors together. */
48147 static bool
48148 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48150 struct expand_vec_perm_d dfirst, dsecond;
48151 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48152 rtx_insn *seq;
48153 bool ok;
48154 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48156 if (!TARGET_AVX
48157 || TARGET_AVX2
48158 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48159 || !d->one_operand_p)
48160 return false;
48162 dfirst = *d;
48163 for (i = 0; i < nelt; i++)
48164 dfirst.perm[i] = 0xff;
48165 for (i = 0, msk = 0; i < nelt; i++)
48167 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48168 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48169 return false;
48170 dfirst.perm[j] = d->perm[i];
48171 if (j != i)
48172 msk |= (1 << i);
48174 for (i = 0; i < nelt; i++)
48175 if (dfirst.perm[i] == 0xff)
48176 dfirst.perm[i] = i;
48178 if (!d->testing_p)
48179 dfirst.target = gen_reg_rtx (dfirst.vmode);
48181 start_sequence ();
48182 ok = expand_vec_perm_1 (&dfirst);
48183 seq = get_insns ();
48184 end_sequence ();
48186 if (!ok)
48187 return false;
48189 if (d->testing_p)
48190 return true;
48192 emit_insn (seq);
48194 dsecond = *d;
48195 dsecond.op0 = dfirst.target;
48196 dsecond.op1 = dfirst.target;
48197 dsecond.one_operand_p = true;
48198 dsecond.target = gen_reg_rtx (dsecond.vmode);
48199 for (i = 0; i < nelt; i++)
48200 dsecond.perm[i] = i ^ nelt2;
48202 ok = expand_vec_perm_1 (&dsecond);
48203 gcc_assert (ok);
48205 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48206 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48207 return true;
48210 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48211 permutation using two vperm2f128, followed by a vshufpd insn blending
48212 the two vectors together. */
48214 static bool
48215 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48217 struct expand_vec_perm_d dfirst, dsecond, dthird;
48218 bool ok;
48220 if (!TARGET_AVX || (d->vmode != V4DFmode))
48221 return false;
48223 if (d->testing_p)
48224 return true;
48226 dfirst = *d;
48227 dsecond = *d;
48228 dthird = *d;
48230 dfirst.perm[0] = (d->perm[0] & ~1);
48231 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48232 dfirst.perm[2] = (d->perm[2] & ~1);
48233 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48234 dsecond.perm[0] = (d->perm[1] & ~1);
48235 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48236 dsecond.perm[2] = (d->perm[3] & ~1);
48237 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48238 dthird.perm[0] = (d->perm[0] % 2);
48239 dthird.perm[1] = (d->perm[1] % 2) + 4;
48240 dthird.perm[2] = (d->perm[2] % 2) + 2;
48241 dthird.perm[3] = (d->perm[3] % 2) + 6;
48243 dfirst.target = gen_reg_rtx (dfirst.vmode);
48244 dsecond.target = gen_reg_rtx (dsecond.vmode);
48245 dthird.op0 = dfirst.target;
48246 dthird.op1 = dsecond.target;
48247 dthird.one_operand_p = false;
48249 canonicalize_perm (&dfirst);
48250 canonicalize_perm (&dsecond);
48252 ok = expand_vec_perm_1 (&dfirst)
48253 && expand_vec_perm_1 (&dsecond)
48254 && expand_vec_perm_1 (&dthird);
48256 gcc_assert (ok);
48258 return true;
48261 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48262 permutation with two pshufb insns and an ior. We should have already
48263 failed all two instruction sequences. */
48265 static bool
48266 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48268 rtx rperm[2][16], vperm, l, h, op, m128;
48269 unsigned int i, nelt, eltsz;
48271 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48272 return false;
48273 gcc_assert (!d->one_operand_p);
48275 if (d->testing_p)
48276 return true;
48278 nelt = d->nelt;
48279 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48281 /* Generate two permutation masks. If the required element is within
48282 the given vector it is shuffled into the proper lane. If the required
48283 element is in the other vector, force a zero into the lane by setting
48284 bit 7 in the permutation mask. */
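/* Illustrative example with a hypothetical V16QImode selector whose first
   two indices are { 3, 18, ... }: the loop below sets rperm[0] = { 3, -128,
   ... } and rperm[1] = { -128, 2, ... }, so a pshufb of op0 with the first
   mask and of op1 with the second, followed by por, yields the requested
   elements. */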
48285 m128 = GEN_INT (-128);
48286 for (i = 0; i < nelt; ++i)
48288 unsigned j, e = d->perm[i];
48289 unsigned which = (e >= nelt);
48290 if (e >= nelt)
48291 e -= nelt;
48293 for (j = 0; j < eltsz; ++j)
48295 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48296 rperm[1-which][i*eltsz + j] = m128;
48300 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48301 vperm = force_reg (V16QImode, vperm);
48303 l = gen_reg_rtx (V16QImode);
48304 op = gen_lowpart (V16QImode, d->op0);
48305 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48307 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48308 vperm = force_reg (V16QImode, vperm);
48310 h = gen_reg_rtx (V16QImode);
48311 op = gen_lowpart (V16QImode, d->op1);
48312 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48314 op = d->target;
48315 if (d->vmode != V16QImode)
48316 op = gen_reg_rtx (V16QImode);
48317 emit_insn (gen_iorv16qi3 (op, l, h));
48318 if (op != d->target)
48319 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48321 return true;
48324 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
48325 with two vpshufb insns, vpermq and vpor. We should have already failed
48326 all two or three instruction sequences. */
48328 static bool
48329 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48331 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48332 unsigned int i, nelt, eltsz;
48334 if (!TARGET_AVX2
48335 || !d->one_operand_p
48336 || (d->vmode != V32QImode && d->vmode != V16HImode))
48337 return false;
48339 if (d->testing_p)
48340 return true;
48342 nelt = d->nelt;
48343 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48345 /* Generate two permutation masks. If the required element is within
48346 the same lane, it is shuffled in. If the required element is from the
48347 other lane, force a zero by setting bit 7 in the permutation mask.
48348 In the other mask the entries are non-negative if the element is
48349 requested from the other lane, but it is also moved to the other lane,
48350 so that the result of vpshufb can have the two V2TImode halves
48351 swapped. */
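/* In other words, rperm[0] handles the bytes whose source lies in the same
   128-bit lane as their destination, while rperm[1] handles the cross-lane
   bytes; the latter entries are written at the position XORed with 16, i.e.
   into the opposite lane, so that the vpermq lane swap of the corresponding
   vpshufb result moves them into place before the final vpor. */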
48352 m128 = GEN_INT (-128);
48353 for (i = 0; i < nelt; ++i)
48355 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48356 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48358 for (j = 0; j < eltsz; ++j)
48360 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48361 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48365 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48366 vperm = force_reg (V32QImode, vperm);
48368 h = gen_reg_rtx (V32QImode);
48369 op = gen_lowpart (V32QImode, d->op0);
48370 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48372 /* Swap the 128-bit lanes of h into hp. */
48373 hp = gen_reg_rtx (V4DImode);
48374 op = gen_lowpart (V4DImode, h);
48375 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48376 const1_rtx));
48378 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48379 vperm = force_reg (V32QImode, vperm);
48381 l = gen_reg_rtx (V32QImode);
48382 op = gen_lowpart (V32QImode, d->op0);
48383 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48385 op = d->target;
48386 if (d->vmode != V32QImode)
48387 op = gen_reg_rtx (V32QImode);
48388 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48389 if (op != d->target)
48390 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48392 return true;
48395 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48396 and extract-odd permutations of two V32QImode or V16HImode operands
48397 with two vpshufb insns, vpor and vpermq. We should have already
48398 failed all two or three instruction sequences. */
48400 static bool
48401 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48403 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48404 unsigned int i, nelt, eltsz;
48406 if (!TARGET_AVX2
48407 || d->one_operand_p
48408 || (d->vmode != V32QImode && d->vmode != V16HImode))
48409 return false;
48411 for (i = 0; i < d->nelt; ++i)
48412 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48413 return false;
48415 if (d->testing_p)
48416 return true;
48418 nelt = d->nelt;
48419 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48421 /* Generate two permutation masks. In the first permutation mask
48422 the first quarter will contain indexes for the first half
48423 of the op0, the second quarter will contain bit 7 set, third quarter
48424 will contain indexes for the second half of the op0 and the
48425 last quarter bit 7 set. In the second permutation mask
48426 the first quarter will contain bit 7 set, the second quarter
48427 indexes for the first half of the op1, the third quarter bit 7 set
48428 and last quarter indexes for the second half of the op1.
48429 I.e. the first mask e.g. for V32QImode extract even will be:
48430 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48431 (all values masked with 0xf except for -128) and second mask
48432 for extract even will be
48433 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48434 m128 = GEN_INT (-128);
48435 for (i = 0; i < nelt; ++i)
48437 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48438 unsigned which = d->perm[i] >= nelt;
48439 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48441 for (j = 0; j < eltsz; ++j)
48443 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48444 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48448 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48449 vperm = force_reg (V32QImode, vperm);
48451 l = gen_reg_rtx (V32QImode);
48452 op = gen_lowpart (V32QImode, d->op0);
48453 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48455 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48456 vperm = force_reg (V32QImode, vperm);
48458 h = gen_reg_rtx (V32QImode);
48459 op = gen_lowpart (V32QImode, d->op1);
48460 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48462 ior = gen_reg_rtx (V32QImode);
48463 emit_insn (gen_iorv32qi3 (ior, l, h));
48465 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48466 op = gen_reg_rtx (V4DImode);
48467 ior = gen_lowpart (V4DImode, ior);
48468 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48469 const1_rtx, GEN_INT (3)));
48470 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48472 return true;
48475 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48476 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48477 with two "and" and "pack" or two "shift" and "pack" insns. We should
48478 have already failed all two instruction sequences. */
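/* For instance, for a V16QImode extract-even both operands are viewed as
   V8HImode, each word is masked with 0x00ff to keep only its even byte, and
   packuswb packs the words back into bytes, giving the even bytes of op0
   followed by those of op1. For extract-odd a logical shift right by 8
   replaces the mask. For the 256-bit modes the pack operates within
   128-bit lanes, hence the final vpermq with selector { 0, 2, 1, 3 }. */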
48480 static bool
48481 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48483 rtx op, dop0, dop1, t, rperm[16];
48484 unsigned i, odd, c, s, nelt = d->nelt;
48485 bool end_perm = false;
48486 machine_mode half_mode;
48487 rtx (*gen_and) (rtx, rtx, rtx);
48488 rtx (*gen_pack) (rtx, rtx, rtx);
48489 rtx (*gen_shift) (rtx, rtx, rtx);
48491 if (d->one_operand_p)
48492 return false;
48494 switch (d->vmode)
48496 case V8HImode:
48497 /* Required for "pack". */
48498 if (!TARGET_SSE4_1)
48499 return false;
48500 c = 0xffff;
48501 s = 16;
48502 half_mode = V4SImode;
48503 gen_and = gen_andv4si3;
48504 gen_pack = gen_sse4_1_packusdw;
48505 gen_shift = gen_lshrv4si3;
48506 break;
48507 case V16QImode:
48508 /* No check as all instructions are SSE2. */
48509 c = 0xff;
48510 s = 8;
48511 half_mode = V8HImode;
48512 gen_and = gen_andv8hi3;
48513 gen_pack = gen_sse2_packuswb;
48514 gen_shift = gen_lshrv8hi3;
48515 break;
48516 case V16HImode:
48517 if (!TARGET_AVX2)
48518 return false;
48519 c = 0xffff;
48520 s = 16;
48521 half_mode = V8SImode;
48522 gen_and = gen_andv8si3;
48523 gen_pack = gen_avx2_packusdw;
48524 gen_shift = gen_lshrv8si3;
48525 end_perm = true;
48526 break;
48527 case V32QImode:
48528 if (!TARGET_AVX2)
48529 return false;
48530 c = 0xff;
48531 s = 8;
48532 half_mode = V16HImode;
48533 gen_and = gen_andv16hi3;
48534 gen_pack = gen_avx2_packuswb;
48535 gen_shift = gen_lshrv16hi3;
48536 end_perm = true;
48537 break;
48538 default:
48539 /* Only for V8HI, V16QI, V16HI and V32QI modes is this more profitable
48540 than general shuffles. */
48541 return false;
48544 /* Check that permutation is even or odd. */
48545 odd = d->perm[0];
48546 if (odd > 1)
48547 return false;
48549 for (i = 1; i < nelt; ++i)
48550 if (d->perm[i] != 2 * i + odd)
48551 return false;
48553 if (d->testing_p)
48554 return true;
48556 dop0 = gen_reg_rtx (half_mode);
48557 dop1 = gen_reg_rtx (half_mode);
48558 if (odd == 0)
48560 for (i = 0; i < nelt / 2; i++)
48561 rperm[i] = GEN_INT (c);
48562 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48563 t = force_reg (half_mode, t);
48564 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48565 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48567 else
48569 emit_insn (gen_shift (dop0,
48570 gen_lowpart (half_mode, d->op0),
48571 GEN_INT (s)));
48572 emit_insn (gen_shift (dop1,
48573 gen_lowpart (half_mode, d->op1),
48574 GEN_INT (s)));
48576 /* In AVX2 for 256 bit case we need to permute pack result. */
48577 if (TARGET_AVX2 && end_perm)
48579 op = gen_reg_rtx (d->vmode);
48580 t = gen_reg_rtx (V4DImode);
48581 emit_insn (gen_pack (op, dop0, dop1));
48582 emit_insn (gen_avx2_permv4di_1 (t,
48583 gen_lowpart (V4DImode, op),
48584 const0_rtx,
48585 const2_rtx,
48586 const1_rtx,
48587 GEN_INT (3)));
48588 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48590 else
48591 emit_insn (gen_pack (d->target, dop0, dop1));
48593 return true;
48596 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48597 and extract-odd permutations. */
48599 static bool
48600 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48602 rtx t1, t2, t3, t4, t5;
48604 switch (d->vmode)
48606 case V4DFmode:
48607 if (d->testing_p)
48608 break;
48609 t1 = gen_reg_rtx (V4DFmode);
48610 t2 = gen_reg_rtx (V4DFmode);
48612 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48613 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48614 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48616 /* Now an unpck[lh]pd will produce the result required. */
48617 if (odd)
48618 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48619 else
48620 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48621 emit_insn (t3);
48622 break;
48624 case V8SFmode:
48626 int mask = odd ? 0xdd : 0x88;
48628 if (d->testing_p)
48629 break;
48630 t1 = gen_reg_rtx (V8SFmode);
48631 t2 = gen_reg_rtx (V8SFmode);
48632 t3 = gen_reg_rtx (V8SFmode);
48634 /* Shuffle within the 128-bit lanes to produce:
48635 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48636 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48637 GEN_INT (mask)));
48639 /* Shuffle the lanes around to produce:
48640 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48641 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48642 GEN_INT (0x3)));
48644 /* Shuffle within the 128-bit lanes to produce:
48645 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48646 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48648 /* Shuffle within the 128-bit lanes to produce:
48649 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48650 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48652 /* Shuffle the lanes around to produce:
48653 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48654 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48655 GEN_INT (0x20)));
48657 break;
48659 case V2DFmode:
48660 case V4SFmode:
48661 case V2DImode:
48662 case V4SImode:
48663 /* These are always directly implementable by expand_vec_perm_1. */
48664 gcc_unreachable ();
48666 case V8HImode:
48667 if (TARGET_SSE4_1)
48668 return expand_vec_perm_even_odd_pack (d);
48669 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48670 return expand_vec_perm_pshufb2 (d);
48671 else
48673 if (d->testing_p)
48674 break;
48675 /* We need 2*log2(N)-1 operations to achieve odd/even
48676 with interleave. */
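/* For the V8HImode case N is 8, i.e. the five interleave insns emitted
   below. */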
48677 t1 = gen_reg_rtx (V8HImode);
48678 t2 = gen_reg_rtx (V8HImode);
48679 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48680 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48681 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48682 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48683 if (odd)
48684 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48685 else
48686 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48687 emit_insn (t3);
48689 break;
48691 case V16QImode:
48692 return expand_vec_perm_even_odd_pack (d);
48694 case V16HImode:
48695 case V32QImode:
48696 return expand_vec_perm_even_odd_pack (d);
48698 case V4DImode:
48699 if (!TARGET_AVX2)
48701 struct expand_vec_perm_d d_copy = *d;
48702 d_copy.vmode = V4DFmode;
48703 if (d->testing_p)
48704 d_copy.target = gen_lowpart (V4DFmode, d->target);
48705 else
48706 d_copy.target = gen_reg_rtx (V4DFmode);
48707 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48708 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48709 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48711 if (!d->testing_p)
48712 emit_move_insn (d->target,
48713 gen_lowpart (V4DImode, d_copy.target));
48714 return true;
48716 return false;
48719 if (d->testing_p)
48720 break;
48722 t1 = gen_reg_rtx (V4DImode);
48723 t2 = gen_reg_rtx (V4DImode);
48725 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48726 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48727 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48729 /* Now a vpunpck[lh]qdq will produce the result required. */
48730 if (odd)
48731 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48732 else
48733 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48734 emit_insn (t3);
48735 break;
48737 case V8SImode:
48738 if (!TARGET_AVX2)
48740 struct expand_vec_perm_d d_copy = *d;
48741 d_copy.vmode = V8SFmode;
48742 if (d->testing_p)
48743 d_copy.target = gen_lowpart (V8SFmode, d->target);
48744 else
48745 d_copy.target = gen_reg_rtx (V8SFmode);
48746 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48747 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48748 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48750 if (!d->testing_p)
48751 emit_move_insn (d->target,
48752 gen_lowpart (V8SImode, d_copy.target));
48753 return true;
48755 return false;
48758 if (d->testing_p)
48759 break;
48761 t1 = gen_reg_rtx (V8SImode);
48762 t2 = gen_reg_rtx (V8SImode);
48763 t3 = gen_reg_rtx (V4DImode);
48764 t4 = gen_reg_rtx (V4DImode);
48765 t5 = gen_reg_rtx (V4DImode);
48767 /* Shuffle the lanes around into
48768 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48769 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48770 gen_lowpart (V4DImode, d->op1),
48771 GEN_INT (0x20)));
48772 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48773 gen_lowpart (V4DImode, d->op1),
48774 GEN_INT (0x31)));
48776 /* Swap the 2nd and 3rd position in each lane into
48777 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48778 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48779 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48780 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48781 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48783 /* Now a vpunpck[lh]qdq will produce
48784 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48785 if (odd)
48786 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48787 gen_lowpart (V4DImode, t2));
48788 else
48789 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48790 gen_lowpart (V4DImode, t2));
48791 emit_insn (t3);
48792 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48793 break;
48795 default:
48796 gcc_unreachable ();
48799 return true;
48802 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48803 extract-even and extract-odd permutations. */
48805 static bool
48806 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48808 unsigned i, odd, nelt = d->nelt;
48810 odd = d->perm[0];
48811 if (odd != 0 && odd != 1)
48812 return false;
48814 for (i = 1; i < nelt; ++i)
48815 if (d->perm[i] != 2 * i + odd)
48816 return false;
48818 return expand_vec_perm_even_odd_1 (d, odd);
48821 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48822 permutations. We assume that expand_vec_perm_1 has already failed. */
48824 static bool
48825 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48827 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48828 machine_mode vmode = d->vmode;
48829 unsigned char perm2[4];
48830 rtx op0 = d->op0, dest;
48831 bool ok;
48833 switch (vmode)
48835 case V4DFmode:
48836 case V8SFmode:
48837 /* These are special-cased in sse.md so that we can optionally
48838 use the vbroadcast instruction. They expand to two insns
48839 if the input happens to be in a register. */
48840 gcc_unreachable ();
48842 case V2DFmode:
48843 case V2DImode:
48844 case V4SFmode:
48845 case V4SImode:
48846 /* These are always implementable using standard shuffle patterns. */
48847 gcc_unreachable ();
48849 case V8HImode:
48850 case V16QImode:
48851 /* These can be implemented via interleave. We save one insn by
48852 stopping once we have promoted to V4SImode and then use pshufd. */
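/* E.g. broadcasting element 0 of a V8HImode vector below takes a single
   interleave-low (punpcklwd) of the operand with itself followed by a
   pshufd with selector { 0, 0, 0, 0 } on the resulting V4SImode value. */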
48853 if (d->testing_p)
48854 return true;
48857 rtx dest;
48858 rtx (*gen) (rtx, rtx, rtx)
48859 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48860 : gen_vec_interleave_lowv8hi;
48862 if (elt >= nelt2)
48864 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48865 : gen_vec_interleave_highv8hi;
48866 elt -= nelt2;
48868 nelt2 /= 2;
48870 dest = gen_reg_rtx (vmode);
48871 emit_insn (gen (dest, op0, op0));
48872 vmode = get_mode_wider_vector (vmode);
48873 op0 = gen_lowpart (vmode, dest);
48875 while (vmode != V4SImode);
48877 memset (perm2, elt, 4);
48878 dest = gen_reg_rtx (V4SImode);
48879 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48880 gcc_assert (ok);
48881 if (!d->testing_p)
48882 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48883 return true;
48885 case V32QImode:
48886 case V16HImode:
48887 case V8SImode:
48888 case V4DImode:
48889 /* For AVX2 broadcasts of the first element vpbroadcast* or
48890 vpermq should be used by expand_vec_perm_1. */
48891 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48892 return false;
48894 default:
48895 gcc_unreachable ();
48899 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48900 broadcast permutations. */
48902 static bool
48903 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48905 unsigned i, elt, nelt = d->nelt;
48907 if (!d->one_operand_p)
48908 return false;
48910 elt = d->perm[0];
48911 for (i = 1; i < nelt; ++i)
48912 if (d->perm[i] != elt)
48913 return false;
48915 return expand_vec_perm_broadcast_1 (d);
48918 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
48919 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48920 all the shorter instruction sequences. */
48922 static bool
48923 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48925 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48926 unsigned int i, nelt, eltsz;
48927 bool used[4];
48929 if (!TARGET_AVX2
48930 || d->one_operand_p
48931 || (d->vmode != V32QImode && d->vmode != V16HImode))
48932 return false;
48934 if (d->testing_p)
48935 return true;
48937 nelt = d->nelt;
48938 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48940 /* Generate 4 permutation masks. If the required element is within
48941 the same lane, it is shuffled in. If the required element is from the
48942 other lane, force a zero by setting bit 7 in the permutation mask.
48943 In the other mask the entries are non-negative if the element is
48944 requested from the other lane, but it is also moved to the other lane,
48945 so that the result of vpshufb can have the two V2TImode halves
48946 swapped. */
48947 m128 = GEN_INT (-128);
48948 for (i = 0; i < 32; ++i)
48950 rperm[0][i] = m128;
48951 rperm[1][i] = m128;
48952 rperm[2][i] = m128;
48953 rperm[3][i] = m128;
48955 used[0] = false;
48956 used[1] = false;
48957 used[2] = false;
48958 used[3] = false;
48959 for (i = 0; i < nelt; ++i)
48961 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48962 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48963 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48965 for (j = 0; j < eltsz; ++j)
48966 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48967 used[which] = true;
48970 for (i = 0; i < 2; ++i)
48972 if (!used[2 * i + 1])
48974 h[i] = NULL_RTX;
48975 continue;
48977 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48978 gen_rtvec_v (32, rperm[2 * i + 1]));
48979 vperm = force_reg (V32QImode, vperm);
48980 h[i] = gen_reg_rtx (V32QImode);
48981 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48982 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48985 /* Swap the 128-bit lanes of h[X]. */
48986 for (i = 0; i < 2; ++i)
48988 if (h[i] == NULL_RTX)
48989 continue;
48990 op = gen_reg_rtx (V4DImode);
48991 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48992 const2_rtx, GEN_INT (3), const0_rtx,
48993 const1_rtx));
48994 h[i] = gen_lowpart (V32QImode, op);
48997 for (i = 0; i < 2; ++i)
48999 if (!used[2 * i])
49001 l[i] = NULL_RTX;
49002 continue;
49004 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49005 vperm = force_reg (V32QImode, vperm);
49006 l[i] = gen_reg_rtx (V32QImode);
49007 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49008 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49011 for (i = 0; i < 2; ++i)
49013 if (h[i] && l[i])
49015 op = gen_reg_rtx (V32QImode);
49016 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49017 l[i] = op;
49019 else if (h[i])
49020 l[i] = h[i];
49023 gcc_assert (l[0] && l[1]);
49024 op = d->target;
49025 if (d->vmode != V32QImode)
49026 op = gen_reg_rtx (V32QImode);
49027 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49028 if (op != d->target)
49029 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49030 return true;
49033 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49034 With all of the interface bits taken care of, perform the expansion
49035 in D and return true on success. */
49037 static bool
49038 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49040 /* Try a single instruction expansion. */
49041 if (expand_vec_perm_1 (d))
49042 return true;
49044 /* Try sequences of two instructions. */
49046 if (expand_vec_perm_pshuflw_pshufhw (d))
49047 return true;
49049 if (expand_vec_perm_palignr (d, false))
49050 return true;
49052 if (expand_vec_perm_interleave2 (d))
49053 return true;
49055 if (expand_vec_perm_broadcast (d))
49056 return true;
49058 if (expand_vec_perm_vpermq_perm_1 (d))
49059 return true;
49061 if (expand_vec_perm_vperm2f128 (d))
49062 return true;
49064 if (expand_vec_perm_pblendv (d))
49065 return true;
49067 /* Try sequences of three instructions. */
49069 if (expand_vec_perm_even_odd_pack (d))
49070 return true;
49072 if (expand_vec_perm_2vperm2f128_vshuf (d))
49073 return true;
49075 if (expand_vec_perm_pshufb2 (d))
49076 return true;
49078 if (expand_vec_perm_interleave3 (d))
49079 return true;
49081 if (expand_vec_perm_vperm2f128_vblend (d))
49082 return true;
49084 /* Try sequences of four instructions. */
49086 if (expand_vec_perm_vpshufb2_vpermq (d))
49087 return true;
49089 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49090 return true;
49092 /* ??? Look for narrow permutations whose element orderings would
49093 allow the promotion to a wider mode. */
49095 /* ??? Look for sequences of interleave or a wider permute that place
49096 the data into the correct lanes for a half-vector shuffle like
49097 pshuf[lh]w or vpermilps. */
49099 /* ??? Look for sequences of interleave that produce the desired results.
49100 The combinatorics of punpck[lh] get pretty ugly... */
49102 if (expand_vec_perm_even_odd (d))
49103 return true;
49105 /* Even longer sequences. */
49106 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49107 return true;
49109 return false;
49112 /* If a permutation only uses one operand, make it clear. Returns true
49113 if the permutation references both operands. */
49115 static bool
49116 canonicalize_perm (struct expand_vec_perm_d *d)
49118 int i, which, nelt = d->nelt;
49120 for (i = which = 0; i < nelt; ++i)
49121 which |= (d->perm[i] < nelt ? 1 : 2);
49123 d->one_operand_p = true;
49124 switch (which)
49126 default:
49127 gcc_unreachable();
49129 case 3:
49130 if (!rtx_equal_p (d->op0, d->op1))
49132 d->one_operand_p = false;
49133 break;
49135 /* The elements of PERM do not suggest that only the first operand
49136 is used, but both operands are identical. Allow easier matching
49137 of the permutation by folding the permutation into the single
49138 input vector. */
49139 /* FALLTHRU */
49141 case 2:
49142 for (i = 0; i < nelt; ++i)
49143 d->perm[i] &= nelt - 1;
49144 d->op0 = d->op1;
49145 break;
49147 case 1:
49148 d->op1 = d->op0;
49149 break;
49152 return (which == 3);
49155 bool
49156 ix86_expand_vec_perm_const (rtx operands[4])
49158 struct expand_vec_perm_d d;
49159 unsigned char perm[MAX_VECT_LEN];
49160 int i, nelt;
49161 bool two_args;
49162 rtx sel;
49164 d.target = operands[0];
49165 d.op0 = operands[1];
49166 d.op1 = operands[2];
49167 sel = operands[3];
49169 d.vmode = GET_MODE (d.target);
49170 gcc_assert (VECTOR_MODE_P (d.vmode));
49171 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49172 d.testing_p = false;
49174 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49175 gcc_assert (XVECLEN (sel, 0) == nelt);
49176 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49178 for (i = 0; i < nelt; ++i)
49180 rtx e = XVECEXP (sel, 0, i);
49181 int ei = INTVAL (e) & (2 * nelt - 1);
49182 d.perm[i] = ei;
49183 perm[i] = ei;
49186 two_args = canonicalize_perm (&d);
49188 if (ix86_expand_vec_perm_const_1 (&d))
49189 return true;
49191 /* If the selector says both arguments are needed, but the operands are the
49192 same, the above tried to expand with one_operand_p and a flattened selector.
49193 If that didn't work, retry without one_operand_p; we succeeded with that
49194 during testing. */
49195 if (two_args && d.one_operand_p)
49197 d.one_operand_p = false;
49198 memcpy (d.perm, perm, sizeof (perm));
49199 return ix86_expand_vec_perm_const_1 (&d);
49202 return false;
49205 /* Implement targetm.vectorize.vec_perm_const_ok. */
49207 static bool
49208 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49209 const unsigned char *sel)
49211 struct expand_vec_perm_d d;
49212 unsigned int i, nelt, which;
49213 bool ret;
49215 d.vmode = vmode;
49216 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49217 d.testing_p = true;
49219 /* Given sufficient ISA support we can just return true here
49220 for selected vector modes. */
49221 switch (d.vmode)
49223 case V16SFmode:
49224 case V16SImode:
49225 case V8DImode:
49226 case V8DFmode:
49227 if (TARGET_AVX512F)
49228 /* All implementable with a single vpermi2 insn. */
49229 return true;
49230 break;
49231 case V32HImode:
49232 if (TARGET_AVX512BW)
49233 /* All implementable with a single vpermi2 insn. */
49234 return true;
49235 break;
49236 case V8SImode:
49237 case V8SFmode:
49238 case V4DFmode:
49239 case V4DImode:
49240 if (TARGET_AVX512VL)
49241 /* All implementable with a single vpermi2 insn. */
49242 return true;
49243 break;
49244 case V16HImode:
49245 if (TARGET_AVX2)
49246 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49247 return true;
49248 break;
49249 case V32QImode:
49250 if (TARGET_AVX2)
49251 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49252 return true;
49253 break;
49254 case V4SImode:
49255 case V4SFmode:
49256 case V8HImode:
49257 case V16QImode:
49258 /* All implementable with a single vpperm insn. */
49259 if (TARGET_XOP)
49260 return true;
49261 /* All implementable with 2 pshufb + 1 ior. */
49262 if (TARGET_SSSE3)
49263 return true;
49264 break;
49265 case V2DImode:
49266 case V2DFmode:
49267 /* All implementable with shufpd or unpck[lh]pd. */
49268 return true;
49269 default:
49270 return false;
49273 /* Extract the values from the vector CST into the permutation
49274 array in D. */
49275 memcpy (d.perm, sel, nelt);
49276 for (i = which = 0; i < nelt; ++i)
49278 unsigned char e = d.perm[i];
49279 gcc_assert (e < 2 * nelt);
49280 which |= (e < nelt ? 1 : 2);
49283 /* For all elements from second vector, fold the elements to first. */
49284 if (which == 2)
49285 for (i = 0; i < nelt; ++i)
49286 d.perm[i] -= nelt;
49288 /* Check whether the mask can be applied to the vector type. */
49289 d.one_operand_p = (which != 3);
49291 /* Implementable with shufps or pshufd. */
49292 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49293 return true;
49295 /* Otherwise we have to go through the motions and see if we can
49296 figure out how to generate the requested permutation. */
49297 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49298 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49299 if (!d.one_operand_p)
49300 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49302 start_sequence ();
49303 ret = ix86_expand_vec_perm_const_1 (&d);
49304 end_sequence ();
49306 return ret;
49309 void
49310 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49312 struct expand_vec_perm_d d;
49313 unsigned i, nelt;
49315 d.target = targ;
49316 d.op0 = op0;
49317 d.op1 = op1;
49318 d.vmode = GET_MODE (targ);
49319 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49320 d.one_operand_p = false;
49321 d.testing_p = false;
49323 for (i = 0; i < nelt; ++i)
49324 d.perm[i] = i * 2 + odd;
49326 /* We'll either be able to implement the permutation directly... */
49327 if (expand_vec_perm_1 (&d))
49328 return;
49330 /* ... or we use the special-case patterns. */
49331 expand_vec_perm_even_odd_1 (&d, odd);
49334 static void
49335 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49337 struct expand_vec_perm_d d;
49338 unsigned i, nelt, base;
49339 bool ok;
49341 d.target = targ;
49342 d.op0 = op0;
49343 d.op1 = op1;
49344 d.vmode = GET_MODE (targ);
49345 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49346 d.one_operand_p = false;
49347 d.testing_p = false;
49349 base = high_p ? nelt / 2 : 0;
49350 for (i = 0; i < nelt / 2; ++i)
49352 d.perm[i * 2] = i + base;
49353 d.perm[i * 2 + 1] = i + base + nelt;
49356 /* Note that for AVX this isn't one instruction. */
49357 ok = ix86_expand_vec_perm_const_1 (&d);
49358 gcc_assert (ok);
49362 /* Expand a vector operation CODE for a V*QImode in terms of the
49363 same operation on V*HImode. */
49365 void
49366 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49368 machine_mode qimode = GET_MODE (dest);
49369 machine_mode himode;
49370 rtx (*gen_il) (rtx, rtx, rtx);
49371 rtx (*gen_ih) (rtx, rtx, rtx);
49372 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49373 struct expand_vec_perm_d d;
49374 bool ok, full_interleave;
49375 bool uns_p = false;
49376 int i;
49378 switch (qimode)
49380 case V16QImode:
49381 himode = V8HImode;
49382 gen_il = gen_vec_interleave_lowv16qi;
49383 gen_ih = gen_vec_interleave_highv16qi;
49384 break;
49385 case V32QImode:
49386 himode = V16HImode;
49387 gen_il = gen_avx2_interleave_lowv32qi;
49388 gen_ih = gen_avx2_interleave_highv32qi;
49389 break;
49390 case V64QImode:
49391 himode = V32HImode;
49392 gen_il = gen_avx512bw_interleave_lowv64qi;
49393 gen_ih = gen_avx512bw_interleave_highv64qi;
49394 break;
49395 default:
49396 gcc_unreachable ();
49399 op2_l = op2_h = op2;
49400 switch (code)
49402 case MULT:
49403 /* Unpack data such that we've got a source byte in each low byte of
49404 each word. We don't care what goes into the high byte of each word.
49405 Rather than trying to get zero in there, the most convenient thing is to let
49406 it be a copy of the low byte. */
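/* This works because the low byte of a product depends only on the low
   bytes of the factors: (x * y) & 0xff == ((x & 0xff) * (y & 0xff)) & 0xff,
   so whatever sits in the high byte of each word cannot disturb the byte
   products collected by the final even-byte extraction. */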
49407 op2_l = gen_reg_rtx (qimode);
49408 op2_h = gen_reg_rtx (qimode);
49409 emit_insn (gen_il (op2_l, op2, op2));
49410 emit_insn (gen_ih (op2_h, op2, op2));
49411 /* FALLTHRU */
49413 op1_l = gen_reg_rtx (qimode);
49414 op1_h = gen_reg_rtx (qimode);
49415 emit_insn (gen_il (op1_l, op1, op1));
49416 emit_insn (gen_ih (op1_h, op1, op1));
49417 full_interleave = qimode == V16QImode;
49418 break;
49420 case ASHIFT:
49421 case LSHIFTRT:
49422 uns_p = true;
49423 /* FALLTHRU */
49424 case ASHIFTRT:
49425 op1_l = gen_reg_rtx (himode);
49426 op1_h = gen_reg_rtx (himode);
49427 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49428 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49429 full_interleave = true;
49430 break;
49431 default:
49432 gcc_unreachable ();
49435 /* Perform the operation. */
49436 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49437 1, OPTAB_DIRECT);
49438 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49439 1, OPTAB_DIRECT);
49440 gcc_assert (res_l && res_h);
49442 /* Merge the data back into the right place. */
49443 d.target = dest;
49444 d.op0 = gen_lowpart (qimode, res_l);
49445 d.op1 = gen_lowpart (qimode, res_h);
49446 d.vmode = qimode;
49447 d.nelt = GET_MODE_NUNITS (qimode);
49448 d.one_operand_p = false;
49449 d.testing_p = false;
49451 if (full_interleave)
49453 /* For SSE2, we used a full interleave, so the desired
49454 results are in the even elements. */
49455 for (i = 0; i < 64; ++i)
49456 d.perm[i] = i * 2;
49458 else
49460 /* For AVX, the interleave used above was not cross-lane. So the
49461 extraction is of the even elements, but with the second and third quarters swapped.
49462 Happily, that is even one insn shorter than even extraction. */
49463 for (i = 0; i < 64; ++i)
49464 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49467 ok = ix86_expand_vec_perm_const_1 (&d);
49468 gcc_assert (ok);
49470 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49471 gen_rtx_fmt_ee (code, qimode, op1, op2));
49474 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49475 if op is CONST_VECTOR with all odd elements equal to their
49476 preceding element. */
49478 static bool
49479 const_vector_equal_evenodd_p (rtx op)
49481 machine_mode mode = GET_MODE (op);
49482 int i, nunits = GET_MODE_NUNITS (mode);
49483 if (GET_CODE (op) != CONST_VECTOR
49484 || nunits != CONST_VECTOR_NUNITS (op))
49485 return false;
49486 for (i = 0; i < nunits; i += 2)
49487 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49488 return false;
49489 return true;
49492 void
49493 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49494 bool uns_p, bool odd_p)
49496 machine_mode mode = GET_MODE (op1);
49497 machine_mode wmode = GET_MODE (dest);
49498 rtx x;
49499 rtx orig_op1 = op1, orig_op2 = op2;
49501 if (!nonimmediate_operand (op1, mode))
49502 op1 = force_reg (mode, op1);
49503 if (!nonimmediate_operand (op2, mode))
49504 op2 = force_reg (mode, op2);
49506 /* We only play even/odd games with vectors of SImode. */
49507 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49509 /* If we're looking for the odd results, shift those members down to
49510 the even slots. For some cpus this is faster than a PSHUFD. */
49511 if (odd_p)
49513 /* For XOP use vpmacsdqh, but only for smult, as it is only
49514 signed. */
49515 if (TARGET_XOP && mode == V4SImode && !uns_p)
49517 x = force_reg (wmode, CONST0_RTX (wmode));
49518 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49519 return;
49522 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49523 if (!const_vector_equal_evenodd_p (orig_op1))
49524 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49525 x, NULL, 1, OPTAB_DIRECT);
49526 if (!const_vector_equal_evenodd_p (orig_op2))
49527 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49528 x, NULL, 1, OPTAB_DIRECT);
49529 op1 = gen_lowpart (mode, op1);
49530 op2 = gen_lowpart (mode, op2);
49533 if (mode == V16SImode)
49535 if (uns_p)
49536 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49537 else
49538 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49540 else if (mode == V8SImode)
49542 if (uns_p)
49543 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49544 else
49545 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49547 else if (uns_p)
49548 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49549 else if (TARGET_SSE4_1)
49550 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49551 else
49553 rtx s1, s2, t0, t1, t2;
49555 /* The easiest way to implement this without PMULDQ is to go through
49556 the motions as if we are performing a full 64-bit multiply. With
49557 the exception that we need to do less shuffling of the elements. */
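/* One way to see that the sequence below is correct: writing u(x) for the
   32-bit value x zero-extended to 64 bits, sign extension is
   sext(x) = u(x) - 2^32 * (x < 0), so modulo 2^64
   sext(a) * sext(b) = u(a)*u(b) - 2^32 * ((a < 0) * u(b) + (b < 0) * u(a)).
   s1 and s2 below are the all-ones masks for a < 0 resp. b < 0, so t1 + t2
   is (2^32 - 1) times the parenthesized sum; after the shift by 32 that is
   congruent to minus 2^32 times the sum, and adding t0 = u(a)*u(b) yields
   the signed product. */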
49559 /* Compute the sign-extension, aka highparts, of the two operands. */
49560 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49561 op1, pc_rtx, pc_rtx);
49562 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49563 op2, pc_rtx, pc_rtx);
49565 /* Multiply LO(A) * HI(B), and vice-versa. */
49566 t1 = gen_reg_rtx (wmode);
49567 t2 = gen_reg_rtx (wmode);
49568 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49569 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49571 /* Multiply LO(A) * LO(B). */
49572 t0 = gen_reg_rtx (wmode);
49573 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49575 /* Combine and shift the highparts into place. */
49576 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49577 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49578 1, OPTAB_DIRECT);
49580 /* Combine high and low parts. */
49581 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49582 return;
49584 emit_insn (x);
49587 void
49588 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49589 bool uns_p, bool high_p)
49591 machine_mode wmode = GET_MODE (dest);
49592 machine_mode mode = GET_MODE (op1);
49593 rtx t1, t2, t3, t4, mask;
49595 switch (mode)
49597 case V4SImode:
49598 t1 = gen_reg_rtx (mode);
49599 t2 = gen_reg_rtx (mode);
49600 if (TARGET_XOP && !uns_p)
49602 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49603 shuffle the elements once so that all elements are in the right
49604 place for immediate use: { A C B D }. */
49605 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49606 const1_rtx, GEN_INT (3)));
49607 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49608 const1_rtx, GEN_INT (3)));
49610 else
49612 /* Put the elements into place for the multiply. */
49613 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49614 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49615 high_p = false;
49617 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49618 break;
49620 case V8SImode:
49621 /* Shuffle the elements between the lanes. After this we
49622 have { A B E F | C D G H } for each operand. */
49623 t1 = gen_reg_rtx (V4DImode);
49624 t2 = gen_reg_rtx (V4DImode);
49625 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49626 const0_rtx, const2_rtx,
49627 const1_rtx, GEN_INT (3)));
49628 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49629 const0_rtx, const2_rtx,
49630 const1_rtx, GEN_INT (3)));
49632 /* Shuffle the elements within the lanes. After this we
49633 have { A A B B | C C D D } or { E E F F | G G H H }. */
49634 t3 = gen_reg_rtx (V8SImode);
49635 t4 = gen_reg_rtx (V8SImode);
49636 mask = GEN_INT (high_p
49637 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49638 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49639 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49640 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49642 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49643 break;
49645 case V8HImode:
49646 case V16HImode:
49647 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49648 uns_p, OPTAB_DIRECT);
49649 t2 = expand_binop (mode,
49650 uns_p ? umul_highpart_optab : smul_highpart_optab,
49651 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49652 gcc_assert (t1 && t2);
49654 t3 = gen_reg_rtx (mode);
49655 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49656 emit_move_insn (dest, gen_lowpart (wmode, t3));
49657 break;
49659 case V16QImode:
49660 case V32QImode:
49661 case V32HImode:
49662 case V16SImode:
49663 case V64QImode:
49664 t1 = gen_reg_rtx (wmode);
49665 t2 = gen_reg_rtx (wmode);
49666 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49667 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49669 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49670 break;
49672 default:
49673 gcc_unreachable ();
49677 void
49678 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49680 rtx res_1, res_2, res_3, res_4;
49682 res_1 = gen_reg_rtx (V4SImode);
49683 res_2 = gen_reg_rtx (V4SImode);
49684 res_3 = gen_reg_rtx (V2DImode);
49685 res_4 = gen_reg_rtx (V2DImode);
49686 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49687 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49689 /* Move the results in element 2 down to element 1; we don't care
49690 what goes in elements 2 and 3. Then we can merge the parts
49691 back together with an interleave.
49693 Note that two other sequences were tried:
49694 (1) Use interleaves at the start instead of psrldq, which allows
49695 us to use a single shufps to merge things back at the end.
49696 (2) Use shufps here to combine the two vectors, then pshufd to
49697 put the elements in the correct order.
49698 In both cases the cost of the reformatting stall was too high
49699 and the overall sequence slower. */
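/* Data flow of the chosen sequence, with op1 = { a0 a1 a2 a3 } and
   op2 = { b0 b1 b2 b3 }: res_3 = { a0*b0, a2*b2 } and res_4 = { a1*b1,
   a3*b3 } as 64-bit products; the pshufd with selector { 0, 2, x, x } moves
   the low halves into elements 0 and 1, and the interleave-low produces
   { lo(a0*b0), lo(a1*b1), lo(a2*b2), lo(a3*b3) }, the desired V4SImode
   product. */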
49701 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49702 const0_rtx, const2_rtx,
49703 const0_rtx, const0_rtx));
49704 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49705 const0_rtx, const2_rtx,
49706 const0_rtx, const0_rtx));
49707 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49709 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49712 void
49713 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49715 machine_mode mode = GET_MODE (op0);
49716 rtx t1, t2, t3, t4, t5, t6;
49718 if (TARGET_AVX512DQ && mode == V8DImode)
49719 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49720 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49721 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49722 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49723 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49724 else if (TARGET_XOP && mode == V2DImode)
49726 /* op1: A,B,C,D, op2: E,F,G,H */
49727 op1 = gen_lowpart (V4SImode, op1);
49728 op2 = gen_lowpart (V4SImode, op2);
49730 t1 = gen_reg_rtx (V4SImode);
49731 t2 = gen_reg_rtx (V4SImode);
49732 t3 = gen_reg_rtx (V2DImode);
49733 t4 = gen_reg_rtx (V2DImode);
49735 /* t1: B,A,D,C */
49736 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49737 GEN_INT (1),
49738 GEN_INT (0),
49739 GEN_INT (3),
49740 GEN_INT (2)));
49742 /* t2: (B*E),(A*F),(D*G),(C*H) */
49743 emit_insn (gen_mulv4si3 (t2, t1, op2));
49745 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49746 emit_insn (gen_xop_phadddq (t3, t2));
49748 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49749 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49751 /* Multiply lower parts and add all */
49752 t5 = gen_reg_rtx (V2DImode);
49753 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49754 gen_lowpart (V4SImode, op1),
49755 gen_lowpart (V4SImode, op2)));
49756 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49759 else
49761 machine_mode nmode;
49762 rtx (*umul) (rtx, rtx, rtx);
49764 if (mode == V2DImode)
49766 umul = gen_vec_widen_umult_even_v4si;
49767 nmode = V4SImode;
49769 else if (mode == V4DImode)
49771 umul = gen_vec_widen_umult_even_v8si;
49772 nmode = V8SImode;
49774 else if (mode == V8DImode)
49776 umul = gen_vec_widen_umult_even_v16si;
49777 nmode = V16SImode;
49779 else
49780 gcc_unreachable ();
49783 /* Multiply low parts. */
49784 t1 = gen_reg_rtx (mode);
49785 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49787 /* Shift input vectors right 32 bits so we can multiply high parts. */
49788 t6 = GEN_INT (32);
49789 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49790 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49792 /* Multiply high parts by low parts. */
49793 t4 = gen_reg_rtx (mode);
49794 t5 = gen_reg_rtx (mode);
49795 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49796 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49798 /* Combine and shift the highparts back. */
49799 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49800 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49802 /* Combine high and low parts. */
49803 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49806 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49807 gen_rtx_MULT (mode, op1, op2));
49810 /* Return 1 if control transfer instruction INSN
49811 should be encoded with the bnd prefix.
49812 If insn is NULL then return 1 when control
49813 transfer instructions should be prefixed with
49814 bnd by default for the current function. */
49816 bool
49817 ix86_bnd_prefixed_insn_p (rtx insn)
49819 /* For call insns check special flag. */
49820 if (insn && CALL_P (insn))
49822 rtx call = get_call_rtx_from (insn);
49823 if (call)
49824 return CALL_EXPR_WITH_BOUNDS_P (call);
49827 /* All other insns are prefixed only if function is instrumented. */
49828 return chkp_function_instrumented_p (current_function_decl);
49831 /* Calculate integer abs() using only SSE2 instructions. */
49833 void
49834 ix86_expand_sse2_abs (rtx target, rtx input)
49836 machine_mode mode = GET_MODE (target);
49837 rtx tmp0, tmp1, x;
49839 switch (mode)
49841 /* For 32-bit signed integer X, the best way to calculate the absolute
49842 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
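/* E.g. for X = -5 and W = 32: X >> 31 is -1 (all ones), -1 ^ -5 is 4,
   and 4 - (-1) is 5; for non-negative X the shift yields 0 and X is
   returned unchanged. */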
49843 case V4SImode:
49844 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49845 GEN_INT (GET_MODE_BITSIZE
49846 (GET_MODE_INNER (mode)) - 1),
49847 NULL, 0, OPTAB_DIRECT);
49848 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49849 NULL, 0, OPTAB_DIRECT);
49850 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49851 target, 0, OPTAB_DIRECT);
49852 break;
49854 /* For 16-bit signed integer X, the best way to calculate the absolute
49855 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49856 case V8HImode:
49857 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49859 x = expand_simple_binop (mode, SMAX, tmp0, input,
49860 target, 0, OPTAB_DIRECT);
49861 break;
49863 /* For 8-bit signed integer X, the best way to calculate the absolute
49864 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49865 as SSE2 provides the PMINUB insn. */
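/* E.g. for X = -5: (unsigned char) X is 251 and (unsigned char) -X is 5,
   so the unsigned minimum is 5 == abs (X); for non-negative X the two
   roles are simply swapped. */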
49866 case V16QImode:
49867 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49869 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49870 target, 0, OPTAB_DIRECT);
49871 break;
49873 default:
49874 gcc_unreachable ();
49877 if (x != target)
49878 emit_move_insn (target, x);
49881 /* Expand an insert into a vector register through pinsr insn.
49882 Return true if successful. */
49884 bool
49885 ix86_expand_pinsr (rtx *operands)
49887 rtx dst = operands[0];
49888 rtx src = operands[3];
49890 unsigned int size = INTVAL (operands[1]);
49891 unsigned int pos = INTVAL (operands[2]);
49893 if (GET_CODE (dst) == SUBREG)
49895 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49896 dst = SUBREG_REG (dst);
49899 if (GET_CODE (src) == SUBREG)
49900 src = SUBREG_REG (src);
49902 switch (GET_MODE (dst))
49904 case V16QImode:
49905 case V8HImode:
49906 case V4SImode:
49907 case V2DImode:
49909 machine_mode srcmode, dstmode;
49910 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49912 srcmode = mode_for_size (size, MODE_INT, 0);
49914 switch (srcmode)
49916 case QImode:
49917 if (!TARGET_SSE4_1)
49918 return false;
49919 dstmode = V16QImode;
49920 pinsr = gen_sse4_1_pinsrb;
49921 break;
49923 case HImode:
49924 if (!TARGET_SSE2)
49925 return false;
49926 dstmode = V8HImode;
49927 pinsr = gen_sse2_pinsrw;
49928 break;
49930 case SImode:
49931 if (!TARGET_SSE4_1)
49932 return false;
49933 dstmode = V4SImode;
49934 pinsr = gen_sse4_1_pinsrd;
49935 break;
49937 case DImode:
49938 gcc_assert (TARGET_64BIT);
49939 if (!TARGET_SSE4_1)
49940 return false;
49941 dstmode = V2DImode;
49942 pinsr = gen_sse4_1_pinsrq;
49943 break;
49945 default:
49946 return false;
49949 rtx d = dst;
49950 if (GET_MODE (dst) != dstmode)
49951 d = gen_reg_rtx (dstmode);
49952 src = gen_lowpart (srcmode, src);
49954 pos /= size;
49956 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
49957 GEN_INT (1 << pos)));
49958 if (d != dst)
49959 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49960 return true;
49963 default:
49964 return false;
49968 /* This function returns the calling-ABI-specific va_list type node.
49969 It returns the FNDECL specific va_list type. */
49971 static tree
49972 ix86_fn_abi_va_list (tree fndecl)
49974 if (!TARGET_64BIT)
49975 return va_list_type_node;
49976 gcc_assert (fndecl != NULL_TREE);
49978 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49979 return ms_va_list_type_node;
49980 else
49981 return sysv_va_list_type_node;
49984 /* Returns the canonical va_list type specified by TYPE. If there
49985 is no valid TYPE provided, it returns NULL_TREE. */
49987 static tree
49988 ix86_canonical_va_list_type (tree type)
49990 tree wtype, htype;
49992 /* Resolve references and pointers to va_list type. */
49993 if (TREE_CODE (type) == MEM_REF)
49994 type = TREE_TYPE (type);
49995 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
49996 type = TREE_TYPE (type);
49997 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
49998 type = TREE_TYPE (type);
50000 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50002 wtype = va_list_type_node;
50003 gcc_assert (wtype != NULL_TREE);
50004 htype = type;
50005 if (TREE_CODE (wtype) == ARRAY_TYPE)
50007 /* If va_list is an array type, the argument may have decayed
50008 to a pointer type, e.g. by being passed to another function.
50009 In that case, unwrap both types so that we can compare the
50010 underlying records. */
50011 if (TREE_CODE (htype) == ARRAY_TYPE
50012 || POINTER_TYPE_P (htype))
50014 wtype = TREE_TYPE (wtype);
50015 htype = TREE_TYPE (htype);
50018 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50019 return va_list_type_node;
50020 wtype = sysv_va_list_type_node;
50021 gcc_assert (wtype != NULL_TREE);
50022 htype = type;
50023 if (TREE_CODE (wtype) == ARRAY_TYPE)
50025 /* If va_list is an array type, the argument may have decayed
50026 to a pointer type, e.g. by being passed to another function.
50027 In that case, unwrap both types so that we can compare the
50028 underlying records. */
50029 if (TREE_CODE (htype) == ARRAY_TYPE
50030 || POINTER_TYPE_P (htype))
50032 wtype = TREE_TYPE (wtype);
50033 htype = TREE_TYPE (htype);
50036 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50037 return sysv_va_list_type_node;
50038 wtype = ms_va_list_type_node;
50039 gcc_assert (wtype != NULL_TREE);
50040 htype = type;
50041 if (TREE_CODE (wtype) == ARRAY_TYPE)
50043 /* If va_list is an array type, the argument may have decayed
50044 to a pointer type, e.g. by being passed to another function.
50045 In that case, unwrap both types so that we can compare the
50046 underlying records. */
50047 if (TREE_CODE (htype) == ARRAY_TYPE
50048 || POINTER_TYPE_P (htype))
50050 wtype = TREE_TYPE (wtype);
50051 htype = TREE_TYPE (htype);
50054 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50055 return ms_va_list_type_node;
50056 return NULL_TREE;
50058 return std_canonical_va_list_type (type);
50061 /* Iterate through the target-specific builtin types for va_list.
50062 IDX denotes the iterator, *PTREE is set to the result type of
50063 the va_list builtin, and *PNAME to its internal type.
50064 Returns zero if there is no element for this index, otherwise
50065 IDX should be increased upon the next call.
50066 Note, do not iterate a base builtin's name like __builtin_va_list.
50067 Used from c_common_nodes_and_builtins. */
50069 static int
50070 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50072 if (TARGET_64BIT)
50074 switch (idx)
50076 default:
50077 break;
50079 case 0:
50080 *ptree = ms_va_list_type_node;
50081 *pname = "__builtin_ms_va_list";
50082 return 1;
50084 case 1:
50085 *ptree = sysv_va_list_type_node;
50086 *pname = "__builtin_sysv_va_list";
50087 return 1;
50091 return 0;
50094 #undef TARGET_SCHED_DISPATCH
50095 #define TARGET_SCHED_DISPATCH has_dispatch
50096 #undef TARGET_SCHED_DISPATCH_DO
50097 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50098 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50099 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50100 #undef TARGET_SCHED_REORDER
50101 #define TARGET_SCHED_REORDER ix86_sched_reorder
50102 #undef TARGET_SCHED_ADJUST_PRIORITY
50103 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50104 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50105 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50106 ix86_dependencies_evaluation_hook
50108 /* The size of the dispatch window is the total number of bytes of
50109 object code allowed in a window. */
50110 #define DISPATCH_WINDOW_SIZE 16
50112 /* Number of dispatch windows considered for scheduling. */
50113 #define MAX_DISPATCH_WINDOWS 3
50115 /* Maximum number of instructions in a window. */
50116 #define MAX_INSN 4
50118 /* Maximum number of immediate operands in a window. */
50119 #define MAX_IMM 4
50121 /* Maximum number of immediate bits allowed in a window. */
50122 #define MAX_IMM_SIZE 128
50124 /* Maximum number of 32 bit immediates allowed in a window. */
50125 #define MAX_IMM_32 4
50127 /* Maximum number of 64 bit immediates allowed in a window. */
50128 #define MAX_IMM_64 2
50130 /* Maximum total of loads or prefetches allowed in a window. */
50131 #define MAX_LOAD 2
50133 /* Maximum total of stores allowed in a window. */
50134 #define MAX_STORE 1
50136 #undef BIG
50137 #define BIG 100
50140 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50141 enum dispatch_group {
50142 disp_no_group = 0,
50143 disp_load,
50144 disp_store,
50145 disp_load_store,
50146 disp_prefetch,
50147 disp_imm,
50148 disp_imm_32,
50149 disp_imm_64,
50150 disp_branch,
50151 disp_cmp,
50152 disp_jcc,
50153 disp_last
50156 /* Number of allowable groups in a dispatch window. It is an array
50157 indexed by dispatch_group enum. 100 is used as a big number,
50158 because the number of these kinds of operations does not have any
50159 effect in the dispatch window, but we need them for other reasons in
50160 the table. */
50161 static unsigned int num_allowable_groups[disp_last] = {
50162 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50165 char group_name[disp_last + 1][16] = {
50166 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50167 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50168 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50171 /* Instruction path. */
50172 enum insn_path {
50173 no_path = 0,
50174 path_single, /* Single micro op. */
50175 path_double, /* Double micro op. */
50176 path_multi, /* Instructions with more than 2 micro ops. */
50177 last_path
50180 /* sched_insn_info defines a window to the instructions scheduled in
50181 the basic block. It contains a pointer to the insn_info table and
50182 the instruction scheduled.
50184 Windows are allocated for each basic block and are linked
50185 together. */
50186 typedef struct sched_insn_info_s {
50187 rtx insn;
50188 enum dispatch_group group;
50189 enum insn_path path;
50190 int byte_len;
50191 int imm_bytes;
50192 } sched_insn_info;
50194 /* Linked list of dispatch windows. This is a two way list of
50195 dispatch windows of a basic block. It contains information about
50196 the number of uops in the window and the total number of
50197 instructions and of bytes in the object code for this dispatch
50198 window. */
50199 typedef struct dispatch_windows_s {
50200 int num_insn; /* Number of insn in the window. */
50201 int num_uops; /* Number of uops in the window. */
50202 int window_size; /* Number of bytes in the window. */
50203 int window_num; /* Window number, either 0 or 1. */
50204 int num_imm; /* Number of immediates in an insn. */
50205 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50206 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50207 int imm_size; /* Total immediates in the window. */
50208 int num_loads; /* Total memory loads in the window. */
50209 int num_stores; /* Total memory stores in the window. */
50210 int violation; /* Violation exists in window. */
50211 sched_insn_info *window; /* Pointer to the window. */
50212 struct dispatch_windows_s *next;
50213 struct dispatch_windows_s *prev;
50214 } dispatch_windows;
50216 /* Immediate values used in an insn. */
50217 typedef struct imm_info_s
50219 int imm;
50220 int imm32;
50221 int imm64;
50222 } imm_info;
50224 static dispatch_windows *dispatch_window_list;
50225 static dispatch_windows *dispatch_window_list1;
50227 /* Get dispatch group of insn. */
50229 static enum dispatch_group
50230 get_mem_group (rtx_insn *insn)
50232 enum attr_memory memory;
50234 if (INSN_CODE (insn) < 0)
50235 return disp_no_group;
50236 memory = get_attr_memory (insn);
50237 if (memory == MEMORY_STORE)
50238 return disp_store;
50240 if (memory == MEMORY_LOAD)
50241 return disp_load;
50243 if (memory == MEMORY_BOTH)
50244 return disp_load_store;
50246 return disp_no_group;
50249 /* Return true if insn is a compare instruction. */
50251 static bool
50252 is_cmp (rtx_insn *insn)
50254 enum attr_type type;
50256 type = get_attr_type (insn);
50257 return (type == TYPE_TEST
50258 || type == TYPE_ICMP
50259 || type == TYPE_FCMP
50260 || GET_CODE (PATTERN (insn)) == COMPARE);
50263 /* Return true if a dispatch violation was encountered. */
50265 static bool
50266 dispatch_violation (void)
50268 if (dispatch_window_list->next)
50269 return dispatch_window_list->next->violation;
50270 return dispatch_window_list->violation;
50273 /* Return true if insn is a branch instruction. */
50275 static bool
50276 is_branch (rtx insn)
50278 return (CALL_P (insn) || JUMP_P (insn));
50281 /* Return true if insn is a prefetch instruction. */
50283 static bool
50284 is_prefetch (rtx insn)
50286 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50289 /* This function initializes a dispatch window and the list container holding a
50290 pointer to the window. */
50292 static void
50293 init_window (int window_num)
50295 int i;
50296 dispatch_windows *new_list;
50298 if (window_num == 0)
50299 new_list = dispatch_window_list;
50300 else
50301 new_list = dispatch_window_list1;
50303 new_list->num_insn = 0;
50304 new_list->num_uops = 0;
50305 new_list->window_size = 0;
50306 new_list->next = NULL;
50307 new_list->prev = NULL;
50308 new_list->window_num = window_num;
50309 new_list->num_imm = 0;
50310 new_list->num_imm_32 = 0;
50311 new_list->num_imm_64 = 0;
50312 new_list->imm_size = 0;
50313 new_list->num_loads = 0;
50314 new_list->num_stores = 0;
50315 new_list->violation = false;
50317 for (i = 0; i < MAX_INSN; i++)
50319 new_list->window[i].insn = NULL;
50320 new_list->window[i].group = disp_no_group;
50321 new_list->window[i].path = no_path;
50322 new_list->window[i].byte_len = 0;
50323 new_list->window[i].imm_bytes = 0;
50325 return;
50328 /* This function allocates and initializes a dispatch window and the
50329 list container holding a pointer to the window. */
50331 static dispatch_windows *
50332 allocate_window (void)
50334 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50335 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50337 return new_list;
50340 /* This routine initializes the dispatch scheduling information. It
50341 initiates building dispatch scheduler tables and constructs the
50342 first dispatch window. */
50344 static void
50345 init_dispatch_sched (void)
50347 /* Allocate a dispatch list and a window. */
50348 dispatch_window_list = allocate_window ();
50349 dispatch_window_list1 = allocate_window ();
50350 init_window (0);
50351 init_window (1);
50354 /* This function returns true if a branch is detected. End of a basic block
50355 does not have to be a branch, but here we assume only branches end a
50356 window. */
50358 static bool
50359 is_end_basic_block (enum dispatch_group group)
50361 return group == disp_branch;
50364 /* This function is called when the end of a window processing is reached. */
50366 static void
50367 process_end_window (void)
50369 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50370 if (dispatch_window_list->next)
50372 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50373 gcc_assert (dispatch_window_list->window_size
50374 + dispatch_window_list1->window_size <= 48);
50375 init_window (1);
50377 init_window (0);
50380 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50381 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50382 for 48 bytes of instructions. Note that these windows are not dispatch
50383 windows whose size is DISPATCH_WINDOW_SIZE. */
50385 static dispatch_windows *
50386 allocate_next_window (int window_num)
50388 if (window_num == 0)
50390 if (dispatch_window_list->next)
50391 init_window (1);
50392 init_window (0);
50393 return dispatch_window_list;
50396 dispatch_window_list->next = dispatch_window_list1;
50397 dispatch_window_list1->prev = dispatch_window_list;
50399 return dispatch_window_list1;
50402 /* Compute number of immediate operands of an instruction. */
50404 static void
50405 find_constant (rtx in_rtx, imm_info *imm_values)
50407 if (INSN_P (in_rtx))
50408 in_rtx = PATTERN (in_rtx);
50409 subrtx_iterator::array_type array;
50410 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50411 if (const_rtx x = *iter)
50412 switch (GET_CODE (x))
50414 case CONST:
50415 case SYMBOL_REF:
50416 case CONST_INT:
50417 (imm_values->imm)++;
50418 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50419 (imm_values->imm32)++;
50420 else
50421 (imm_values->imm64)++;
50422 break;
50424 case CONST_DOUBLE:
50425 (imm_values->imm)++;
50426 (imm_values->imm64)++;
50427 break;
50429 case CODE_LABEL:
50430 if (LABEL_KIND (x) == LABEL_NORMAL)
50432 (imm_values->imm)++;
50433 (imm_values->imm32)++;
50435 break;
50437 default:
50438 break;
50442 /* Return total size of immediate operands of an instruction along with number
50443 of corresponding immediate-operands. It initializes its parameters to zero
50444 before calling FIND_CONSTANT.
50445 INSN is the input instruction. IMM is the total of immediates.
50446 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50447 bit immediates. */
50449 static int
50450 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50452 imm_info imm_values = {0, 0, 0};
50454 find_constant (insn, &imm_values);
50455 *imm = imm_values.imm;
50456 *imm32 = imm_values.imm32;
50457 *imm64 = imm_values.imm64;
50458 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
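/* Worked example (illustrative addition, not part of the original source):
   for an insn whose pattern contains a single CONST_INT such as
   0x1122334455667788, which does not satisfy x86_64_immediate_operand in
   SImode, FIND_CONSTANT counts it as one 64-bit immediate, so the outputs
   are *IMM = 1, *IMM32 = 0, *IMM64 = 1 and the returned size is
   0 * 4 + 1 * 8 = 8 bytes.  */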
50461 /* This function indicates if an operand of an instruction is an
50462 immediate. */
50464 static bool
50465 has_immediate (rtx insn)
50467 int num_imm_operand;
50468 int num_imm32_operand;
50469 int num_imm64_operand;
50471 if (insn)
50472 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50473 &num_imm64_operand);
50474 return false;
50477 /* Return single or double path for instructions. */
50479 static enum insn_path
50480 get_insn_path (rtx_insn *insn)
50482 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50484 if ((int)path == 0)
50485 return path_single;
50487 if ((int)path == 1)
50488 return path_double;
50490 return path_multi;
50493 /* Return insn dispatch group. */
50495 static enum dispatch_group
50496 get_insn_group (rtx_insn *insn)
50498 enum dispatch_group group = get_mem_group (insn);
50499 if (group)
50500 return group;
50502 if (is_branch (insn))
50503 return disp_branch;
50505 if (is_cmp (insn))
50506 return disp_cmp;
50508 if (has_immediate (insn))
50509 return disp_imm;
50511 if (is_prefetch (insn))
50512 return disp_prefetch;
50514 return disp_no_group;
50517 /* Count number of GROUP restricted instructions in a dispatch
50518 window WINDOW_LIST. */
50520 static int
50521 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50523 enum dispatch_group group = get_insn_group (insn);
50524 int imm_size;
50525 int num_imm_operand;
50526 int num_imm32_operand;
50527 int num_imm64_operand;
50529 if (group == disp_no_group)
50530 return 0;
50532 if (group == disp_imm)
50534 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50535 &num_imm64_operand);
50536 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50537 || num_imm_operand + window_list->num_imm > MAX_IMM
50538 || (num_imm32_operand > 0
50539 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50540 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50541 || (num_imm64_operand > 0
50542 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50543 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50544 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50545 && num_imm64_operand > 0
50546 && ((window_list->num_imm_64 > 0
50547 && window_list->num_insn >= 2)
50548 || window_list->num_insn >= 3)))
50549 return BIG;
50551 return 1;
50554 if ((group == disp_load_store
50555 && (window_list->num_loads >= MAX_LOAD
50556 || window_list->num_stores >= MAX_STORE))
50557 || ((group == disp_load
50558 || group == disp_prefetch)
50559 && window_list->num_loads >= MAX_LOAD)
50560 || (group == disp_store
50561 && window_list->num_stores >= MAX_STORE))
50562 return BIG;
50564 return 1;
50567 /* This function returns true if insn satisfies dispatch rules on the
50568 last window scheduled. */
50570 static bool
50571 fits_dispatch_window (rtx_insn *insn)
50573 dispatch_windows *window_list = dispatch_window_list;
50574 dispatch_windows *window_list_next = dispatch_window_list->next;
50575 unsigned int num_restrict;
50576 enum dispatch_group group = get_insn_group (insn);
50577 enum insn_path path = get_insn_path (insn);
50578 int sum;
50580 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50581 instructions should be given the lowest priority in the
50582 scheduling process in the Haifa scheduler to make sure they will be
50583 scheduled in the same dispatch window as the reference to them. */
50584 if (group == disp_jcc || group == disp_cmp)
50585 return false;
50587 /* Check nonrestricted. */
50588 if (group == disp_no_group || group == disp_branch)
50589 return true;
50591 /* Get last dispatch window. */
50592 if (window_list_next)
50593 window_list = window_list_next;
50595 if (window_list->window_num == 1)
50597 sum = window_list->prev->window_size + window_list->window_size;
50599 if (sum == 32
50600 || (min_insn_size (insn) + sum) >= 48)
50601 /* Window 1 is full. Go for next window. */
50602 return true;
50605 num_restrict = count_num_restricted (insn, window_list);
50607 if (num_restrict > num_allowable_groups[group])
50608 return false;
50610 /* See if it fits in the first window. */
50611 if (window_list->window_num == 0)
50613 /* The first window should have only single and double path
50614 uops. */
50615 if (path == path_double
50616 && (window_list->num_uops + 2) > MAX_INSN)
50617 return false;
50618 else if (path != path_single)
50619 return false;
50621 return true;
50624 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50625 dispatch window WINDOW_LIST. */
50627 static void
50628 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50630 int byte_len = min_insn_size (insn);
50631 int num_insn = window_list->num_insn;
50632 int imm_size;
50633 sched_insn_info *window = window_list->window;
50634 enum dispatch_group group = get_insn_group (insn);
50635 enum insn_path path = get_insn_path (insn);
50636 int num_imm_operand;
50637 int num_imm32_operand;
50638 int num_imm64_operand;
50640 if (!window_list->violation && group != disp_cmp
50641 && !fits_dispatch_window (insn))
50642 window_list->violation = true;
50644 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50645 &num_imm64_operand);
50647 /* Initialize window with new instruction. */
50648 window[num_insn].insn = insn;
50649 window[num_insn].byte_len = byte_len;
50650 window[num_insn].group = group;
50651 window[num_insn].path = path;
50652 window[num_insn].imm_bytes = imm_size;
50654 window_list->window_size += byte_len;
50655 window_list->num_insn = num_insn + 1;
50656 window_list->num_uops = window_list->num_uops + num_uops;
50657 window_list->imm_size += imm_size;
50658 window_list->num_imm += num_imm_operand;
50659 window_list->num_imm_32 += num_imm32_operand;
50660 window_list->num_imm_64 += num_imm64_operand;
50662 if (group == disp_store)
50663 window_list->num_stores += 1;
50664 else if (group == disp_load
50665 || group == disp_prefetch)
50666 window_list->num_loads += 1;
50667 else if (group == disp_load_store)
50669 window_list->num_stores += 1;
50670 window_list->num_loads += 1;
50674 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50675 If the total bytes of instructions or the number of instructions in
50676 the window exceeds the allowable limit, it allocates a new window. */
50678 static void
50679 add_to_dispatch_window (rtx_insn *insn)
50681 int byte_len;
50682 dispatch_windows *window_list;
50683 dispatch_windows *next_list;
50684 dispatch_windows *window0_list;
50685 enum insn_path path;
50686 enum dispatch_group insn_group;
50687 bool insn_fits;
50688 int num_insn;
50689 int num_uops;
50690 int window_num;
50691 int insn_num_uops;
50692 int sum;
50694 if (INSN_CODE (insn) < 0)
50695 return;
50697 byte_len = min_insn_size (insn);
50698 window_list = dispatch_window_list;
50699 next_list = window_list->next;
50700 path = get_insn_path (insn);
50701 insn_group = get_insn_group (insn);
50703 /* Get the last dispatch window. */
50704 if (next_list)
50705 window_list = dispatch_window_list->next;
50707 if (path == path_single)
50708 insn_num_uops = 1;
50709 else if (path == path_double)
50710 insn_num_uops = 2;
50711 else
50712 insn_num_uops = (int) path;
50714 /* If the current window is full, get a new window.
50715 Window number zero is full if MAX_INSN uops are scheduled in it.
50716 Window number one is full if the bytes in window zero plus the
50717 bytes in window one reach 32, if adding the new instruction's bytes
50718 would push the total past 48, or if it already holds MAX_INSN
50719 instructions.
50720 num_insn = window_list->num_insn;
50721 num_uops = window_list->num_uops;
50722 window_num = window_list->window_num;
50723 insn_fits = fits_dispatch_window (insn);
50725 if (num_insn >= MAX_INSN
50726 || num_uops + insn_num_uops > MAX_INSN
50727 || !(insn_fits))
50729 window_num = ~window_num & 1;
50730 window_list = allocate_next_window (window_num);
50733 if (window_num == 0)
50735 add_insn_window (insn, window_list, insn_num_uops);
50736 if (window_list->num_insn >= MAX_INSN
50737 && insn_group == disp_branch)
50739 process_end_window ();
50740 return;
50743 else if (window_num == 1)
50745 window0_list = window_list->prev;
50746 sum = window0_list->window_size + window_list->window_size;
50747 if (sum == 32
50748 || (byte_len + sum) >= 48)
50750 process_end_window ();
50751 window_list = dispatch_window_list;
50754 add_insn_window (insn, window_list, insn_num_uops);
50756 else
50757 gcc_unreachable ();
50759 if (is_end_basic_block (insn_group))
50761 /* The end of the basic block is reached; do end-of-basic-block processing. */
50762 process_end_window ();
50763 return;
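/* Worked example (illustrative addition, not part of the original source):
   suppose window 0 already holds MAX_INSN single-uop insns.  The next insn
   cannot fit, so window_num flips from 0 to 1 and the insn goes into
   window 1.  Once window 0 plus window 1 reach 32 bytes, or adding another
   insn would push the pair past 48 bytes, process_end_window () runs and
   both windows are reinitialized.  A branch ends the window pair
   immediately, since only branches are assumed to end a window.  */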
50767 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50769 DEBUG_FUNCTION static void
50770 debug_dispatch_window_file (FILE *file, int window_num)
50772 dispatch_windows *list;
50773 int i;
50775 if (window_num == 0)
50776 list = dispatch_window_list;
50777 else
50778 list = dispatch_window_list1;
50780 fprintf (file, "Window #%d:\n", list->window_num);
50781 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50782 list->num_insn, list->num_uops, list->window_size);
50783 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50784 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50786 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50787 list->num_stores);
50788 fprintf (file, " insn info:\n");
50790 for (i = 0; i < MAX_INSN; i++)
50792 if (!list->window[i].insn)
50793 break;
50794 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50795 i, group_name[list->window[i].group],
50796 i, (void *)list->window[i].insn,
50797 i, list->window[i].path,
50798 i, list->window[i].byte_len,
50799 i, list->window[i].imm_bytes);
50803 /* Print to stdout a dispatch window. */
50805 DEBUG_FUNCTION void
50806 debug_dispatch_window (int window_num)
50808 debug_dispatch_window_file (stdout, window_num);
50811 /* Print INSN dispatch information to FILE. */
50813 DEBUG_FUNCTION static void
50814 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50816 int byte_len;
50817 enum insn_path path;
50818 enum dispatch_group group;
50819 int imm_size;
50820 int num_imm_operand;
50821 int num_imm32_operand;
50822 int num_imm64_operand;
50824 if (INSN_CODE (insn) < 0)
50825 return;
50827 byte_len = min_insn_size (insn);
50828 path = get_insn_path (insn);
50829 group = get_insn_group (insn);
50830 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50831 &num_imm64_operand);
50833 fprintf (file, " insn info:\n");
50834 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50835 group_name[group], path, byte_len);
50836 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50837 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50840 /* Print to STDOUT the status of the ready list with respect to
50841 dispatch windows. */
50843 DEBUG_FUNCTION void
50844 debug_ready_dispatch (void)
50846 int i;
50847 int no_ready = number_in_ready ();
50849 fprintf (stdout, "Number of ready: %d\n", no_ready);
50851 for (i = 0; i < no_ready; i++)
50852 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50855 /* This routine is the driver of the dispatch scheduler. */
50857 static void
50858 do_dispatch (rtx_insn *insn, int mode)
50860 if (mode == DISPATCH_INIT)
50861 init_dispatch_sched ();
50862 else if (mode == ADD_TO_DISPATCH_WINDOW)
50863 add_to_dispatch_window (insn);
50866 /* Return TRUE if Dispatch Scheduling is supported. */
50868 static bool
50869 has_dispatch (rtx_insn *insn, int action)
50871 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
50872 && flag_dispatch_scheduler)
50873 switch (action)
50875 default:
50876 return false;
50878 case IS_DISPATCH_ON:
50879 return true;
50880 break;
50882 case IS_CMP:
50883 return is_cmp (insn);
50885 case DISPATCH_VIOLATION:
50886 return dispatch_violation ();
50888 case FITS_DISPATCH_WINDOW:
50889 return fits_dispatch_window (insn);
50892 return false;
50895 /* Implementation of reassociation_width target hook used by
50896 reassoc phase to identify parallelism level in reassociated
50897 tree. Statements tree_code is passed in OPC. Arguments type
50898 is passed in MODE.
50900 Currently parallel reassociation is enabled for Atom
50901 processors only and we set reassociation width to be 2
50902 because Atom may issue up to 2 instructions per cycle.
50904 Return value should be fixed if parallel reassociation is
50905 enabled for other processors. */
50907 static int
50908 ix86_reassociation_width (unsigned int, machine_mode mode)
50910 int res = 1;
50912 /* Vector part. */
50913 if (VECTOR_MODE_P (mode))
50915 if (TARGET_VECTOR_PARALLEL_EXECUTION)
50916 return 2;
50917 else
50918 return 1;
50921 /* Scalar part. */
50922 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
50923 res = 2;
50924 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
50925 res = 2;
50927 return res;
50930 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50931 place emms and femms instructions. */
50933 static machine_mode
50934 ix86_preferred_simd_mode (machine_mode mode)
50936 if (!TARGET_SSE)
50937 return word_mode;
50939 switch (mode)
50941 case QImode:
50942 return TARGET_AVX512BW ? V64QImode :
50943 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
50944 case HImode:
50945 return TARGET_AVX512BW ? V32HImode :
50946 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
50947 case SImode:
50948 return TARGET_AVX512F ? V16SImode :
50949 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
50950 case DImode:
50951 return TARGET_AVX512F ? V8DImode :
50952 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
50954 case SFmode:
50955 if (TARGET_AVX512F)
50956 return V16SFmode;
50957 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50958 return V8SFmode;
50959 else
50960 return V4SFmode;
50962 case DFmode:
50963 if (!TARGET_VECTORIZE_DOUBLE)
50964 return word_mode;
50965 else if (TARGET_AVX512F)
50966 return V8DFmode;
50967 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50968 return V4DFmode;
50969 else if (TARGET_SSE2)
50970 return V2DFmode;
50971 /* FALLTHRU */
50973 default:
50974 return word_mode;
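/* Examples (illustrative addition, not part of the original source): with
   -mavx2 and default tuning (TARGET_PREFER_AVX128 clear) SFmode maps to
   V8SFmode and DImode to V4DImode; with plain -msse2 the same modes map to
   V4SFmode and V2DImode; and DFmode falls back to word_mode when
   TARGET_VECTORIZE_DOUBLE is disabled.  */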
50978 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50979 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50980 256bit and 128bit vectors. */
50982 static unsigned int
50983 ix86_autovectorize_vector_sizes (void)
50985 return TARGET_AVX512F ? 64 | 32 | 16 :
50986 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
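/* For instance (illustrative addition, not part of the original source),
   with -mavx512f this returns 64 | 32 | 16, telling the vectorizer to try
   64-, 32- and 16-byte vector sizes in turn, while a return value of 0
   means only the size of the preferred SIMD mode is tried.  */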
50991 /* Return class of registers which could be used for pseudo of MODE
50992 and of class RCLASS for spilling instead of memory. Return NO_REGS
50993 if it is not possible or non-profitable. */
50994 static reg_class_t
50995 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50997 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
50998 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50999 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51000 return ALL_SSE_REGS;
51001 return NO_REGS;
51004 /* Implement targetm.vectorize.init_cost. */
51006 static void *
51007 ix86_init_cost (struct loop *)
51009 unsigned *cost = XNEWVEC (unsigned, 3);
51010 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51011 return cost;
51014 /* Implement targetm.vectorize.add_stmt_cost. */
51016 static unsigned
51017 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51018 struct _stmt_vec_info *stmt_info, int misalign,
51019 enum vect_cost_model_location where)
51021 unsigned *cost = (unsigned *) data;
51022 unsigned retval = 0;
51024 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51025 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51027 /* Statements in an inner loop relative to the loop being
51028 vectorized are weighted more heavily. The value here is
51029 arbitrary and could potentially be improved with analysis. */
51030 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51031 count *= 50; /* FIXME. */
51033 retval = (unsigned) (count * stmt_cost);
51035 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51036 for Silvermont, as it has an out-of-order integer pipeline and can execute
51037 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51038 if (TARGET_SILVERMONT || TARGET_INTEL)
51039 if (stmt_info && stmt_info->stmt)
51041 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51042 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51043 retval = (retval * 17) / 10;
51046 cost[where] += retval;
51048 return retval;
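/* Worked example (illustrative addition, not part of the original source):
   on Silvermont, a statement with COUNT == 1, a base cost of 4 from
   ix86_builtin_vectorization_cost and an integer-typed lhs is accounted as
   (1 * 4 * 17) / 10 = 6, and that value is added to cost[WHERE].  */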
51051 /* Implement targetm.vectorize.finish_cost. */
51053 static void
51054 ix86_finish_cost (void *data, unsigned *prologue_cost,
51055 unsigned *body_cost, unsigned *epilogue_cost)
51057 unsigned *cost = (unsigned *) data;
51058 *prologue_cost = cost[vect_prologue];
51059 *body_cost = cost[vect_body];
51060 *epilogue_cost = cost[vect_epilogue];
51063 /* Implement targetm.vectorize.destroy_cost_data. */
51065 static void
51066 ix86_destroy_cost_data (void *data)
51068 free (data);
51071 /* Validate target specific memory model bits in VAL. */
51073 static unsigned HOST_WIDE_INT
51074 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51076 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51077 bool strong;
51079 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51080 |MEMMODEL_MASK)
51081 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51083 warning (OPT_Winvalid_memory_model,
51084 "Unknown architecture specific memory model");
51085 return MEMMODEL_SEQ_CST;
51087 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51088 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51090 warning (OPT_Winvalid_memory_model,
51091 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51092 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51094 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51096 warning (OPT_Winvalid_memory_model,
51097 "HLE_RELEASE not used with RELEASE or stronger memory model");
51098 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51100 return val;
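/* Usage sketch (illustrative addition, not part of the original source):
   the HLE bits validated above correspond to the documented GCC macros
   __ATOMIC_HLE_ACQUIRE and __ATOMIC_HLE_RELEASE, e.g.

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining __ATOMIC_HLE_ACQUIRE with a weaker model such as
   __ATOMIC_RELAXED triggers the warning above and the model is forced to
   MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */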
51103 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51104 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51105 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51106 or number of vecsize_mangle variants that should be emitted. */
51108 static int
51109 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51110 struct cgraph_simd_clone *clonei,
51111 tree base_type, int num)
51113 int ret = 1;
51115 if (clonei->simdlen
51116 && (clonei->simdlen < 2
51117 || clonei->simdlen > 16
51118 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51120 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51121 "unsupported simdlen %d", clonei->simdlen);
51122 return 0;
51125 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51126 if (TREE_CODE (ret_type) != VOID_TYPE)
51127 switch (TYPE_MODE (ret_type))
51129 case QImode:
51130 case HImode:
51131 case SImode:
51132 case DImode:
51133 case SFmode:
51134 case DFmode:
51135 /* case SCmode: */
51136 /* case DCmode: */
51137 break;
51138 default:
51139 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51140 "unsupported return type %qT for simd\n", ret_type);
51141 return 0;
51144 tree t;
51145 int i;
51147 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51148 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51149 switch (TYPE_MODE (TREE_TYPE (t)))
51151 case QImode:
51152 case HImode:
51153 case SImode:
51154 case DImode:
51155 case SFmode:
51156 case DFmode:
51157 /* case SCmode: */
51158 /* case DCmode: */
51159 break;
51160 default:
51161 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51162 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51163 return 0;
51166 if (clonei->cilk_elemental)
51168 /* Parse the processor clause here. If not present, default to 'b'. */
51169 clonei->vecsize_mangle = 'b';
51171 else if (!TREE_PUBLIC (node->decl))
51173 /* If the function isn't exported, we can pick up just one ISA
51174 for the clones. */
51175 if (TARGET_AVX2)
51176 clonei->vecsize_mangle = 'd';
51177 else if (TARGET_AVX)
51178 clonei->vecsize_mangle = 'c';
51179 else
51180 clonei->vecsize_mangle = 'b';
51181 ret = 1;
51183 else
51185 clonei->vecsize_mangle = "bcd"[num];
51186 ret = 3;
51188 switch (clonei->vecsize_mangle)
51190 case 'b':
51191 clonei->vecsize_int = 128;
51192 clonei->vecsize_float = 128;
51193 break;
51194 case 'c':
51195 clonei->vecsize_int = 128;
51196 clonei->vecsize_float = 256;
51197 break;
51198 case 'd':
51199 clonei->vecsize_int = 256;
51200 clonei->vecsize_float = 256;
51201 break;
51203 if (clonei->simdlen == 0)
51205 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51206 clonei->simdlen = clonei->vecsize_int;
51207 else
51208 clonei->simdlen = clonei->vecsize_float;
51209 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51210 if (clonei->simdlen > 16)
51211 clonei->simdlen = 16;
51213 return ret;
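/* Worked example (illustrative addition, not part of the original source):
   for an exported clone whose BASE_TYPE is float (32 bits) and which has no
   explicit simdlen, the 'b' variant gets vecsize_float 128 and simdlen
   128 / 32 = 4, while the 'c' and 'd' variants get 256 / 32 = 8; with a
   64-bit BASE_TYPE such as double those simdlens halve to 2, 4 and 4.  */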
51216 /* Add target attribute to SIMD clone NODE if needed. */
51218 static void
51219 ix86_simd_clone_adjust (struct cgraph_node *node)
51221 const char *str = NULL;
51222 gcc_assert (node->decl == cfun->decl);
51223 switch (node->simdclone->vecsize_mangle)
51225 case 'b':
51226 if (!TARGET_SSE2)
51227 str = "sse2";
51228 break;
51229 case 'c':
51230 if (!TARGET_AVX)
51231 str = "avx";
51232 break;
51233 case 'd':
51234 if (!TARGET_AVX2)
51235 str = "avx2";
51236 break;
51237 default:
51238 gcc_unreachable ();
51240 if (str == NULL)
51241 return;
51242 push_cfun (NULL);
51243 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51244 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51245 gcc_assert (ok);
51246 pop_cfun ();
51247 ix86_reset_previous_fndecl ();
51248 ix86_set_current_function (node->decl);
51251 /* If SIMD clone NODE can't be used in a vectorized loop
51252 in current function, return -1, otherwise return a badness of using it
51253 (0 if it is most desirable from vecsize_mangle point of view, 1
51254 slightly less desirable, etc.). */
51256 static int
51257 ix86_simd_clone_usable (struct cgraph_node *node)
51259 switch (node->simdclone->vecsize_mangle)
51261 case 'b':
51262 if (!TARGET_SSE2)
51263 return -1;
51264 if (!TARGET_AVX)
51265 return 0;
51266 return TARGET_AVX2 ? 2 : 1;
51267 case 'c':
51268 if (!TARGET_AVX)
51269 return -1;
51270 return TARGET_AVX2 ? 1 : 0;
51271 break;
51272 case 'd':
51273 if (!TARGET_AVX2)
51274 return -1;
51275 return 0;
51276 default:
51277 gcc_unreachable ();
51281 /* This function adjusts the unroll factor based on
51282 the hardware capabilities. For example, bdver3 has
51283 a loop buffer which makes unrolling of smaller
51284 loops less important. This function decides the
51285 unroll factor using the number of memory references
51286 (the value 32 is used) as a heuristic.
51288 static unsigned
51289 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51291 basic_block *bbs;
51292 rtx_insn *insn;
51293 unsigned i;
51294 unsigned mem_count = 0;
51296 if (!TARGET_ADJUST_UNROLL)
51297 return nunroll;
51299 /* Count the number of memory references within the loop body.
51300 This value determines the unrolling factor for bdver3 and bdver4
51301 architectures. */
51302 subrtx_iterator::array_type array;
51303 bbs = get_loop_body (loop);
51304 for (i = 0; i < loop->num_nodes; i++)
51305 FOR_BB_INSNS (bbs[i], insn)
51306 if (NONDEBUG_INSN_P (insn))
51307 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51308 if (const_rtx x = *iter)
51309 if (MEM_P (x))
51311 machine_mode mode = GET_MODE (x);
51312 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51313 if (n_words > 4)
51314 mem_count += 2;
51315 else
51316 mem_count += 1;
51318 free (bbs);
51320 if (mem_count && mem_count <= 32)
51321 return 32 / mem_count;
51323 return nunroll;
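/* Worked example (illustrative addition, not part of the original source):
   a loop body with four memory references of at most four words each gives
   mem_count == 4 and an unroll factor of 32 / 4 = 8, whereas a loop with no
   memory references, or with more than 32 of them, keeps the factor NUNROLL
   chosen by the generic unroller.  */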
51327 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51329 static bool
51330 ix86_float_exceptions_rounding_supported_p (void)
51332 /* For x87 floating point with standard excess precision handling,
51333 there is no adddf3 pattern (since x87 floating point only has
51334 XFmode operations) so the default hook implementation gets this
51335 wrong. */
51336 return TARGET_80387 || TARGET_SSE_MATH;
51339 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51341 static void
51342 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51344 if (!TARGET_80387 && !TARGET_SSE_MATH)
51345 return;
51346 tree exceptions_var = create_tmp_var (integer_type_node);
51347 if (TARGET_80387)
51349 tree fenv_index_type = build_index_type (size_int (6));
51350 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51351 tree fenv_var = create_tmp_var (fenv_type);
51352 mark_addressable (fenv_var);
51353 tree fenv_ptr = build_pointer_type (fenv_type);
51354 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51355 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51356 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51357 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51358 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51359 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51360 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51361 tree hold_fnclex = build_call_expr (fnclex, 0);
51362 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51363 hold_fnclex);
51364 *clear = build_call_expr (fnclex, 0);
51365 tree sw_var = create_tmp_var (short_unsigned_type_node);
51366 tree fnstsw_call = build_call_expr (fnstsw, 0);
51367 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51368 sw_var, fnstsw_call);
51369 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51370 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51371 exceptions_var, exceptions_x87);
51372 *update = build2 (COMPOUND_EXPR, integer_type_node,
51373 sw_mod, update_mod);
51374 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51375 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51377 if (TARGET_SSE_MATH)
51379 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51380 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51381 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51382 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51383 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51384 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51385 mxcsr_orig_var, stmxcsr_hold_call);
51386 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51387 mxcsr_orig_var,
51388 build_int_cst (unsigned_type_node, 0x1f80));
51389 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51390 build_int_cst (unsigned_type_node, 0xffffffc0));
51391 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51392 mxcsr_mod_var, hold_mod_val);
51393 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51394 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51395 hold_assign_orig, hold_assign_mod);
51396 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51397 ldmxcsr_hold_call);
51398 if (*hold)
51399 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51400 else
51401 *hold = hold_all;
51402 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51403 if (*clear)
51404 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51405 ldmxcsr_clear_call);
51406 else
51407 *clear = ldmxcsr_clear_call;
51408 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51409 tree exceptions_sse = fold_convert (integer_type_node,
51410 stxmcsr_update_call);
51411 if (*update)
51413 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51414 exceptions_var, exceptions_sse);
51415 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51416 exceptions_var, exceptions_mod);
51417 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51418 exceptions_assign);
51420 else
51421 *update = build2 (MODIFY_EXPR, integer_type_node,
51422 exceptions_var, exceptions_sse);
51423 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51424 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51425 ldmxcsr_update_call);
51427 tree atomic_feraiseexcept
51428 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51429 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51430 1, exceptions_var);
51431 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51432 atomic_feraiseexcept_call);
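/* The trees built above are roughly equivalent to the following C sketch
   (illustrative addition, not part of the original source), shown for the
   x87-only case:

     hold:    __builtin_ia32_fnstenv (&fenv); __builtin_ia32_fnclex ();
     clear:   __builtin_ia32_fnclex ();
     update:  exceptions = __builtin_ia32_fnstsw ();
              __builtin_ia32_fldenv (&fenv);
              __atomic_feraiseexcept (exceptions);

   When SSE math is enabled as well, MXCSR is saved, reloaded with its
   exception masks (0x1f80) set and its status flags cleared for the hold
   sequence, and its exception flags are OR-ed into EXCEPTIONS before the
   __atomic_feraiseexcept call in the update sequence.  */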
51435 /* Return mode to be used for bounds or VOIDmode
51436 if bounds are not supported. */
51438 static enum machine_mode
51439 ix86_mpx_bound_mode ()
51441 /* Do not support pointer checker if MPX
51442 is not enabled. */
51443 if (!TARGET_MPX)
51445 if (flag_check_pointer_bounds)
51446 warning (0, "Pointer Checker requires MPX support on this target."
51447 " Use -mmpx options to enable MPX.");
51448 return VOIDmode;
51451 return BNDmode;
51454 /* Return constant used to statically initialize constant bounds.
51456 This function is used to create special bound values. For now
51457 only INIT bounds and NONE bounds are expected. More special
51458 values may be added later. */
51460 static tree
51461 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51463 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51464 : build_zero_cst (pointer_sized_int_node);
51465 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51466 : build_minus_one_cst (pointer_sized_int_node);
51468 /* This function is supposed to be used to create INIT and
51469 NONE bounds only. */
51470 gcc_assert ((lb == 0 && ub == -1)
51471 || (lb == -1 && ub == 0));
51473 return build_complex (NULL, low, high);
51476 /* Generate a list of statements STMTS to initialize pointer bounds
51477 variable VAR with bounds LB and UB. Return the number of generated
51478 statements. */
51480 static int
51481 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51483 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51484 tree lhs, modify, var_p;
51486 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51487 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51489 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51490 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51491 append_to_statement_list (modify, stmts);
51493 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51494 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51495 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51496 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51497 append_to_statement_list (modify, stmts);
51499 return 2;
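/* Illustrative sketch (addition, not part of the original source): for a
   bounds variable VAR the two generated statements behave like

     *(uintptr_t *) &VAR       = LB;
     *((uintptr_t *) &VAR + 1) = ~UB;

   i.e. the lower bound is stored as-is and the upper bound is stored in
   one's-complement form, matching how ix86_make_bounds_constant above
   encodes its upper bound.  */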
51502 /* Initialize the GCC target structure. */
51503 #undef TARGET_RETURN_IN_MEMORY
51504 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51506 #undef TARGET_LEGITIMIZE_ADDRESS
51507 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51509 #undef TARGET_ATTRIBUTE_TABLE
51510 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51511 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51512 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51513 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51514 # undef TARGET_MERGE_DECL_ATTRIBUTES
51515 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51516 #endif
51518 #undef TARGET_COMP_TYPE_ATTRIBUTES
51519 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51521 #undef TARGET_INIT_BUILTINS
51522 #define TARGET_INIT_BUILTINS ix86_init_builtins
51523 #undef TARGET_BUILTIN_DECL
51524 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51525 #undef TARGET_EXPAND_BUILTIN
51526 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51528 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51529 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51530 ix86_builtin_vectorized_function
51532 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51533 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51535 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51536 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51538 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51539 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51541 #undef TARGET_BUILTIN_RECIPROCAL
51542 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51544 #undef TARGET_ASM_FUNCTION_EPILOGUE
51545 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51547 #undef TARGET_ENCODE_SECTION_INFO
51548 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51549 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51550 #else
51551 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51552 #endif
51554 #undef TARGET_ASM_OPEN_PAREN
51555 #define TARGET_ASM_OPEN_PAREN ""
51556 #undef TARGET_ASM_CLOSE_PAREN
51557 #define TARGET_ASM_CLOSE_PAREN ""
51559 #undef TARGET_ASM_BYTE_OP
51560 #define TARGET_ASM_BYTE_OP ASM_BYTE
51562 #undef TARGET_ASM_ALIGNED_HI_OP
51563 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51564 #undef TARGET_ASM_ALIGNED_SI_OP
51565 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51566 #ifdef ASM_QUAD
51567 #undef TARGET_ASM_ALIGNED_DI_OP
51568 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51569 #endif
51571 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51572 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51574 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51575 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51577 #undef TARGET_ASM_UNALIGNED_HI_OP
51578 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51579 #undef TARGET_ASM_UNALIGNED_SI_OP
51580 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51581 #undef TARGET_ASM_UNALIGNED_DI_OP
51582 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51584 #undef TARGET_PRINT_OPERAND
51585 #define TARGET_PRINT_OPERAND ix86_print_operand
51586 #undef TARGET_PRINT_OPERAND_ADDRESS
51587 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51588 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51589 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51590 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51591 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51593 #undef TARGET_SCHED_INIT_GLOBAL
51594 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51595 #undef TARGET_SCHED_ADJUST_COST
51596 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51597 #undef TARGET_SCHED_ISSUE_RATE
51598 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51599 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51600 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51601 ia32_multipass_dfa_lookahead
51602 #undef TARGET_SCHED_MACRO_FUSION_P
51603 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51604 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51605 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51607 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51608 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51610 #undef TARGET_MEMMODEL_CHECK
51611 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51613 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51614 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51616 #ifdef HAVE_AS_TLS
51617 #undef TARGET_HAVE_TLS
51618 #define TARGET_HAVE_TLS true
51619 #endif
51620 #undef TARGET_CANNOT_FORCE_CONST_MEM
51621 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51622 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51623 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51625 #undef TARGET_DELEGITIMIZE_ADDRESS
51626 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51628 #undef TARGET_MS_BITFIELD_LAYOUT_P
51629 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51631 #if TARGET_MACHO
51632 #undef TARGET_BINDS_LOCAL_P
51633 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51634 #endif
51635 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51636 #undef TARGET_BINDS_LOCAL_P
51637 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51638 #endif
51640 #undef TARGET_ASM_OUTPUT_MI_THUNK
51641 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51642 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51643 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51645 #undef TARGET_ASM_FILE_START
51646 #define TARGET_ASM_FILE_START x86_file_start
51648 #undef TARGET_OPTION_OVERRIDE
51649 #define TARGET_OPTION_OVERRIDE ix86_option_override
51651 #undef TARGET_REGISTER_MOVE_COST
51652 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51653 #undef TARGET_MEMORY_MOVE_COST
51654 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51655 #undef TARGET_RTX_COSTS
51656 #define TARGET_RTX_COSTS ix86_rtx_costs
51657 #undef TARGET_ADDRESS_COST
51658 #define TARGET_ADDRESS_COST ix86_address_cost
51660 #undef TARGET_FIXED_CONDITION_CODE_REGS
51661 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51662 #undef TARGET_CC_MODES_COMPATIBLE
51663 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51665 #undef TARGET_MACHINE_DEPENDENT_REORG
51666 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51668 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51669 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51671 #undef TARGET_BUILD_BUILTIN_VA_LIST
51672 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51674 #undef TARGET_FOLD_BUILTIN
51675 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51677 #undef TARGET_COMPARE_VERSION_PRIORITY
51678 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51680 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51681 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51682 ix86_generate_version_dispatcher_body
51684 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51685 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51686 ix86_get_function_versions_dispatcher
51688 #undef TARGET_ENUM_VA_LIST_P
51689 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51691 #undef TARGET_FN_ABI_VA_LIST
51692 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51694 #undef TARGET_CANONICAL_VA_LIST_TYPE
51695 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51697 #undef TARGET_EXPAND_BUILTIN_VA_START
51698 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51700 #undef TARGET_MD_ASM_CLOBBERS
51701 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51703 #undef TARGET_PROMOTE_PROTOTYPES
51704 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51705 #undef TARGET_SETUP_INCOMING_VARARGS
51706 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51707 #undef TARGET_MUST_PASS_IN_STACK
51708 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51709 #undef TARGET_FUNCTION_ARG_ADVANCE
51710 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51711 #undef TARGET_FUNCTION_ARG
51712 #define TARGET_FUNCTION_ARG ix86_function_arg
51713 #undef TARGET_INIT_PIC_REG
51714 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51715 #undef TARGET_USE_PSEUDO_PIC_REG
51716 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51717 #undef TARGET_FUNCTION_ARG_BOUNDARY
51718 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51719 #undef TARGET_PASS_BY_REFERENCE
51720 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51721 #undef TARGET_INTERNAL_ARG_POINTER
51722 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51723 #undef TARGET_UPDATE_STACK_BOUNDARY
51724 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51725 #undef TARGET_GET_DRAP_RTX
51726 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51727 #undef TARGET_STRICT_ARGUMENT_NAMING
51728 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51729 #undef TARGET_STATIC_CHAIN
51730 #define TARGET_STATIC_CHAIN ix86_static_chain
51731 #undef TARGET_TRAMPOLINE_INIT
51732 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51733 #undef TARGET_RETURN_POPS_ARGS
51734 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51736 #undef TARGET_LEGITIMATE_COMBINED_INSN
51737 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51739 #undef TARGET_ASAN_SHADOW_OFFSET
51740 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51742 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51743 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51745 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51746 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51748 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51749 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51751 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51752 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51753 ix86_libgcc_floating_mode_supported_p
51755 #undef TARGET_C_MODE_FOR_SUFFIX
51756 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51758 #ifdef HAVE_AS_TLS
51759 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51760 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51761 #endif
51763 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51764 #undef TARGET_INSERT_ATTRIBUTES
51765 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51766 #endif
51768 #undef TARGET_MANGLE_TYPE
51769 #define TARGET_MANGLE_TYPE ix86_mangle_type
51771 #if !TARGET_MACHO
51772 #undef TARGET_STACK_PROTECT_FAIL
51773 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51774 #endif
51776 #undef TARGET_FUNCTION_VALUE
51777 #define TARGET_FUNCTION_VALUE ix86_function_value
51779 #undef TARGET_FUNCTION_VALUE_REGNO_P
51780 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51782 #undef TARGET_PROMOTE_FUNCTION_MODE
51783 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51785 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51786 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51788 #undef TARGET_INSTANTIATE_DECLS
51789 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51791 #undef TARGET_SECONDARY_RELOAD
51792 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51794 #undef TARGET_CLASS_MAX_NREGS
51795 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51797 #undef TARGET_PREFERRED_RELOAD_CLASS
51798 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51799 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51800 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51801 #undef TARGET_CLASS_LIKELY_SPILLED_P
51802 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51804 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51805 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51806 ix86_builtin_vectorization_cost
51807 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51808 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51809 ix86_vectorize_vec_perm_const_ok
51810 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51811 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51812 ix86_preferred_simd_mode
51813 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51814 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51815 ix86_autovectorize_vector_sizes
51816 #undef TARGET_VECTORIZE_INIT_COST
51817 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51818 #undef TARGET_VECTORIZE_ADD_STMT_COST
51819 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51820 #undef TARGET_VECTORIZE_FINISH_COST
51821 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51822 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51823 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51825 #undef TARGET_SET_CURRENT_FUNCTION
51826 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51828 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51829 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51831 #undef TARGET_OPTION_SAVE
51832 #define TARGET_OPTION_SAVE ix86_function_specific_save
51834 #undef TARGET_OPTION_RESTORE
51835 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51837 #undef TARGET_OPTION_PRINT
51838 #define TARGET_OPTION_PRINT ix86_function_specific_print
51840 #undef TARGET_OPTION_FUNCTION_VERSIONS
51841 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
51843 #undef TARGET_CAN_INLINE_P
51844 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51846 #undef TARGET_EXPAND_TO_RTL_HOOK
51847 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
51849 #undef TARGET_LEGITIMATE_ADDRESS_P
51850 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51852 #undef TARGET_LRA_P
51853 #define TARGET_LRA_P hook_bool_void_true
51855 #undef TARGET_REGISTER_PRIORITY
51856 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51858 #undef TARGET_REGISTER_USAGE_LEVELING_P
51859 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51861 #undef TARGET_LEGITIMATE_CONSTANT_P
51862 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
51864 #undef TARGET_FRAME_POINTER_REQUIRED
51865 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51867 #undef TARGET_CAN_ELIMINATE
51868 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51870 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51871 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
51873 #undef TARGET_ASM_CODE_END
51874 #define TARGET_ASM_CODE_END ix86_code_end
51876 #undef TARGET_CONDITIONAL_REGISTER_USAGE
51877 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
51879 #if TARGET_MACHO
51880 #undef TARGET_INIT_LIBFUNCS
51881 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
51882 #endif
51884 #undef TARGET_LOOP_UNROLL_ADJUST
51885 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
51887 #undef TARGET_SPILL_CLASS
51888 #define TARGET_SPILL_CLASS ix86_spill_class
51890 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
51891 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
51892 ix86_simd_clone_compute_vecsize_and_simdlen
51894 #undef TARGET_SIMD_CLONE_ADJUST
51895 #define TARGET_SIMD_CLONE_ADJUST \
51896 ix86_simd_clone_adjust
51898 #undef TARGET_SIMD_CLONE_USABLE
51899 #define TARGET_SIMD_CLONE_USABLE \
51900 ix86_simd_clone_usable
51902 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
51903 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
51904 ix86_float_exceptions_rounding_supported_p
51906 #undef TARGET_MODE_EMIT
51907 #define TARGET_MODE_EMIT ix86_emit_mode_set
51909 #undef TARGET_MODE_NEEDED
51910 #define TARGET_MODE_NEEDED ix86_mode_needed
51912 #undef TARGET_MODE_AFTER
51913 #define TARGET_MODE_AFTER ix86_mode_after
51915 #undef TARGET_MODE_ENTRY
51916 #define TARGET_MODE_ENTRY ix86_mode_entry
51918 #undef TARGET_MODE_EXIT
51919 #define TARGET_MODE_EXIT ix86_mode_exit
51921 #undef TARGET_MODE_PRIORITY
51922 #define TARGET_MODE_PRIORITY ix86_mode_priority
51924 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
51925 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
51927 #undef TARGET_LOAD_BOUNDS_FOR_ARG
51928 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
51930 #undef TARGET_STORE_BOUNDS_FOR_ARG
51931 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
51933 #undef TARGET_LOAD_RETURNED_BOUNDS
51934 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
51936 #undef TARGET_STORE_RETURNED_BOUNDS
51937 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
51939 #undef TARGET_CHKP_BOUND_MODE
51940 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
51942 #undef TARGET_BUILTIN_CHKP_FUNCTION
51943 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
51945 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
51946 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
51948 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
51949 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
51951 #undef TARGET_CHKP_INITIALIZE_BOUNDS
51952 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
51954 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
51955 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
51957 #undef TARGET_OFFLOAD_OPTIONS
51958 #define TARGET_OFFLOAD_OPTIONS \
51959 ix86_offload_options
51961 struct gcc_target targetm = TARGET_INITIALIZER;
51963 #include "gt-i386.h"