1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "hash-map.h"
68 #include "is-a.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "dwarf2.h"
82 #include "df.h"
83 #include "tm-constrs.h"
84 #include "params.h"
85 #include "cselib.h"
86 #include "debug.h"
87 #include "sched-int.h"
88 #include "sbitmap.h"
89 #include "fibheap.h"
90 #include "opts.h"
91 #include "diagnostic.h"
92 #include "dumpfile.h"
93 #include "tree-pass.h"
94 #include "wide-int.h"
95 #include "context.h"
96 #include "pass_manager.h"
97 #include "target-globals.h"
98 #include "tree-vectorizer.h"
99 #include "shrink-wrap.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102 #include "tree-iterator.h"
103 #include "tree-chkp.h"
104 #include "rtl-chkp.h"
106 static rtx legitimize_dllimport_symbol (rtx, bool);
107 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
108 static rtx legitimize_pe_coff_symbol (rtx, bool);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
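/* Editorial sketch, not part of the original i386.c: a stand-alone model of
   how MODE_INDEX is meant to be used to pick the per-mode entry out of the
   mult and divide cost arrays defined below.  The enum, the table and main()
   here are hypothetical illustrations; in GCC the real per-mode costs live
   in struct processor_costs and are consulted by the rtx-cost hooks.  The
   block is guarded with #if 0 so it is never compiled as part of this file.  */
#if 0
#include <stdio.h>

enum example_mode { QImode, HImode, SImode, DImode, TImode };

/* Stand-alone copy of the MODE_INDEX mapping above.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)

int
main (void)
{
  /* One multiply cost per slot, mirroring the shape of the mult arrays
     below; slot 4 is the "other" entry used for every remaining mode.  */
  static const int mult_cost[5] = { 3, 3, 3, 3, 5 };

  printf ("SImode multiply cost: %d\n", mult_cost[MODE_INDEX (SImode)]); /* 3 */
  printf ("TImode multiply cost: %d\n", mult_cost[MODE_INDEX (TImode)]); /* 5 */
  return 0;
}
#endif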
122 /* Processor costs (relative to an add) */
123 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
124 #define COSTS_N_BYTES(N) ((N) * 2)
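/* Editorial note, not original text: a worked example of the calibration
   assumed above.  With COSTS_N_INSNS (N) defined as (N) * 4, a one-insn add
   costs 4 on the speed scale; the same add is assumed to be 2 bytes long, so
   on the size scale it costs COSTS_N_BYTES (2) == 2 * 2 == 4.  Both scales
   therefore give the baseline add the same score, which is what lets the
   size-tuned table below reuse the same cost fields as the speed-tuned
   tables that follow it.  */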
126 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
128 static stringop_algs ix86_size_memcpy[2] = {
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
131 static stringop_algs ix86_size_memset[2] = {
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
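/* Editorial sketch, not part of the original file: how a stringop_algs table
   like the ones above is shaped and consulted.  Reading the initializers,
   each of the two array elements appears to describe one code model (32-bit
   and 64-bit respectively), and each element reads as
   { algorithm for unknown sizes, {{ max byte count, algorithm, noalign }, ... }}
   with max == -1 meaning "any remaining size".  The type and function names
   below are hypothetical stand-ins; in GCC the real type lives in i386.h and
   the real selection logic in the string-operation expansion code.  Guarded
   with #if 0 so it is never compiled as part of this file.  */
#if 0
#include <stdio.h>

enum example_alg { no_stringop, libcall, rep_prefix_1_byte, rep_prefix_4_byte };

struct example_strategy { int max; enum example_alg alg; int noalign; };

/* Pick the first entry whose max covers NBYTES; max == -1 terminates the
   list and means "any remaining size".  */
static enum example_alg
example_pick_alg (const struct example_strategy *table, int nbytes)
{
  int i;
  for (i = 0; ; i++)
    if (table[i].max == -1 || nbytes <= table[i].max)
      return table[i].alg;
}

int
main (void)
{
  /* Mirror of the 32-bit ix86_size_memcpy entry above: rep movsb for
     every known size.  */
  static const struct example_strategy size_memcpy[]
    = { { -1, rep_prefix_1_byte, 0 } };

  printf ("chosen alg: %d\n", (int) example_pick_alg (size_memcpy, 4096));
  return 0;
}
#endif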
135 const
136 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
137 COSTS_N_BYTES (2), /* cost of an add instruction */
138 COSTS_N_BYTES (3), /* cost of a lea instruction */
139 COSTS_N_BYTES (2), /* variable shift costs */
140 COSTS_N_BYTES (3), /* constant shift costs */
141 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
142 COSTS_N_BYTES (3), /* HI */
143 COSTS_N_BYTES (3), /* SI */
144 COSTS_N_BYTES (3), /* DI */
145 COSTS_N_BYTES (5)}, /* other */
146 0, /* cost of multiply per each bit set */
147 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
148 COSTS_N_BYTES (3), /* HI */
149 COSTS_N_BYTES (3), /* SI */
150 COSTS_N_BYTES (3), /* DI */
151 COSTS_N_BYTES (5)}, /* other */
152 COSTS_N_BYTES (3), /* cost of movsx */
153 COSTS_N_BYTES (3), /* cost of movzx */
154 0, /* "large" insn */
155 2, /* MOVE_RATIO */
156 2, /* cost for loading QImode using movzbl */
157 {2, 2, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 2}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {2, 2, 2}, /* cost of storing fp registers
165 in SFmode, DFmode and XFmode */
166 3, /* cost of moving MMX register */
167 {3, 3}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {3, 3}, /* cost of storing MMX registers
170 in SImode and DImode */
171 3, /* cost of moving SSE register */
172 {3, 3, 3}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {3, 3, 3}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3, /* MMX or SSE register to integer */
177 0, /* size of l1 cache */
178 0, /* size of l2 cache */
179 0, /* size of prefetch block */
180 0, /* number of parallel prefetches */
181 2, /* Branch cost */
182 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
183 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
184 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
185 COSTS_N_BYTES (2), /* cost of FABS instruction. */
186 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
187 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
188 ix86_size_memcpy,
189 ix86_size_memset,
190 1, /* scalar_stmt_cost. */
191 1, /* scalar load_cost. */
192 1, /* scalar_store_cost. */
193 1, /* vec_stmt_cost. */
194 1, /* vec_to_scalar_cost. */
195 1, /* scalar_to_vec_cost. */
196 1, /* vec_align_load_cost. */
197 1, /* vec_unalign_load_cost. */
198 1, /* vec_store_cost. */
199 1, /* cond_taken_branch_cost. */
200   1, /* cond_not_taken_branch_cost. */
201 };
203 /* Processor costs (relative to an add) */
204 static stringop_algs i386_memcpy[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207 static stringop_algs i386_memset[2] = {
208 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
209 DUMMY_STRINGOP_ALGS};
211 static const
212 struct processor_costs i386_cost = { /* 386 specific costs */
213 COSTS_N_INSNS (1), /* cost of an add instruction */
214 COSTS_N_INSNS (1), /* cost of a lea instruction */
215 COSTS_N_INSNS (3), /* variable shift costs */
216 COSTS_N_INSNS (2), /* constant shift costs */
217 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
218 COSTS_N_INSNS (6), /* HI */
219 COSTS_N_INSNS (6), /* SI */
220 COSTS_N_INSNS (6), /* DI */
221 COSTS_N_INSNS (6)}, /* other */
222 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
223 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
224 COSTS_N_INSNS (23), /* HI */
225 COSTS_N_INSNS (23), /* SI */
226 COSTS_N_INSNS (23), /* DI */
227 COSTS_N_INSNS (23)}, /* other */
228 COSTS_N_INSNS (3), /* cost of movsx */
229 COSTS_N_INSNS (2), /* cost of movzx */
230 15, /* "large" insn */
231 3, /* MOVE_RATIO */
232 4, /* cost for loading QImode using movzbl */
233 {2, 4, 2}, /* cost of loading integer registers
234 in QImode, HImode and SImode.
235 Relative to reg-reg move (2). */
236 {2, 4, 2}, /* cost of storing integer registers */
237 2, /* cost of reg,reg fld/fst */
238 {8, 8, 8}, /* cost of loading fp registers
239 in SFmode, DFmode and XFmode */
240 {8, 8, 8}, /* cost of storing fp registers
241 in SFmode, DFmode and XFmode */
242 2, /* cost of moving MMX register */
243 {4, 8}, /* cost of loading MMX registers
244 in SImode and DImode */
245 {4, 8}, /* cost of storing MMX registers
246 in SImode and DImode */
247 2, /* cost of moving SSE register */
248 {4, 8, 16}, /* cost of loading SSE registers
249 in SImode, DImode and TImode */
250 {4, 8, 16}, /* cost of storing SSE registers
251 in SImode, DImode and TImode */
252 3, /* MMX or SSE register to integer */
253 0, /* size of l1 cache */
254 0, /* size of l2 cache */
255 0, /* size of prefetch block */
256 0, /* number of parallel prefetches */
257 1, /* Branch cost */
258 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
259 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
260 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
261 COSTS_N_INSNS (22), /* cost of FABS instruction. */
262 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
263 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
264 i386_memcpy,
265 i386_memset,
266 1, /* scalar_stmt_cost. */
267 1, /* scalar load_cost. */
268 1, /* scalar_store_cost. */
269 1, /* vec_stmt_cost. */
270 1, /* vec_to_scalar_cost. */
271 1, /* scalar_to_vec_cost. */
272 1, /* vec_align_load_cost. */
273 2, /* vec_unalign_load_cost. */
274 1, /* vec_store_cost. */
275 3, /* cond_taken_branch_cost. */
276   1, /* cond_not_taken_branch_cost. */
277 };
279 static stringop_algs i486_memcpy[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282 static stringop_algs i486_memset[2] = {
283 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
284 DUMMY_STRINGOP_ALGS};
286 static const
287 struct processor_costs i486_cost = { /* 486 specific costs */
288 COSTS_N_INSNS (1), /* cost of an add instruction */
289 COSTS_N_INSNS (1), /* cost of a lea instruction */
290 COSTS_N_INSNS (3), /* variable shift costs */
291 COSTS_N_INSNS (2), /* constant shift costs */
292 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
293 COSTS_N_INSNS (12), /* HI */
294 COSTS_N_INSNS (12), /* SI */
295 COSTS_N_INSNS (12), /* DI */
296 COSTS_N_INSNS (12)}, /* other */
297 1, /* cost of multiply per each bit set */
298 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
299 COSTS_N_INSNS (40), /* HI */
300 COSTS_N_INSNS (40), /* SI */
301 COSTS_N_INSNS (40), /* DI */
302 COSTS_N_INSNS (40)}, /* other */
303 COSTS_N_INSNS (3), /* cost of movsx */
304 COSTS_N_INSNS (2), /* cost of movzx */
305 15, /* "large" insn */
306 3, /* MOVE_RATIO */
307 4, /* cost for loading QImode using movzbl */
308 {2, 4, 2}, /* cost of loading integer registers
309 in QImode, HImode and SImode.
310 Relative to reg-reg move (2). */
311 {2, 4, 2}, /* cost of storing integer registers */
312 2, /* cost of reg,reg fld/fst */
313 {8, 8, 8}, /* cost of loading fp registers
314 in SFmode, DFmode and XFmode */
315 {8, 8, 8}, /* cost of storing fp registers
316 in SFmode, DFmode and XFmode */
317 2, /* cost of moving MMX register */
318 {4, 8}, /* cost of loading MMX registers
319 in SImode and DImode */
320 {4, 8}, /* cost of storing MMX registers
321 in SImode and DImode */
322 2, /* cost of moving SSE register */
323 {4, 8, 16}, /* cost of loading SSE registers
324 in SImode, DImode and TImode */
325 {4, 8, 16}, /* cost of storing SSE registers
326 in SImode, DImode and TImode */
327 3, /* MMX or SSE register to integer */
328 4, /* size of l1 cache. 486 has 8kB cache
329 shared for code and data, so 4kB is
330 not really precise. */
331 4, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 1, /* Branch cost */
335 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (3), /* cost of FABS instruction. */
339 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
341 i486_memcpy,
342 i486_memset,
343 1, /* scalar_stmt_cost. */
344 1, /* scalar load_cost. */
345 1, /* scalar_store_cost. */
346 1, /* vec_stmt_cost. */
347 1, /* vec_to_scalar_cost. */
348 1, /* scalar_to_vec_cost. */
349 1, /* vec_align_load_cost. */
350 2, /* vec_unalign_load_cost. */
351 1, /* vec_store_cost. */
352 3, /* cond_taken_branch_cost. */
353   1, /* cond_not_taken_branch_cost. */
354 };
356 static stringop_algs pentium_memcpy[2] = {
357 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
358 DUMMY_STRINGOP_ALGS};
359 static stringop_algs pentium_memset[2] = {
360 {libcall, {{-1, rep_prefix_4_byte, false}}},
361 DUMMY_STRINGOP_ALGS};
363 static const
364 struct processor_costs pentium_cost = {
365 COSTS_N_INSNS (1), /* cost of an add instruction */
366 COSTS_N_INSNS (1), /* cost of a lea instruction */
367 COSTS_N_INSNS (4), /* variable shift costs */
368 COSTS_N_INSNS (1), /* constant shift costs */
369 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
370 COSTS_N_INSNS (11), /* HI */
371 COSTS_N_INSNS (11), /* SI */
372 COSTS_N_INSNS (11), /* DI */
373 COSTS_N_INSNS (11)}, /* other */
374 0, /* cost of multiply per each bit set */
375 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
376 COSTS_N_INSNS (25), /* HI */
377 COSTS_N_INSNS (25), /* SI */
378 COSTS_N_INSNS (25), /* DI */
379 COSTS_N_INSNS (25)}, /* other */
380 COSTS_N_INSNS (3), /* cost of movsx */
381 COSTS_N_INSNS (2), /* cost of movzx */
382 8, /* "large" insn */
383 6, /* MOVE_RATIO */
384 6, /* cost for loading QImode using movzbl */
385 {2, 4, 2}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {2, 4, 2}, /* cost of storing integer registers */
389 2, /* cost of reg,reg fld/fst */
390 {2, 2, 6}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {4, 4, 6}, /* cost of storing fp registers
393 in SFmode, DFmode and XFmode */
394 8, /* cost of moving MMX register */
395 {8, 8}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {8, 8}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 8, 16}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 8, 16}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 3, /* MMX or SSE register to integer */
405 8, /* size of l1 cache. */
406 8, /* size of l2 cache */
407 0, /* size of prefetch block */
408 0, /* number of parallel prefetches */
409 2, /* Branch cost */
410 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
411 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
412 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
413 COSTS_N_INSNS (1), /* cost of FABS instruction. */
414 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
415 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
416 pentium_memcpy,
417 pentium_memset,
418 1, /* scalar_stmt_cost. */
419 1, /* scalar load_cost. */
420 1, /* scalar_store_cost. */
421 1, /* vec_stmt_cost. */
422 1, /* vec_to_scalar_cost. */
423 1, /* scalar_to_vec_cost. */
424 1, /* vec_align_load_cost. */
425 2, /* vec_unalign_load_cost. */
426 1, /* vec_store_cost. */
427 3, /* cond_taken_branch_cost. */
428   1, /* cond_not_taken_branch_cost. */
429 };
431 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
432    (we ensure the alignment).  For small blocks an inline loop is still a
433    noticeable win; for bigger blocks either rep movsl or rep movsb is the
434    way to go.  Rep movsb apparently has a more expensive startup time in the
435    CPU, but after 4K the difference is down in the noise. */
436 static stringop_algs pentiumpro_memcpy[2] = {
437 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
438 {8192, rep_prefix_4_byte, false},
439 {-1, rep_prefix_1_byte, false}}},
440 DUMMY_STRINGOP_ALGS};
441 static stringop_algs pentiumpro_memset[2] = {
442 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
443 {8192, rep_prefix_4_byte, false},
444 {-1, libcall, false}}},
445 DUMMY_STRINGOP_ALGS};
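/* Editorial reading of the initializers above, an interpretation rather than
   original text: for 32-bit code pentiumpro_memcpy falls back to rep movsl
   when the block size is unknown, and otherwise uses an inline loop up to
   128 bytes, an unrolled loop up to 1024, rep movsl up to 8192 and rep movsb
   beyond that, matching the strategy described in the comment before the
   tables.  The 64-bit entry is the dummy placeholder, since PentiumPro never
   executes 64-bit code.  */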
446 static const
447 struct processor_costs pentiumpro_cost = {
448 COSTS_N_INSNS (1), /* cost of an add instruction */
449 COSTS_N_INSNS (1), /* cost of a lea instruction */
450 COSTS_N_INSNS (1), /* variable shift costs */
451 COSTS_N_INSNS (1), /* constant shift costs */
452 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
453 COSTS_N_INSNS (4), /* HI */
454 COSTS_N_INSNS (4), /* SI */
455 COSTS_N_INSNS (4), /* DI */
456 COSTS_N_INSNS (4)}, /* other */
457 0, /* cost of multiply per each bit set */
458 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
459 COSTS_N_INSNS (17), /* HI */
460 COSTS_N_INSNS (17), /* SI */
461 COSTS_N_INSNS (17), /* DI */
462 COSTS_N_INSNS (17)}, /* other */
463 COSTS_N_INSNS (1), /* cost of movsx */
464 COSTS_N_INSNS (1), /* cost of movzx */
465 8, /* "large" insn */
466 6, /* MOVE_RATIO */
467 2, /* cost for loading QImode using movzbl */
468 {4, 4, 4}, /* cost of loading integer registers
469 in QImode, HImode and SImode.
470 Relative to reg-reg move (2). */
471 {2, 2, 2}, /* cost of storing integer registers */
472 2, /* cost of reg,reg fld/fst */
473 {2, 2, 6}, /* cost of loading fp registers
474 in SFmode, DFmode and XFmode */
475 {4, 4, 6}, /* cost of storing fp registers
476 in SFmode, DFmode and XFmode */
477 2, /* cost of moving MMX register */
478 {2, 2}, /* cost of loading MMX registers
479 in SImode and DImode */
480 {2, 2}, /* cost of storing MMX registers
481 in SImode and DImode */
482 2, /* cost of moving SSE register */
483 {2, 2, 8}, /* cost of loading SSE registers
484 in SImode, DImode and TImode */
485 {2, 2, 8}, /* cost of storing SSE registers
486 in SImode, DImode and TImode */
487 3, /* MMX or SSE register to integer */
488 8, /* size of l1 cache. */
489 256, /* size of l2 cache */
490 32, /* size of prefetch block */
491 6, /* number of parallel prefetches */
492 2, /* Branch cost */
493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
495 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
496 COSTS_N_INSNS (2), /* cost of FABS instruction. */
497 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
498 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
499 pentiumpro_memcpy,
500 pentiumpro_memset,
501 1, /* scalar_stmt_cost. */
502 1, /* scalar load_cost. */
503 1, /* scalar_store_cost. */
504 1, /* vec_stmt_cost. */
505 1, /* vec_to_scalar_cost. */
506 1, /* scalar_to_vec_cost. */
507 1, /* vec_align_load_cost. */
508 2, /* vec_unalign_load_cost. */
509 1, /* vec_store_cost. */
510 3, /* cond_taken_branch_cost. */
511   1, /* cond_not_taken_branch_cost. */
512 };
514 static stringop_algs geode_memcpy[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static stringop_algs geode_memset[2] = {
518 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
519 DUMMY_STRINGOP_ALGS};
520 static const
521 struct processor_costs geode_cost = {
522 COSTS_N_INSNS (1), /* cost of an add instruction */
523 COSTS_N_INSNS (1), /* cost of a lea instruction */
524 COSTS_N_INSNS (2), /* variable shift costs */
525 COSTS_N_INSNS (1), /* constant shift costs */
526 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
527 COSTS_N_INSNS (4), /* HI */
528 COSTS_N_INSNS (7), /* SI */
529 COSTS_N_INSNS (7), /* DI */
530 COSTS_N_INSNS (7)}, /* other */
531 0, /* cost of multiply per each bit set */
532 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
533 COSTS_N_INSNS (23), /* HI */
534 COSTS_N_INSNS (39), /* SI */
535 COSTS_N_INSNS (39), /* DI */
536 COSTS_N_INSNS (39)}, /* other */
537 COSTS_N_INSNS (1), /* cost of movsx */
538 COSTS_N_INSNS (1), /* cost of movzx */
539 8, /* "large" insn */
540 4, /* MOVE_RATIO */
541 1, /* cost for loading QImode using movzbl */
542 {1, 1, 1}, /* cost of loading integer registers
543 in QImode, HImode and SImode.
544 Relative to reg-reg move (2). */
545 {1, 1, 1}, /* cost of storing integer registers */
546 1, /* cost of reg,reg fld/fst */
547 {1, 1, 1}, /* cost of loading fp registers
548 in SFmode, DFmode and XFmode */
549 {4, 6, 6}, /* cost of storing fp registers
550 in SFmode, DFmode and XFmode */
552 1, /* cost of moving MMX register */
553 {1, 1}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {1, 1}, /* cost of storing MMX registers
556 in SImode and DImode */
557 1, /* cost of moving SSE register */
558 {1, 1, 1}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {1, 1, 1}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 1, /* MMX or SSE register to integer */
563 64, /* size of l1 cache. */
564 128, /* size of l2 cache. */
565 32, /* size of prefetch block */
566 1, /* number of parallel prefetches */
567 1, /* Branch cost */
568 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
569 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
570 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
571 COSTS_N_INSNS (1), /* cost of FABS instruction. */
572 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
573 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
574 geode_memcpy,
575 geode_memset,
576 1, /* scalar_stmt_cost. */
577 1, /* scalar load_cost. */
578 1, /* scalar_store_cost. */
579 1, /* vec_stmt_cost. */
580 1, /* vec_to_scalar_cost. */
581 1, /* scalar_to_vec_cost. */
582 1, /* vec_align_load_cost. */
583 2, /* vec_unalign_load_cost. */
584 1, /* vec_store_cost. */
585 3, /* cond_taken_branch_cost. */
586   1, /* cond_not_taken_branch_cost. */
587 };
589 static stringop_algs k6_memcpy[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static stringop_algs k6_memset[2] = {
593 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
594 DUMMY_STRINGOP_ALGS};
595 static const
596 struct processor_costs k6_cost = {
597 COSTS_N_INSNS (1), /* cost of an add instruction */
598 COSTS_N_INSNS (2), /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (3), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (3), /* DI */
605 COSTS_N_INSNS (3)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (18), /* HI */
609 COSTS_N_INSNS (18), /* SI */
610 COSTS_N_INSNS (18), /* DI */
611 COSTS_N_INSNS (18)}, /* other */
612 COSTS_N_INSNS (2), /* cost of movsx */
613 COSTS_N_INSNS (2), /* cost of movzx */
614 8, /* "large" insn */
615 4, /* MOVE_RATIO */
616 3, /* cost for loading QImode using movzbl */
617 {4, 5, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {2, 3, 2}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {6, 6, 6}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {4, 4, 4}, /* cost of storing fp registers
625 in SFmode, DFmode and XFmode */
626 2, /* cost of moving MMX register */
627 {2, 2}, /* cost of loading MMX registers
628 in SImode and DImode */
629 {2, 2}, /* cost of storing MMX registers
630 in SImode and DImode */
631 2, /* cost of moving SSE register */
632 {2, 2, 8}, /* cost of loading SSE registers
633 in SImode, DImode and TImode */
634 {2, 2, 8}, /* cost of storing SSE registers
635 in SImode, DImode and TImode */
636 6, /* MMX or SSE register to integer */
637 32, /* size of l1 cache. */
638 32, /* size of l2 cache. Some models
639 have integrated l2 cache, but
640 optimizing for k6 is not important
641 enough to worry about that. */
642 32, /* size of prefetch block */
643 1, /* number of parallel prefetches */
644 1, /* Branch cost */
645 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
646 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
647 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
648 COSTS_N_INSNS (2), /* cost of FABS instruction. */
649 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
650 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
651 k6_memcpy,
652 k6_memset,
653 1, /* scalar_stmt_cost. */
654 1, /* scalar load_cost. */
655 1, /* scalar_store_cost. */
656 1, /* vec_stmt_cost. */
657 1, /* vec_to_scalar_cost. */
658 1, /* scalar_to_vec_cost. */
659 1, /* vec_align_load_cost. */
660 2, /* vec_unalign_load_cost. */
661 1, /* vec_store_cost. */
662 3, /* cond_taken_branch_cost. */
663   1, /* cond_not_taken_branch_cost. */
664 };
666 /* For some reason, Athlon deals better with the REP prefix (relative to
667    loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
668    and 128 bytes for memset. */
669 static stringop_algs athlon_memcpy[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static stringop_algs athlon_memset[2] = {
673 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
674 DUMMY_STRINGOP_ALGS};
675 static const
676 struct processor_costs athlon_cost = {
677 COSTS_N_INSNS (1), /* cost of an add instruction */
678 COSTS_N_INSNS (2), /* cost of a lea instruction */
679 COSTS_N_INSNS (1), /* variable shift costs */
680 COSTS_N_INSNS (1), /* constant shift costs */
681 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
682 COSTS_N_INSNS (5), /* HI */
683 COSTS_N_INSNS (5), /* SI */
684 COSTS_N_INSNS (5), /* DI */
685 COSTS_N_INSNS (5)}, /* other */
686 0, /* cost of multiply per each bit set */
687 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
688 COSTS_N_INSNS (26), /* HI */
689 COSTS_N_INSNS (42), /* SI */
690 COSTS_N_INSNS (74), /* DI */
691 COSTS_N_INSNS (74)}, /* other */
692 COSTS_N_INSNS (1), /* cost of movsx */
693 COSTS_N_INSNS (1), /* cost of movzx */
694 8, /* "large" insn */
695 9, /* MOVE_RATIO */
696 4, /* cost for loading QImode using movzbl */
697 {3, 4, 3}, /* cost of loading integer registers
698 in QImode, HImode and SImode.
699 Relative to reg-reg move (2). */
700 {3, 4, 3}, /* cost of storing integer registers */
701 4, /* cost of reg,reg fld/fst */
702 {4, 4, 12}, /* cost of loading fp registers
703 in SFmode, DFmode and XFmode */
704 {6, 6, 8}, /* cost of storing fp registers
705 in SFmode, DFmode and XFmode */
706 2, /* cost of moving MMX register */
707 {4, 4}, /* cost of loading MMX registers
708 in SImode and DImode */
709 {4, 4}, /* cost of storing MMX registers
710 in SImode and DImode */
711 2, /* cost of moving SSE register */
712 {4, 4, 6}, /* cost of loading SSE registers
713 in SImode, DImode and TImode */
714 {4, 4, 5}, /* cost of storing SSE registers
715 in SImode, DImode and TImode */
716 5, /* MMX or SSE register to integer */
717 64, /* size of l1 cache. */
718 256, /* size of l2 cache. */
719 64, /* size of prefetch block */
720 6, /* number of parallel prefetches */
721 5, /* Branch cost */
722 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
723 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
724 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
725 COSTS_N_INSNS (2), /* cost of FABS instruction. */
726 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
727 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
728 athlon_memcpy,
729 athlon_memset,
730 1, /* scalar_stmt_cost. */
731 1, /* scalar load_cost. */
732 1, /* scalar_store_cost. */
733 1, /* vec_stmt_cost. */
734 1, /* vec_to_scalar_cost. */
735 1, /* scalar_to_vec_cost. */
736 1, /* vec_align_load_cost. */
737 2, /* vec_unalign_load_cost. */
738 1, /* vec_store_cost. */
739 3, /* cond_taken_branch_cost. */
740   1, /* cond_not_taken_branch_cost. */
741 };
743 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
744    small blocks it is better to use a loop.  For large blocks, a libcall can
745    do non-temporal accesses and beat the inline code considerably. */
746 static stringop_algs k8_memcpy[2] = {
747 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
748 {-1, rep_prefix_4_byte, false}}},
749 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
750 {-1, libcall, false}}}};
751 static stringop_algs k8_memset[2] = {
752 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 {libcall, {{48, unrolled_loop, false},
755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
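/* Editorial note, an interpretation rather than original text: unlike the
   32-bit-only CPUs above, K8 fills in both array elements, so 64-bit code
   gets its own memcpy strategy: a small loop up to 16 bytes, an 8-byte rep
   prefix (rep movsq) up to 8192 bytes and a libcall beyond that, with the
   libcall also chosen when the size is unknown, as anticipated by the
   comment before these tables.  */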
756 static const
757 struct processor_costs k8_cost = {
758 COSTS_N_INSNS (1), /* cost of an add instruction */
759 COSTS_N_INSNS (2), /* cost of a lea instruction */
760 COSTS_N_INSNS (1), /* variable shift costs */
761 COSTS_N_INSNS (1), /* constant shift costs */
762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
763 COSTS_N_INSNS (4), /* HI */
764 COSTS_N_INSNS (3), /* SI */
765 COSTS_N_INSNS (4), /* DI */
766 COSTS_N_INSNS (5)}, /* other */
767 0, /* cost of multiply per each bit set */
768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
769 COSTS_N_INSNS (26), /* HI */
770 COSTS_N_INSNS (42), /* SI */
771 COSTS_N_INSNS (74), /* DI */
772 COSTS_N_INSNS (74)}, /* other */
773 COSTS_N_INSNS (1), /* cost of movsx */
774 COSTS_N_INSNS (1), /* cost of movzx */
775 8, /* "large" insn */
776 9, /* MOVE_RATIO */
777 4, /* cost for loading QImode using movzbl */
778 {3, 4, 3}, /* cost of loading integer registers
779 in QImode, HImode and SImode.
780 Relative to reg-reg move (2). */
781 {3, 4, 3}, /* cost of storing integer registers */
782 4, /* cost of reg,reg fld/fst */
783 {4, 4, 12}, /* cost of loading fp registers
784 in SFmode, DFmode and XFmode */
785 {6, 6, 8}, /* cost of storing fp registers
786 in SFmode, DFmode and XFmode */
787 2, /* cost of moving MMX register */
788 {3, 3}, /* cost of loading MMX registers
789 in SImode and DImode */
790 {4, 4}, /* cost of storing MMX registers
791 in SImode and DImode */
792 2, /* cost of moving SSE register */
793 {4, 3, 6}, /* cost of loading SSE registers
794 in SImode, DImode and TImode */
795 {4, 4, 5}, /* cost of storing SSE registers
796 in SImode, DImode and TImode */
797 5, /* MMX or SSE register to integer */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
801 /* New AMD processors never drop prefetches; if they cannot be performed
802    immediately, they are queued.  We set the number of simultaneous prefetches
803    to a large constant to reflect this (it probably is not a good idea not
804    to limit the number of prefetches at all, as their execution also takes some
805 time). */
806 100, /* number of parallel prefetches */
807 3, /* Branch cost */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
815 k8_memcpy,
816 k8_memset,
817 4, /* scalar_stmt_cost. */
818 2, /* scalar load_cost. */
819 2, /* scalar_store_cost. */
820 5, /* vec_stmt_cost. */
821 0, /* vec_to_scalar_cost. */
822 2, /* scalar_to_vec_cost. */
823 2, /* vec_align_load_cost. */
824 3, /* vec_unalign_load_cost. */
825 3, /* vec_store_cost. */
826 3, /* cond_taken_branch_cost. */
827   2, /* cond_not_taken_branch_cost. */
828 };
830 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
831    very small blocks it is better to use a loop.  For large blocks, a libcall
832    can do non-temporal accesses and beat the inline code considerably. */
833 static stringop_algs amdfam10_memcpy[2] = {
834 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
835 {-1, rep_prefix_4_byte, false}}},
836 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
837 {-1, libcall, false}}}};
838 static stringop_algs amdfam10_memset[2] = {
839 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
840 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
841 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
842 {-1, libcall, false}}}};
843 struct processor_costs amdfam10_cost = {
844 COSTS_N_INSNS (1), /* cost of an add instruction */
845 COSTS_N_INSNS (2), /* cost of a lea instruction */
846 COSTS_N_INSNS (1), /* variable shift costs */
847 COSTS_N_INSNS (1), /* constant shift costs */
848 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
849 COSTS_N_INSNS (4), /* HI */
850 COSTS_N_INSNS (3), /* SI */
851 COSTS_N_INSNS (4), /* DI */
852 COSTS_N_INSNS (5)}, /* other */
853 0, /* cost of multiply per each bit set */
854 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
855 COSTS_N_INSNS (35), /* HI */
856 COSTS_N_INSNS (51), /* SI */
857 COSTS_N_INSNS (83), /* DI */
858 COSTS_N_INSNS (83)}, /* other */
859 COSTS_N_INSNS (1), /* cost of movsx */
860 COSTS_N_INSNS (1), /* cost of movzx */
861 8, /* "large" insn */
862 9, /* MOVE_RATIO */
863 4, /* cost for loading QImode using movzbl */
864 {3, 4, 3}, /* cost of loading integer registers
865 in QImode, HImode and SImode.
866 Relative to reg-reg move (2). */
867 {3, 4, 3}, /* cost of storing integer registers */
868 4, /* cost of reg,reg fld/fst */
869 {4, 4, 12}, /* cost of loading fp registers
870 in SFmode, DFmode and XFmode */
871 {6, 6, 8}, /* cost of storing fp registers
872 in SFmode, DFmode and XFmode */
873 2, /* cost of moving MMX register */
874 {3, 3}, /* cost of loading MMX registers
875 in SImode and DImode */
876 {4, 4}, /* cost of storing MMX registers
877 in SImode and DImode */
878 2, /* cost of moving SSE register */
879 {4, 4, 3}, /* cost of loading SSE registers
880 in SImode, DImode and TImode */
881 {4, 4, 5}, /* cost of storing SSE registers
882 in SImode, DImode and TImode */
883 3, /* MMX or SSE register to integer */
884 /* On K8:
885 MOVD reg64, xmmreg Double FSTORE 4
886 MOVD reg32, xmmreg Double FSTORE 4
887 On AMDFAM10:
888 MOVD reg64, xmmreg Double FADD 3
889 1/1 1/1
890 MOVD reg32, xmmreg Double FADD 3
891 1/1 1/1 */
892 64, /* size of l1 cache. */
893 512, /* size of l2 cache. */
894 64, /* size of prefetch block */
895 /* New AMD processors never drop prefetches; if they cannot be performed
896    immediately, they are queued.  We set the number of simultaneous prefetches
897    to a large constant to reflect this (it probably is not a good idea not
898    to limit the number of prefetches at all, as their execution also takes some
899 time). */
900 100, /* number of parallel prefetches */
901 2, /* Branch cost */
902 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
903 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
904 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
905 COSTS_N_INSNS (2), /* cost of FABS instruction. */
906 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
907 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
909 amdfam10_memcpy,
910 amdfam10_memset,
911 4, /* scalar_stmt_cost. */
912 2, /* scalar load_cost. */
913 2, /* scalar_store_cost. */
914 6, /* vec_stmt_cost. */
915 0, /* vec_to_scalar_cost. */
916 2, /* scalar_to_vec_cost. */
917 2, /* vec_align_load_cost. */
918 2, /* vec_unalign_load_cost. */
919 2, /* vec_store_cost. */
920 2, /* cond_taken_branch_cost. */
921   1, /* cond_not_taken_branch_cost. */
922 };
924 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
925    very small blocks it is better to use a loop.  For large blocks, a libcall
926    can do non-temporal accesses and beat the inline code considerably. */
927 static stringop_algs bdver1_memcpy[2] = {
928 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
929 {-1, rep_prefix_4_byte, false}}},
930 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
931 {-1, libcall, false}}}};
932 static stringop_algs bdver1_memset[2] = {
933 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
934 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
935 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
936 {-1, libcall, false}}}};
938 const struct processor_costs bdver1_cost = {
939 COSTS_N_INSNS (1), /* cost of an add instruction */
940 COSTS_N_INSNS (1), /* cost of a lea instruction */
941 COSTS_N_INSNS (1), /* variable shift costs */
942 COSTS_N_INSNS (1), /* constant shift costs */
943 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
944 COSTS_N_INSNS (4), /* HI */
945 COSTS_N_INSNS (4), /* SI */
946 COSTS_N_INSNS (6), /* DI */
947 COSTS_N_INSNS (6)}, /* other */
948 0, /* cost of multiply per each bit set */
949 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
950 COSTS_N_INSNS (35), /* HI */
951 COSTS_N_INSNS (51), /* SI */
952 COSTS_N_INSNS (83), /* DI */
953 COSTS_N_INSNS (83)}, /* other */
954 COSTS_N_INSNS (1), /* cost of movsx */
955 COSTS_N_INSNS (1), /* cost of movzx */
956 8, /* "large" insn */
957 9, /* MOVE_RATIO */
958 4, /* cost for loading QImode using movzbl */
959 {5, 5, 4}, /* cost of loading integer registers
960 in QImode, HImode and SImode.
961 Relative to reg-reg move (2). */
962 {4, 4, 4}, /* cost of storing integer registers */
963 2, /* cost of reg,reg fld/fst */
964 {5, 5, 12}, /* cost of loading fp registers
965 in SFmode, DFmode and XFmode */
966 {4, 4, 8}, /* cost of storing fp registers
967 in SFmode, DFmode and XFmode */
968 2, /* cost of moving MMX register */
969 {4, 4}, /* cost of loading MMX registers
970 in SImode and DImode */
971 {4, 4}, /* cost of storing MMX registers
972 in SImode and DImode */
973 2, /* cost of moving SSE register */
974 {4, 4, 4}, /* cost of loading SSE registers
975 in SImode, DImode and TImode */
976 {4, 4, 4}, /* cost of storing SSE registers
977 in SImode, DImode and TImode */
978 2, /* MMX or SSE register to integer */
979 /* On K8:
980 MOVD reg64, xmmreg Double FSTORE 4
981 MOVD reg32, xmmreg Double FSTORE 4
982 On AMDFAM10:
983 MOVD reg64, xmmreg Double FADD 3
984 1/1 1/1
985 MOVD reg32, xmmreg Double FADD 3
986 1/1 1/1 */
987 16, /* size of l1 cache. */
988 2048, /* size of l2 cache. */
989 64, /* size of prefetch block */
990 /* New AMD processors never drop prefetches; if they cannot be performed
991    immediately, they are queued.  We set the number of simultaneous prefetches
992    to a large constant to reflect this (it probably is not a good idea not
993    to limit the number of prefetches at all, as their execution also takes some
994 time). */
995 100, /* number of parallel prefetches */
996 2, /* Branch cost */
997 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
998 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
999 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1000 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1001 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1002 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1004 bdver1_memcpy,
1005 bdver1_memset,
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
1016   1, /* cond_not_taken_branch_cost. */
1017 };
1019 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1020    very small blocks it is better to use a loop.  For large blocks, a libcall
1021    can do non-temporal accesses and beat the inline code considerably. */
1023 static stringop_algs bdver2_memcpy[2] = {
1024 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1025 {-1, rep_prefix_4_byte, false}}},
1026 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1027 {-1, libcall, false}}}};
1028 static stringop_algs bdver2_memset[2] = {
1029 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1030 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1031 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1032 {-1, libcall, false}}}};
1034 const struct processor_costs bdver2_cost = {
1035 COSTS_N_INSNS (1), /* cost of an add instruction */
1036 COSTS_N_INSNS (1), /* cost of a lea instruction */
1037 COSTS_N_INSNS (1), /* variable shift costs */
1038 COSTS_N_INSNS (1), /* constant shift costs */
1039 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1040 COSTS_N_INSNS (4), /* HI */
1041 COSTS_N_INSNS (4), /* SI */
1042 COSTS_N_INSNS (6), /* DI */
1043 COSTS_N_INSNS (6)}, /* other */
1044 0, /* cost of multiply per each bit set */
1045 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1046 COSTS_N_INSNS (35), /* HI */
1047 COSTS_N_INSNS (51), /* SI */
1048 COSTS_N_INSNS (83), /* DI */
1049 COSTS_N_INSNS (83)}, /* other */
1050 COSTS_N_INSNS (1), /* cost of movsx */
1051 COSTS_N_INSNS (1), /* cost of movzx */
1052 8, /* "large" insn */
1053 9, /* MOVE_RATIO */
1054 4, /* cost for loading QImode using movzbl */
1055 {5, 5, 4}, /* cost of loading integer registers
1056 in QImode, HImode and SImode.
1057 Relative to reg-reg move (2). */
1058 {4, 4, 4}, /* cost of storing integer registers */
1059 2, /* cost of reg,reg fld/fst */
1060 {5, 5, 12}, /* cost of loading fp registers
1061 in SFmode, DFmode and XFmode */
1062 {4, 4, 8}, /* cost of storing fp registers
1063 in SFmode, DFmode and XFmode */
1064 2, /* cost of moving MMX register */
1065 {4, 4}, /* cost of loading MMX registers
1066 in SImode and DImode */
1067 {4, 4}, /* cost of storing MMX registers
1068 in SImode and DImode */
1069 2, /* cost of moving SSE register */
1070 {4, 4, 4}, /* cost of loading SSE registers
1071 in SImode, DImode and TImode */
1072 {4, 4, 4}, /* cost of storing SSE registers
1073 in SImode, DImode and TImode */
1074 2, /* MMX or SSE register to integer */
1075 /* On K8:
1076 MOVD reg64, xmmreg Double FSTORE 4
1077 MOVD reg32, xmmreg Double FSTORE 4
1078 On AMDFAM10:
1079 MOVD reg64, xmmreg Double FADD 3
1080 1/1 1/1
1081 MOVD reg32, xmmreg Double FADD 3
1082 1/1 1/1 */
1083 16, /* size of l1 cache. */
1084 2048, /* size of l2 cache. */
1085 64, /* size of prefetch block */
1086 /* New AMD processors never drop prefetches; if they cannot be performed
1087    immediately, they are queued.  We set the number of simultaneous prefetches
1088    to a large constant to reflect this (it probably is not a good idea not
1089    to limit the number of prefetches at all, as their execution also takes some
1090 time). */
1091 100, /* number of parallel prefetches */
1092 2, /* Branch cost */
1093 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1100 bdver2_memcpy,
1101 bdver2_memset,
1102 6, /* scalar_stmt_cost. */
1103 4, /* scalar load_cost. */
1104 4, /* scalar_store_cost. */
1105 6, /* vec_stmt_cost. */
1106 0, /* vec_to_scalar_cost. */
1107 2, /* scalar_to_vec_cost. */
1108 4, /* vec_align_load_cost. */
1109 4, /* vec_unalign_load_cost. */
1110 4, /* vec_store_cost. */
1111 2, /* cond_taken_branch_cost. */
1112   1, /* cond_not_taken_branch_cost. */
1113 };
1116 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1117    very small blocks it is better to use a loop.  For large blocks, a libcall
1118    can do non-temporal accesses and beat the inline code considerably. */
1119 static stringop_algs bdver3_memcpy[2] = {
1120 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1121 {-1, rep_prefix_4_byte, false}}},
1122 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1123 {-1, libcall, false}}}};
1124 static stringop_algs bdver3_memset[2] = {
1125 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1126 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1127 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1128 {-1, libcall, false}}}};
1129 struct processor_costs bdver3_cost = {
1130 COSTS_N_INSNS (1), /* cost of an add instruction */
1131 COSTS_N_INSNS (1), /* cost of a lea instruction */
1132 COSTS_N_INSNS (1), /* variable shift costs */
1133 COSTS_N_INSNS (1), /* constant shift costs */
1134 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1135 COSTS_N_INSNS (4), /* HI */
1136 COSTS_N_INSNS (4), /* SI */
1137 COSTS_N_INSNS (6), /* DI */
1138 COSTS_N_INSNS (6)}, /* other */
1139 0, /* cost of multiply per each bit set */
1140 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1141 COSTS_N_INSNS (35), /* HI */
1142 COSTS_N_INSNS (51), /* SI */
1143 COSTS_N_INSNS (83), /* DI */
1144 COSTS_N_INSNS (83)}, /* other */
1145 COSTS_N_INSNS (1), /* cost of movsx */
1146 COSTS_N_INSNS (1), /* cost of movzx */
1147 8, /* "large" insn */
1148 9, /* MOVE_RATIO */
1149 4, /* cost for loading QImode using movzbl */
1150 {5, 5, 4}, /* cost of loading integer registers
1151 in QImode, HImode and SImode.
1152 Relative to reg-reg move (2). */
1153 {4, 4, 4}, /* cost of storing integer registers */
1154 2, /* cost of reg,reg fld/fst */
1155 {5, 5, 12}, /* cost of loading fp registers
1156 in SFmode, DFmode and XFmode */
1157 {4, 4, 8}, /* cost of storing fp registers
1158 in SFmode, DFmode and XFmode */
1159 2, /* cost of moving MMX register */
1160 {4, 4}, /* cost of loading MMX registers
1161 in SImode and DImode */
1162 {4, 4}, /* cost of storing MMX registers
1163 in SImode and DImode */
1164 2, /* cost of moving SSE register */
1165 {4, 4, 4}, /* cost of loading SSE registers
1166 in SImode, DImode and TImode */
1167 {4, 4, 4}, /* cost of storing SSE registers
1168 in SImode, DImode and TImode */
1169 2, /* MMX or SSE register to integer */
1170 16, /* size of l1 cache. */
1171 2048, /* size of l2 cache. */
1172 64, /* size of prefetch block */
1173 /* New AMD processors never drop prefetches; if they cannot be performed
1174    immediately, they are queued.  We set the number of simultaneous prefetches
1175    to a large constant to reflect this (it probably is not a good idea not
1176    to limit the number of prefetches at all, as their execution also takes some
1177 time). */
1178 100, /* number of parallel prefetches */
1179 2, /* Branch cost */
1180 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1181 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1182 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1183 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1184 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1185 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1187 bdver3_memcpy,
1188 bdver3_memset,
1189 6, /* scalar_stmt_cost. */
1190 4, /* scalar load_cost. */
1191 4, /* scalar_store_cost. */
1192 6, /* vec_stmt_cost. */
1193 0, /* vec_to_scalar_cost. */
1194 2, /* scalar_to_vec_cost. */
1195 4, /* vec_align_load_cost. */
1196 4, /* vec_unalign_load_cost. */
1197 4, /* vec_store_cost. */
1198 2, /* cond_taken_branch_cost. */
1199   1, /* cond_not_taken_branch_cost. */
1200 };
1202 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1203    very small blocks it is better to use a loop.  For large blocks, a libcall
1204    can do non-temporal accesses and beat the inline code considerably. */
1205 static stringop_algs bdver4_memcpy[2] = {
1206 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1207 {-1, rep_prefix_4_byte, false}}},
1208 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1209 {-1, libcall, false}}}};
1210 static stringop_algs bdver4_memset[2] = {
1211 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1212 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1213 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1214 {-1, libcall, false}}}};
1215 struct processor_costs bdver4_cost = {
1216 COSTS_N_INSNS (1), /* cost of an add instruction */
1217 COSTS_N_INSNS (1), /* cost of a lea instruction */
1218 COSTS_N_INSNS (1), /* variable shift costs */
1219 COSTS_N_INSNS (1), /* constant shift costs */
1220 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1221 COSTS_N_INSNS (4), /* HI */
1222 COSTS_N_INSNS (4), /* SI */
1223 COSTS_N_INSNS (6), /* DI */
1224 COSTS_N_INSNS (6)}, /* other */
1225 0, /* cost of multiply per each bit set */
1226 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1227 COSTS_N_INSNS (35), /* HI */
1228 COSTS_N_INSNS (51), /* SI */
1229 COSTS_N_INSNS (83), /* DI */
1230 COSTS_N_INSNS (83)}, /* other */
1231 COSTS_N_INSNS (1), /* cost of movsx */
1232 COSTS_N_INSNS (1), /* cost of movzx */
1233 8, /* "large" insn */
1234 9, /* MOVE_RATIO */
1235 4, /* cost for loading QImode using movzbl */
1236 {5, 5, 4}, /* cost of loading integer registers
1237 in QImode, HImode and SImode.
1238 Relative to reg-reg move (2). */
1239 {4, 4, 4}, /* cost of storing integer registers */
1240 2, /* cost of reg,reg fld/fst */
1241 {5, 5, 12}, /* cost of loading fp registers
1242 in SFmode, DFmode and XFmode */
1243 {4, 4, 8}, /* cost of storing fp registers
1244 in SFmode, DFmode and XFmode */
1245 2, /* cost of moving MMX register */
1246 {4, 4}, /* cost of loading MMX registers
1247 in SImode and DImode */
1248 {4, 4}, /* cost of storing MMX registers
1249 in SImode and DImode */
1250 2, /* cost of moving SSE register */
1251 {4, 4, 4}, /* cost of loading SSE registers
1252 in SImode, DImode and TImode */
1253 {4, 4, 4}, /* cost of storing SSE registers
1254 in SImode, DImode and TImode */
1255 2, /* MMX or SSE register to integer */
1256 16, /* size of l1 cache. */
1257 2048, /* size of l2 cache. */
1258 64, /* size of prefetch block */
1259 /* New AMD processors never drop prefetches; if they cannot be performed
1260    immediately, they are queued.  We set the number of simultaneous prefetches
1261    to a large constant to reflect this (it probably is not a good idea not
1262    to limit the number of prefetches at all, as their execution also takes some
1263 time). */
1264 100, /* number of parallel prefetches */
1265 2, /* Branch cost */
1266 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1267 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1268 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1269 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1270 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1271 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1273 bdver4_memcpy,
1274 bdver4_memset,
1275 6, /* scalar_stmt_cost. */
1276 4, /* scalar load_cost. */
1277 4, /* scalar_store_cost. */
1278 6, /* vec_stmt_cost. */
1279 0, /* vec_to_scalar_cost. */
1280 2, /* scalar_to_vec_cost. */
1281 4, /* vec_align_load_cost. */
1282 4, /* vec_unalign_load_cost. */
1283 4, /* vec_store_cost. */
1284 2, /* cond_taken_branch_cost. */
1285   1, /* cond_not_taken_branch_cost. */
1286 };
1288 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1289    very small blocks it is better to use a loop.  For large blocks, a libcall
1290    can do non-temporal accesses and beat the inline code considerably. */
1291 static stringop_algs btver1_memcpy[2] = {
1292 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1293 {-1, rep_prefix_4_byte, false}}},
1294 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1295 {-1, libcall, false}}}};
1296 static stringop_algs btver1_memset[2] = {
1297 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1298 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1299 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1300 {-1, libcall, false}}}};
1301 const struct processor_costs btver1_cost = {
1302 COSTS_N_INSNS (1), /* cost of an add instruction */
1303 COSTS_N_INSNS (2), /* cost of a lea instruction */
1304 COSTS_N_INSNS (1), /* variable shift costs */
1305 COSTS_N_INSNS (1), /* constant shift costs */
1306 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1307 COSTS_N_INSNS (4), /* HI */
1308 COSTS_N_INSNS (3), /* SI */
1309 COSTS_N_INSNS (4), /* DI */
1310 COSTS_N_INSNS (5)}, /* other */
1311 0, /* cost of multiply per each bit set */
1312 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1313 COSTS_N_INSNS (35), /* HI */
1314 COSTS_N_INSNS (51), /* SI */
1315 COSTS_N_INSNS (83), /* DI */
1316 COSTS_N_INSNS (83)}, /* other */
1317 COSTS_N_INSNS (1), /* cost of movsx */
1318 COSTS_N_INSNS (1), /* cost of movzx */
1319 8, /* "large" insn */
1320 9, /* MOVE_RATIO */
1321 4, /* cost for loading QImode using movzbl */
1322 {3, 4, 3}, /* cost of loading integer registers
1323 in QImode, HImode and SImode.
1324 Relative to reg-reg move (2). */
1325 {3, 4, 3}, /* cost of storing integer registers */
1326 4, /* cost of reg,reg fld/fst */
1327 {4, 4, 12}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode */
1329 {6, 6, 8}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode */
1331 2, /* cost of moving MMX register */
1332 {3, 3}, /* cost of loading MMX registers
1333 in SImode and DImode */
1334 {4, 4}, /* cost of storing MMX registers
1335 in SImode and DImode */
1336 2, /* cost of moving SSE register */
1337 {4, 4, 3}, /* cost of loading SSE registers
1338 in SImode, DImode and TImode */
1339 {4, 4, 5}, /* cost of storing SSE registers
1340 in SImode, DImode and TImode */
1341 3, /* MMX or SSE register to integer */
1342 /* On K8:
1343 MOVD reg64, xmmreg Double FSTORE 4
1344 MOVD reg32, xmmreg Double FSTORE 4
1345 On AMDFAM10:
1346 MOVD reg64, xmmreg Double FADD 3
1347 1/1 1/1
1348 MOVD reg32, xmmreg Double FADD 3
1349 1/1 1/1 */
1350 32, /* size of l1 cache. */
1351 512, /* size of l2 cache. */
1352 64, /* size of prefetch block */
1353 100, /* number of parallel prefetches */
1354 2, /* Branch cost */
1355 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1356 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1357 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1358 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1359 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1360 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1362 btver1_memcpy,
1363 btver1_memset,
1364 4, /* scalar_stmt_cost. */
1365 2, /* scalar load_cost. */
1366 2, /* scalar_store_cost. */
1367 6, /* vec_stmt_cost. */
1368 0, /* vec_to_scalar_cost. */
1369 2, /* scalar_to_vec_cost. */
1370 2, /* vec_align_load_cost. */
1371 2, /* vec_unalign_load_cost. */
1372 2, /* vec_store_cost. */
1373 2, /* cond_taken_branch_cost. */
1374   1, /* cond_not_taken_branch_cost. */
1375 };
1377 static stringop_algs btver2_memcpy[2] = {
1378 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1379 {-1, rep_prefix_4_byte, false}}},
1380 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1381 {-1, libcall, false}}}};
1382 static stringop_algs btver2_memset[2] = {
1383 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1384 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1385 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1386 {-1, libcall, false}}}};
1387 const struct processor_costs btver2_cost = {
1388 COSTS_N_INSNS (1), /* cost of an add instruction */
1389 COSTS_N_INSNS (2), /* cost of a lea instruction */
1390 COSTS_N_INSNS (1), /* variable shift costs */
1391 COSTS_N_INSNS (1), /* constant shift costs */
1392 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1393 COSTS_N_INSNS (4), /* HI */
1394 COSTS_N_INSNS (3), /* SI */
1395 COSTS_N_INSNS (4), /* DI */
1396 COSTS_N_INSNS (5)}, /* other */
1397 0, /* cost of multiply per each bit set */
1398 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1399 COSTS_N_INSNS (35), /* HI */
1400 COSTS_N_INSNS (51), /* SI */
1401 COSTS_N_INSNS (83), /* DI */
1402 COSTS_N_INSNS (83)}, /* other */
1403 COSTS_N_INSNS (1), /* cost of movsx */
1404 COSTS_N_INSNS (1), /* cost of movzx */
1405 8, /* "large" insn */
1406 9, /* MOVE_RATIO */
1407 4, /* cost for loading QImode using movzbl */
1408 {3, 4, 3}, /* cost of loading integer registers
1409 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */
1411 {3, 4, 3}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */
1413 {4, 4, 12}, /* cost of loading fp registers
1414 in SFmode, DFmode and XFmode */
1415 {6, 6, 8}, /* cost of storing fp registers
1416 in SFmode, DFmode and XFmode */
1417 2, /* cost of moving MMX register */
1418 {3, 3}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {4, 4}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, /* cost of moving SSE register */
1423 {4, 4, 3}, /* cost of loading SSE registers
1424 in SImode, DImode and TImode */
1425 {4, 4, 5}, /* cost of storing SSE registers
1426 in SImode, DImode and TImode */
1427 3, /* MMX or SSE register to integer */
1428 /* On K8:
1429 MOVD reg64, xmmreg Double FSTORE 4
1430 MOVD reg32, xmmreg Double FSTORE 4
1431 On AMDFAM10:
1432 MOVD reg64, xmmreg Double FADD 3
1433 1/1 1/1
1434 MOVD reg32, xmmreg Double FADD 3
1435 1/1 1/1 */
1436 32, /* size of l1 cache. */
1437 2048, /* size of l2 cache. */
1438 64, /* size of prefetch block */
1439 100, /* number of parallel prefetches */
1440 2, /* Branch cost */
1441 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1442 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1443 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1444 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1445 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1446 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1447 btver2_memcpy,
1448 btver2_memset,
1449 4, /* scalar_stmt_cost. */
1450 2, /* scalar load_cost. */
1451 2, /* scalar_store_cost. */
1452 6, /* vec_stmt_cost. */
1453 0, /* vec_to_scalar_cost. */
1454 2, /* scalar_to_vec_cost. */
1455 2, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 2, /* vec_store_cost. */
1458 2, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1462 static stringop_algs pentium4_memcpy[2] = {
1463 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1464 DUMMY_STRINGOP_ALGS};
1465 static stringop_algs pentium4_memset[2] = {
1466 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1467 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 DUMMY_STRINGOP_ALGS};
1470 static const
1471 struct processor_costs pentium4_cost = {
1472 COSTS_N_INSNS (1), /* cost of an add instruction */
1473 COSTS_N_INSNS (3), /* cost of a lea instruction */
1474 COSTS_N_INSNS (4), /* variable shift costs */
1475 COSTS_N_INSNS (4), /* constant shift costs */
1476 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1477 COSTS_N_INSNS (15), /* HI */
1478 COSTS_N_INSNS (15), /* SI */
1479 COSTS_N_INSNS (15), /* DI */
1480 COSTS_N_INSNS (15)}, /* other */
1481 0, /* cost of multiply per each bit set */
1482 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1483 COSTS_N_INSNS (56), /* HI */
1484 COSTS_N_INSNS (56), /* SI */
1485 COSTS_N_INSNS (56), /* DI */
1486 COSTS_N_INSNS (56)}, /* other */
1487 COSTS_N_INSNS (1), /* cost of movsx */
1488 COSTS_N_INSNS (1), /* cost of movzx */
1489 16, /* "large" insn */
1490 6, /* MOVE_RATIO */
1491 2, /* cost for loading QImode using movzbl */
1492 {4, 5, 4}, /* cost of loading integer registers
1493 in QImode, HImode and SImode.
1494 Relative to reg-reg move (2). */
1495 {2, 3, 2}, /* cost of storing integer registers */
1496 2, /* cost of reg,reg fld/fst */
1497 {2, 2, 6}, /* cost of loading fp registers
1498 in SFmode, DFmode and XFmode */
1499 {4, 4, 6}, /* cost of storing fp registers
1500 in SFmode, DFmode and XFmode */
1501 2, /* cost of moving MMX register */
1502 {2, 2}, /* cost of loading MMX registers
1503 in SImode and DImode */
1504 {2, 2}, /* cost of storing MMX registers
1505 in SImode and DImode */
1506 12, /* cost of moving SSE register */
1507 {12, 12, 12}, /* cost of loading SSE registers
1508 in SImode, DImode and TImode */
1509 {2, 2, 8}, /* cost of storing SSE registers
1510 in SImode, DImode and TImode */
1511 10, /* MMX or SSE register to integer */
1512 8, /* size of l1 cache. */
1513 256, /* size of l2 cache. */
1514 64, /* size of prefetch block */
1515 6, /* number of parallel prefetches */
1516 2, /* Branch cost */
1517 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1518 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1519 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1520 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1521 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1522 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1523 pentium4_memcpy,
1524 pentium4_memset,
1525 1, /* scalar_stmt_cost. */
1526 1, /* scalar load_cost. */
1527 1, /* scalar_store_cost. */
1528 1, /* vec_stmt_cost. */
1529 1, /* vec_to_scalar_cost. */
1530 1, /* scalar_to_vec_cost. */
1531 1, /* vec_align_load_cost. */
1532 2, /* vec_unalign_load_cost. */
1533 1, /* vec_store_cost. */
1534 3, /* cond_taken_branch_cost. */
1535 1, /* cond_not_taken_branch_cost. */
1538 static stringop_algs nocona_memcpy[2] = {
1539 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1540 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1541 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1543 static stringop_algs nocona_memset[2] = {
1544 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1545 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1546 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1547 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1549 static const
1550 struct processor_costs nocona_cost = {
1551 COSTS_N_INSNS (1), /* cost of an add instruction */
1552 COSTS_N_INSNS (1), /* cost of a lea instruction */
1553 COSTS_N_INSNS (1), /* variable shift costs */
1554 COSTS_N_INSNS (1), /* constant shift costs */
1555 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1556 COSTS_N_INSNS (10), /* HI */
1557 COSTS_N_INSNS (10), /* SI */
1558 COSTS_N_INSNS (10), /* DI */
1559 COSTS_N_INSNS (10)}, /* other */
1560 0, /* cost of multiply per each bit set */
1561 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1562 COSTS_N_INSNS (66), /* HI */
1563 COSTS_N_INSNS (66), /* SI */
1564 COSTS_N_INSNS (66), /* DI */
1565 COSTS_N_INSNS (66)}, /* other */
1566 COSTS_N_INSNS (1), /* cost of movsx */
1567 COSTS_N_INSNS (1), /* cost of movzx */
1568 16, /* "large" insn */
1569 17, /* MOVE_RATIO */
1570 4, /* cost for loading QImode using movzbl */
1571 {4, 4, 4}, /* cost of loading integer registers
1572 in QImode, HImode and SImode.
1573 Relative to reg-reg move (2). */
1574 {4, 4, 4}, /* cost of storing integer registers */
1575 3, /* cost of reg,reg fld/fst */
1576 {12, 12, 12}, /* cost of loading fp registers
1577 in SFmode, DFmode and XFmode */
1578 {4, 4, 4}, /* cost of storing fp registers
1579 in SFmode, DFmode and XFmode */
1580 6, /* cost of moving MMX register */
1581 {12, 12}, /* cost of loading MMX registers
1582 in SImode and DImode */
1583 {12, 12}, /* cost of storing MMX registers
1584 in SImode and DImode */
1585 6, /* cost of moving SSE register */
1586 {12, 12, 12}, /* cost of loading SSE registers
1587 in SImode, DImode and TImode */
1588 {12, 12, 12}, /* cost of storing SSE registers
1589 in SImode, DImode and TImode */
1590 8, /* MMX or SSE register to integer */
1591 8, /* size of l1 cache. */
1592 1024, /* size of l2 cache. */
1593 64, /* size of prefetch block */
1594 8, /* number of parallel prefetches */
1595 1, /* Branch cost */
1596 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1597 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1598 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1599 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1600 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1601 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1602 nocona_memcpy,
1603 nocona_memset,
1604 1, /* scalar_stmt_cost. */
1605 1, /* scalar load_cost. */
1606 1, /* scalar_store_cost. */
1607 1, /* vec_stmt_cost. */
1608 1, /* vec_to_scalar_cost. */
1609 1, /* scalar_to_vec_cost. */
1610 1, /* vec_align_load_cost. */
1611 2, /* vec_unalign_load_cost. */
1612 1, /* vec_store_cost. */
1613 3, /* cond_taken_branch_cost. */
1614 1, /* cond_not_taken_branch_cost. */
1617 static stringop_algs atom_memcpy[2] = {
1618 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1619 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1620 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1621 static stringop_algs atom_memset[2] = {
1622 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1623 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1624 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1625 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1626 static const
1627 struct processor_costs atom_cost = {
1628 COSTS_N_INSNS (1), /* cost of an add instruction */
1629 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1630 COSTS_N_INSNS (1), /* variable shift costs */
1631 COSTS_N_INSNS (1), /* constant shift costs */
1632 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1633 COSTS_N_INSNS (4), /* HI */
1634 COSTS_N_INSNS (3), /* SI */
1635 COSTS_N_INSNS (4), /* DI */
1636 COSTS_N_INSNS (2)}, /* other */
1637 0, /* cost of multiply per each bit set */
1638 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1639 COSTS_N_INSNS (26), /* HI */
1640 COSTS_N_INSNS (42), /* SI */
1641 COSTS_N_INSNS (74), /* DI */
1642 COSTS_N_INSNS (74)}, /* other */
1643 COSTS_N_INSNS (1), /* cost of movsx */
1644 COSTS_N_INSNS (1), /* cost of movzx */
1645 8, /* "large" insn */
1646 17, /* MOVE_RATIO */
1647 4, /* cost for loading QImode using movzbl */
1648 {4, 4, 4}, /* cost of loading integer registers
1649 in QImode, HImode and SImode.
1650 Relative to reg-reg move (2). */
1651 {4, 4, 4}, /* cost of storing integer registers */
1652 4, /* cost of reg,reg fld/fst */
1653 {12, 12, 12}, /* cost of loading fp registers
1654 in SFmode, DFmode and XFmode */
1655 {6, 6, 8}, /* cost of storing fp registers
1656 in SFmode, DFmode and XFmode */
1657 2, /* cost of moving MMX register */
1658 {8, 8}, /* cost of loading MMX registers
1659 in SImode and DImode */
1660 {8, 8}, /* cost of storing MMX registers
1661 in SImode and DImode */
1662 2, /* cost of moving SSE register */
1663 {8, 8, 8}, /* cost of loading SSE registers
1664 in SImode, DImode and TImode */
1665 {8, 8, 8}, /* cost of storing SSE registers
1666 in SImode, DImode and TImode */
1667 5, /* MMX or SSE register to integer */
1668 32, /* size of l1 cache. */
1669 256, /* size of l2 cache. */
1670 64, /* size of prefetch block */
1671 6, /* number of parallel prefetches */
1672 3, /* Branch cost */
1673 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1674 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1675 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1676 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1677 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1678 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1679 atom_memcpy,
1680 atom_memset,
1681 1, /* scalar_stmt_cost. */
1682 1, /* scalar load_cost. */
1683 1, /* scalar_store_cost. */
1684 1, /* vec_stmt_cost. */
1685 1, /* vec_to_scalar_cost. */
1686 1, /* scalar_to_vec_cost. */
1687 1, /* vec_align_load_cost. */
1688 2, /* vec_unalign_load_cost. */
1689 1, /* vec_store_cost. */
1690 3, /* cond_taken_branch_cost. */
1691 1, /* cond_not_taken_branch_cost. */
1694 static stringop_algs slm_memcpy[2] = {
1695 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1696 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1697 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1698 static stringop_algs slm_memset[2] = {
1699 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1700 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1701 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1702 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1703 static const
1704 struct processor_costs slm_cost = {
1705 COSTS_N_INSNS (1), /* cost of an add instruction */
1706 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1707 COSTS_N_INSNS (1), /* variable shift costs */
1708 COSTS_N_INSNS (1), /* constant shift costs */
1709 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1710 COSTS_N_INSNS (3), /* HI */
1711 COSTS_N_INSNS (3), /* SI */
1712 COSTS_N_INSNS (4), /* DI */
1713 COSTS_N_INSNS (2)}, /* other */
1714 0, /* cost of multiply per each bit set */
1715 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1716 COSTS_N_INSNS (26), /* HI */
1717 COSTS_N_INSNS (42), /* SI */
1718 COSTS_N_INSNS (74), /* DI */
1719 COSTS_N_INSNS (74)}, /* other */
1720 COSTS_N_INSNS (1), /* cost of movsx */
1721 COSTS_N_INSNS (1), /* cost of movzx */
1722 8, /* "large" insn */
1723 17, /* MOVE_RATIO */
1724 4, /* cost for loading QImode using movzbl */
1725 {4, 4, 4}, /* cost of loading integer registers
1726 in QImode, HImode and SImode.
1727 Relative to reg-reg move (2). */
1728 {4, 4, 4}, /* cost of storing integer registers */
1729 4, /* cost of reg,reg fld/fst */
1730 {12, 12, 12}, /* cost of loading fp registers
1731 in SFmode, DFmode and XFmode */
1732 {6, 6, 8}, /* cost of storing fp registers
1733 in SFmode, DFmode and XFmode */
1734 2, /* cost of moving MMX register */
1735 {8, 8}, /* cost of loading MMX registers
1736 in SImode and DImode */
1737 {8, 8}, /* cost of storing MMX registers
1738 in SImode and DImode */
1739 2, /* cost of moving SSE register */
1740 {8, 8, 8}, /* cost of loading SSE registers
1741 in SImode, DImode and TImode */
1742 {8, 8, 8}, /* cost of storing SSE registers
1743 in SImode, DImode and TImode */
1744 5, /* MMX or SSE register to integer */
1745 32, /* size of l1 cache. */
1746 256, /* size of l2 cache. */
1747 64, /* size of prefetch block */
1748 6, /* number of parallel prefetches */
1749 3, /* Branch cost */
1750 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1751 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1752 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1753 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1754 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1755 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1756 slm_memcpy,
1757 slm_memset,
1758 1, /* scalar_stmt_cost. */
1759 1, /* scalar load_cost. */
1760 1, /* scalar_store_cost. */
1761 1, /* vec_stmt_cost. */
1762 4, /* vec_to_scalar_cost. */
1763 1, /* scalar_to_vec_cost. */
1764 1, /* vec_align_load_cost. */
1765 2, /* vec_unalign_load_cost. */
1766 1, /* vec_store_cost. */
1767 3, /* cond_taken_branch_cost. */
1768 1, /* cond_not_taken_branch_cost. */
1771 static stringop_algs intel_memcpy[2] = {
1772 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1773 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1774 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1775 static stringop_algs intel_memset[2] = {
1776 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1777 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1778 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1779 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1780 static const
1781 struct processor_costs intel_cost = {
1782 COSTS_N_INSNS (1), /* cost of an add instruction */
1783 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1784 COSTS_N_INSNS (1), /* variable shift costs */
1785 COSTS_N_INSNS (1), /* constant shift costs */
1786 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1787 COSTS_N_INSNS (3), /* HI */
1788 COSTS_N_INSNS (3), /* SI */
1789 COSTS_N_INSNS (4), /* DI */
1790 COSTS_N_INSNS (2)}, /* other */
1791 0, /* cost of multiply per each bit set */
1792 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1793 COSTS_N_INSNS (26), /* HI */
1794 COSTS_N_INSNS (42), /* SI */
1795 COSTS_N_INSNS (74), /* DI */
1796 COSTS_N_INSNS (74)}, /* other */
1797 COSTS_N_INSNS (1), /* cost of movsx */
1798 COSTS_N_INSNS (1), /* cost of movzx */
1799 8, /* "large" insn */
1800 17, /* MOVE_RATIO */
1801 4, /* cost for loading QImode using movzbl */
1802 {4, 4, 4}, /* cost of loading integer registers
1803 in QImode, HImode and SImode.
1804 Relative to reg-reg move (2). */
1805 {4, 4, 4}, /* cost of storing integer registers */
1806 4, /* cost of reg,reg fld/fst */
1807 {12, 12, 12}, /* cost of loading fp registers
1808 in SFmode, DFmode and XFmode */
1809 {6, 6, 8}, /* cost of storing fp registers
1810 in SFmode, DFmode and XFmode */
1811 2, /* cost of moving MMX register */
1812 {8, 8}, /* cost of loading MMX registers
1813 in SImode and DImode */
1814 {8, 8}, /* cost of storing MMX registers
1815 in SImode and DImode */
1816 2, /* cost of moving SSE register */
1817 {8, 8, 8}, /* cost of loading SSE registers
1818 in SImode, DImode and TImode */
1819 {8, 8, 8}, /* cost of storing SSE registers
1820 in SImode, DImode and TImode */
1821 5, /* MMX or SSE register to integer */
1822 32, /* size of l1 cache. */
1823 256, /* size of l2 cache. */
1824 64, /* size of prefetch block */
1825 6, /* number of parallel prefetches */
1826 3, /* Branch cost */
1827 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1828 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1829 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1830 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1831 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1832 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1833 intel_memcpy,
1834 intel_memset,
1835 1, /* scalar_stmt_cost. */
1836 1, /* scalar load_cost. */
1837 1, /* scalar_store_cost. */
1838 1, /* vec_stmt_cost. */
1839 4, /* vec_to_scalar_cost. */
1840 1, /* scalar_to_vec_cost. */
1841 1, /* vec_align_load_cost. */
1842 2, /* vec_unalign_load_cost. */
1843 1, /* vec_store_cost. */
1844 3, /* cond_taken_branch_cost. */
1845 1, /* cond_not_taken_branch_cost. */
1848 /* Generic should produce code tuned for Core i7 (and newer chips)
1849    and btver1 (and newer chips). */
1851 static stringop_algs generic_memcpy[2] = {
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1853 {-1, libcall, false}}},
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1855 {-1, libcall, false}}}};
1856 static stringop_algs generic_memset[2] = {
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1858 {-1, libcall, false}}},
1859 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1860 {-1, libcall, false}}}};
1861 static const
1862 struct processor_costs generic_cost = {
1863 COSTS_N_INSNS (1), /* cost of an add instruction */
1864 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1865    this cost, however, our current implementation of synth_mult results in
1866    the use of unnecessary temporary registers, causing regressions on several
1867    SPECfp benchmarks. */
1868 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1869 COSTS_N_INSNS (1), /* variable shift costs */
1870 COSTS_N_INSNS (1), /* constant shift costs */
1871 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1872 COSTS_N_INSNS (4), /* HI */
1873 COSTS_N_INSNS (3), /* SI */
1874 COSTS_N_INSNS (4), /* DI */
1875 COSTS_N_INSNS (2)}, /* other */
1876 0, /* cost of multiply per each bit set */
1877 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1878 COSTS_N_INSNS (26), /* HI */
1879 COSTS_N_INSNS (42), /* SI */
1880 COSTS_N_INSNS (74), /* DI */
1881 COSTS_N_INSNS (74)}, /* other */
1882 COSTS_N_INSNS (1), /* cost of movsx */
1883 COSTS_N_INSNS (1), /* cost of movzx */
1884 8, /* "large" insn */
1885 17, /* MOVE_RATIO */
1886 4, /* cost for loading QImode using movzbl */
1887 {4, 4, 4}, /* cost of loading integer registers
1888 in QImode, HImode and SImode.
1889 Relative to reg-reg move (2). */
1890 {4, 4, 4}, /* cost of storing integer registers */
1891 4, /* cost of reg,reg fld/fst */
1892 {12, 12, 12}, /* cost of loading fp registers
1893 in SFmode, DFmode and XFmode */
1894 {6, 6, 8}, /* cost of storing fp registers
1895 in SFmode, DFmode and XFmode */
1896 2, /* cost of moving MMX register */
1897 {8, 8}, /* cost of loading MMX registers
1898 in SImode and DImode */
1899 {8, 8}, /* cost of storing MMX registers
1900 in SImode and DImode */
1901 2, /* cost of moving SSE register */
1902 {8, 8, 8}, /* cost of loading SSE registers
1903 in SImode, DImode and TImode */
1904 {8, 8, 8}, /* cost of storing SSE registers
1905 in SImode, DImode and TImode */
1906 5, /* MMX or SSE register to integer */
1907 32, /* size of l1 cache. */
1908 512, /* size of l2 cache. */
1909 64, /* size of prefetch block */
1910 6, /* number of parallel prefetches */
1911 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1912    value is increased to the perhaps more appropriate value of 5. */
1913 3, /* Branch cost */
1914 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1915 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1916 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1917 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1918 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1919 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1920 generic_memcpy,
1921 generic_memset,
1922 1, /* scalar_stmt_cost. */
1923 1, /* scalar load_cost. */
1924 1, /* scalar_store_cost. */
1925 1, /* vec_stmt_cost. */
1926 1, /* vec_to_scalar_cost. */
1927 1, /* scalar_to_vec_cost. */
1928 1, /* vec_align_load_cost. */
1929 2, /* vec_unalign_load_cost. */
1930 1, /* vec_store_cost. */
1931 3, /* cond_taken_branch_cost. */
1932 1, /* cond_not_taken_branch_cost. */
1935 /* core_cost should produce code tuned for the Core family of CPUs. */
1936 static stringop_algs core_memcpy[2] = {
1937 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940 static stringop_algs core_memset[2] = {
1941 {libcall, {{6, loop_1_byte, true},
1942 {24, loop, true},
1943 {8192, rep_prefix_4_byte, true},
1944 {-1, libcall, false}}},
1945 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1946 {-1, libcall, false}}}};
1948 static const
1949 struct processor_costs core_cost = {
1950 COSTS_N_INSNS (1), /* cost of an add instruction */
1951 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1952    this cost, however, our current implementation of synth_mult results in
1953    the use of unnecessary temporary registers, causing regressions on several
1954    SPECfp benchmarks. */
1955 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1956 COSTS_N_INSNS (1), /* variable shift costs */
1957 COSTS_N_INSNS (1), /* constant shift costs */
1958 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1959 COSTS_N_INSNS (4), /* HI */
1960 COSTS_N_INSNS (3), /* SI */
1961 COSTS_N_INSNS (4), /* DI */
1962 COSTS_N_INSNS (2)}, /* other */
1963 0, /* cost of multiply per each bit set */
1964 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1965 COSTS_N_INSNS (26), /* HI */
1966 COSTS_N_INSNS (42), /* SI */
1967 COSTS_N_INSNS (74), /* DI */
1968 COSTS_N_INSNS (74)}, /* other */
1969 COSTS_N_INSNS (1), /* cost of movsx */
1970 COSTS_N_INSNS (1), /* cost of movzx */
1971 8, /* "large" insn */
1972 17, /* MOVE_RATIO */
1973 4, /* cost for loading QImode using movzbl */
1974 {4, 4, 4}, /* cost of loading integer registers
1975 in QImode, HImode and SImode.
1976 Relative to reg-reg move (2). */
1977 {4, 4, 4}, /* cost of storing integer registers */
1978 4, /* cost of reg,reg fld/fst */
1979 {12, 12, 12}, /* cost of loading fp registers
1980 in SFmode, DFmode and XFmode */
1981 {6, 6, 8}, /* cost of storing fp registers
1982 in SFmode, DFmode and XFmode */
1983 2, /* cost of moving MMX register */
1984 {8, 8}, /* cost of loading MMX registers
1985 in SImode and DImode */
1986 {8, 8}, /* cost of storing MMX registers
1987 in SImode and DImode */
1988 2, /* cost of moving SSE register */
1989 {8, 8, 8}, /* cost of loading SSE registers
1990 in SImode, DImode and TImode */
1991 {8, 8, 8}, /* cost of storing SSE registers
1992 in SImode, DImode and TImode */
1993 5, /* MMX or SSE register to integer */
1994 64, /* size of l1 cache. */
1995 512, /* size of l2 cache. */
1996 64, /* size of prefetch block */
1997 6, /* number of parallel prefetches */
1998 /* FIXME: perhaps a more appropriate value is 5. */
1999 3, /* Branch cost */
2000 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2001 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2002 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2003 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2004 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2005 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2006 core_memcpy,
2007 core_memset,
2008 1, /* scalar_stmt_cost. */
2009 1, /* scalar load_cost. */
2010 1, /* scalar_store_cost. */
2011 1, /* vec_stmt_cost. */
2012 1, /* vec_to_scalar_cost. */
2013 1, /* scalar_to_vec_cost. */
2014 1, /* vec_align_load_cost. */
2015 2, /* vec_unalign_load_cost. */
2016 1, /* vec_store_cost. */
2017 3, /* cond_taken_branch_cost. */
2018 1, /* cond_not_taken_branch_cost. */
2022 /* Set by -mtune. */
2023 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2025 /* Set by -mtune or -Os. */
2026 const struct processor_costs *ix86_cost = &pentium_cost;
2028 /* Processor feature/optimization bitmasks. */
2029 #define m_386 (1<<PROCESSOR_I386)
2030 #define m_486 (1<<PROCESSOR_I486)
2031 #define m_PENT (1<<PROCESSOR_PENTIUM)
2032 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2033 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2034 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2035 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2036 #define m_CORE2 (1<<PROCESSOR_CORE2)
2037 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2038 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2039 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2040 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2041 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2042 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2043 #define m_INTEL (1<<PROCESSOR_INTEL)
2045 #define m_GEODE (1<<PROCESSOR_GEODE)
2046 #define m_K6 (1<<PROCESSOR_K6)
2047 #define m_K6_GEODE (m_K6 | m_GEODE)
2048 #define m_K8 (1<<PROCESSOR_K8)
2049 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2050 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2051 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2052 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2053 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2054 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2055 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2056 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2057 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2058 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2059 #define m_BTVER (m_BTVER1 | m_BTVER2)
2060 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2062 #define m_GENERIC (1<<PROCESSOR_GENERIC)
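/* For illustration: these m_* masks are OR-ed together to form the selector
   column of x86-tune.def.  A hypothetical selector such as
     m_CORE_ALL | m_BTVER | m_GENERIC
   enables the corresponding tuning whenever (1u << ix86_tune) intersects the
   mask; see set_ix86_tune_features below.  */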
2064 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2065 #undef DEF_TUNE
2066 #define DEF_TUNE(tune, name, selector) name,
2067 #include "x86-tune.def"
2068 #undef DEF_TUNE
2071 /* Feature tests against the various tunings. */
2072 unsigned char ix86_tune_features[X86_TUNE_LAST];
2074 /* Feature tests against the various tunings used to create ix86_tune_features
2075 based on the processor mask. */
2076 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2077 #undef DEF_TUNE
2078 #define DEF_TUNE(tune, name, selector) selector,
2079 #include "x86-tune.def"
2080 #undef DEF_TUNE
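/* For illustration: each DEF_TUNE (tune, name, selector) entry in
   x86-tune.def expands once into ix86_tune_feature_names above (keeping only
   NAME) and once into initial_ix86_tune_features here (keeping only
   SELECTOR), so the two arrays stay parallel.  A hypothetical entry
     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_GENERIC)
   would contribute "example" to the first array and
   m_CORE_ALL | m_GENERIC to the second.  */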
2083 /* Feature tests against the various architecture variations. */
2084 unsigned char ix86_arch_features[X86_ARCH_LAST];
2086 /* Feature tests against the various architecture variations, used to create
2087 ix86_arch_features based on the processor mask. */
2088 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2089 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2090 ~(m_386 | m_486 | m_PENT | m_K6),
2092 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2093 ~m_386,
2095 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2096 ~(m_386 | m_486),
2098 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2099 ~m_386,
2101 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2102 ~m_386,
2105 /* If the average insn count for a single function invocation is
2106    lower than this constant, emit fast (but longer) prologue and
2107    epilogue code. */
2108 #define FAST_PROLOGUE_INSN_COUNT 20
2110 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2111 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2112 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2113 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2115 /* Array of the smallest class containing reg number REGNO, indexed by
2116 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2118 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2120 /* ax, dx, cx, bx */
2121 AREG, DREG, CREG, BREG,
2122 /* si, di, bp, sp */
2123 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2124 /* FP registers */
2125 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2126 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2127 /* arg pointer */
2128 NON_Q_REGS,
2129 /* flags, fpsr, fpcr, frame */
2130 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2131 /* SSE registers */
2132 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2133 SSE_REGS, SSE_REGS,
2134 /* MMX registers */
2135 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2136 MMX_REGS, MMX_REGS,
2137 /* REX registers */
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 /* SSE REX registers */
2141 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2142 SSE_REGS, SSE_REGS,
2143 /* AVX-512 SSE registers */
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 /* Mask registers. */
2149 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 /* MPX bound registers */
2152 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2155 /* The "default" register map used in 32bit mode. */
2157 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2159 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2160 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2162 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2163 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2168 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2169 101, 102, 103, 104, /* bound registers */
2172 /* The "default" register map used in 64bit mode. */
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2183 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2184 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2185 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2186 126, 127, 128, 129, /* bound registers */
2189 /* Define the register numbers to be used in Dwarf debugging information.
2190 The SVR4 reference port C compiler uses the following register numbers
2191 in its Dwarf output code:
2192 0 for %eax (gcc regno = 0)
2193 1 for %ecx (gcc regno = 2)
2194 2 for %edx (gcc regno = 1)
2195 3 for %ebx (gcc regno = 3)
2196 4 for %esp (gcc regno = 7)
2197 5 for %ebp (gcc regno = 6)
2198 6 for %esi (gcc regno = 4)
2199 7 for %edi (gcc regno = 5)
2200 The following three DWARF register numbers are never generated by
2201 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2202 believes these numbers have these meanings.
2203 8 for %eip (no gcc equivalent)
2204 9 for %eflags (gcc regno = 17)
2205 10 for %trapno (no gcc equivalent)
2206 It is not at all clear how we should number the FP stack registers
2207 for the x86 architecture. If the version of SDB on x86/svr4 were
2208 a bit less brain dead with respect to floating-point then we would
2209 have a precedent to follow with respect to DWARF register numbers
2210 for x86 FP registers, but the SDB on x86/svr4 is so completely
2211 broken with respect to FP registers that it is hardly worth thinking
2212 of it as something to strive for compatibility with.
2213 The version of x86/svr4 SDB I have at the moment does (partially)
2214 seem to believe that DWARF register number 11 is associated with
2215 the x86 register %st(0), but that's about all. Higher DWARF
2216 register numbers don't seem to be associated with anything in
2217 particular, and even for DWARF regno 11, SDB only seems to under-
2218 stand that it should say that a variable lives in %st(0) (when
2219 asked via an `=' command) if we said it was in DWARF regno 11,
2220 but SDB still prints garbage when asked for the value of the
2221 variable in question (via a `/' command).
2222 (Also note that the labels SDB prints for various FP stack regs
2223 when doing an `x' command are all wrong.)
2224 Note that these problems generally don't affect the native SVR4
2225 C compiler because it doesn't allow the use of -O with -g and
2226 because when it is *not* optimizing, it allocates a memory
2227 location for each floating-point variable, and the memory
2228 location is what gets described in the DWARF AT_location
2229 attribute for the variable in question.
2230 Regardless of the severe mental illness of the x86/svr4 SDB, we
2231 do something sensible here and we use the following DWARF
2232 register numbers. Note that these are all stack-top-relative
2233 numbers.
2234 11 for %st(0) (gcc regno = 8)
2235 12 for %st(1) (gcc regno = 9)
2236 13 for %st(2) (gcc regno = 10)
2237 14 for %st(3) (gcc regno = 11)
2238 15 for %st(4) (gcc regno = 12)
2239 16 for %st(5) (gcc regno = 13)
2240 17 for %st(6) (gcc regno = 14)
2241 18 for %st(7) (gcc regno = 15)
2243 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2245 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2246 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2247 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2248 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2249 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2254 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2255 101, 102, 103, 104, /* bound registers */
2258 /* Define parameter passing and return registers. */
2260 static int const x86_64_int_parameter_registers[6] =
2262 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2265 static int const x86_64_ms_abi_int_parameter_registers[4] =
2267 CX_REG, DX_REG, R8_REG, R9_REG
2270 static int const x86_64_int_return_registers[4] =
2272 AX_REG, DX_REG, DI_REG, SI_REG
2275 /* Additional registers that are clobbered by SYSV calls. */
2277 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2279 SI_REG, DI_REG,
2280 XMM6_REG, XMM7_REG,
2281 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2282 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2285 /* Define the structure for the machine field in struct function. */
2287 struct GTY(()) stack_local_entry {
2288 unsigned short mode;			/* Machine mode of the slot.  */
2289 unsigned short n;			/* Slot number within that mode.  */
2290 rtx rtl;				/* The MEM rtx for the slot.  */
2291 struct stack_local_entry *next;	/* Chain of cached slots.  */
2294 /* Structure describing stack frame layout.
2295 Stack grows downward:
2297 [arguments]
2298 <- ARG_POINTER
2299 saved pc
2301 saved static chain if ix86_static_chain_on_stack
2303 saved frame pointer if frame_pointer_needed
2304 <- HARD_FRAME_POINTER
2305 [saved regs]
2306 <- regs_save_offset
2307 [padding0]
2309 [saved SSE regs]
2310 <- sse_regs_save_offset
2311 [padding1] |
2312 | <- FRAME_POINTER
2313 [va_arg registers] |
2315 [frame] |
2317 [padding2] | = to_allocate
2318 <- STACK_POINTER
2320 struct ix86_frame
2322 int nsseregs;
2323 int nregs;
2324 int va_arg_size;
2325 int red_zone_size;
2326 int outgoing_arguments_size;
2328 /* The offsets relative to ARG_POINTER. */
2329 HOST_WIDE_INT frame_pointer_offset;
2330 HOST_WIDE_INT hard_frame_pointer_offset;
2331 HOST_WIDE_INT stack_pointer_offset;
2332 HOST_WIDE_INT hfp_save_offset;
2333 HOST_WIDE_INT reg_save_offset;
2334 HOST_WIDE_INT sse_reg_save_offset;
2336 /* When save_regs_using_mov is set, emit prologue using
2337 move instead of push instructions. */
2338 bool save_regs_using_mov;
2341 /* Which cpu are we scheduling for. */
2342 enum attr_cpu ix86_schedule;
2344 /* Which cpu are we optimizing for. */
2345 enum processor_type ix86_tune;
2347 /* Which instruction set architecture to use. */
2348 enum processor_type ix86_arch;
2350 /* True if processor has SSE prefetch instruction. */
2351 unsigned char x86_prefetch_sse;
2353 /* -mstackrealign option */
2354 static const char ix86_force_align_arg_pointer_string[]
2355 = "force_align_arg_pointer";
2357 static rtx (*ix86_gen_leave) (void);
2358 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2361 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2362 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2365 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2373 /* Alignment for incoming stack boundary in bits specified at
2374 command line. */
2375 static unsigned int ix86_user_incoming_stack_boundary;
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2388 char internal_label_prefix[16];
2389 int internal_label_prefix_len;
2391 /* Fence to use after loop using movnt. */
2392 tree x86_mfence;
2394 /* Register class used for passing a given 64bit part of the argument.
2395    These represent classes as documented by the psABI, with the exception
2396    of the SSESF and SSEDF classes, which are basically the SSE class except
2397    that gcc uses an SFmode or DFmode move instead of DImode to avoid
2399    reformatting penalties.  Similarly we play games with INTEGERSI_CLASS to
2400    use cheaper SImode moves whenever possible (the upper half does contain padding). */
2401 enum x86_64_reg_class
2403 X86_64_NO_CLASS,
2404 X86_64_INTEGER_CLASS,
2405 X86_64_INTEGERSI_CLASS,
2406 X86_64_SSE_CLASS,
2407 X86_64_SSESF_CLASS,
2408 X86_64_SSEDF_CLASS,
2409 X86_64_SSEUP_CLASS,
2410 X86_64_X87_CLASS,
2411 X86_64_X87UP_CLASS,
2412 X86_64_COMPLEX_X87_CLASS,
2413 X86_64_MEMORY_CLASS
2416 #define MAX_CLASSES 8
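/* For illustration (a rough sketch of the psABI classification these values
   encode, not an exhaustive rule): a value of type
     struct { double d; int i; }
   passed by value is split into two eightbytes, the first typically
   classified as X86_64_SSEDF_CLASS and the second as X86_64_INTEGERSI_CLASS,
   so D travels in an SSE register and I in a general-purpose register.  */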
2418 /* Table of constants used by fldpi, fldln2, etc.... */
2419 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2420 static bool ext_80387_constants_init = 0;
2423 static struct machine_function * ix86_init_machine_status (void);
2424 static rtx ix86_function_value (const_tree, const_tree, bool);
2425 static bool ix86_function_value_regno_p (const unsigned int);
2426 static unsigned int ix86_function_arg_boundary (machine_mode,
2427 const_tree);
2428 static rtx ix86_static_chain (const_tree, bool);
2429 static int ix86_function_regparm (const_tree, const_tree);
2430 static void ix86_compute_frame_layout (struct ix86_frame *);
2431 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2432 rtx, rtx, int);
2433 static void ix86_add_new_builtins (HOST_WIDE_INT);
2434 static tree ix86_canonical_va_list_type (tree);
2435 static void predict_jump (int);
2436 static unsigned int split_stack_prologue_scratch_regno (void);
2437 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2439 enum ix86_function_specific_strings
2441 IX86_FUNCTION_SPECIFIC_ARCH,
2442 IX86_FUNCTION_SPECIFIC_TUNE,
2443 IX86_FUNCTION_SPECIFIC_MAX
2446 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2447 const char *, enum fpmath_unit, bool);
2448 static void ix86_function_specific_save (struct cl_target_option *,
2449 struct gcc_options *opts);
2450 static void ix86_function_specific_restore (struct gcc_options *opts,
2451 struct cl_target_option *);
2452 static void ix86_function_specific_print (FILE *, int,
2453 struct cl_target_option *);
2454 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2455 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2456 struct gcc_options *,
2457 struct gcc_options *,
2458 struct gcc_options *);
2459 static bool ix86_can_inline_p (tree, tree);
2460 static void ix86_set_current_function (tree);
2461 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2463 static enum calling_abi ix86_function_abi (const_tree);
2466 #ifndef SUBTARGET32_DEFAULT_CPU
2467 #define SUBTARGET32_DEFAULT_CPU "i386"
2468 #endif
2470 /* Whether -mtune= or -march= were specified */
2471 static int ix86_tune_defaulted;
2472 static int ix86_arch_specified;
2474 /* Vectorization library interface and handlers. */
2475 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2477 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2480 /* Processor target table, indexed by processor number */
2481 struct ptt
2483 const char *const name; /* processor name */
2484 const struct processor_costs *cost; /* Processor costs */
2485 const int align_loop; /* Default alignments. */
2486 const int align_loop_max_skip;
2487 const int align_jump;
2488 const int align_jump_max_skip;
2489 const int align_func;
2492 /* This table must be in sync with enum processor_type in i386.h. */
2493 static const struct ptt processor_target_table[PROCESSOR_max] =
2495 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2496 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2497 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2498 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2499 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2500 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2501 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2502 {"core2", &core_cost, 16, 10, 16, 10, 16},
2503 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2504 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2505 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2506 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2507 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2508 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2509 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2510 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2511 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2512 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2513 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2514 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2515 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2516 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2517 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2518 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2519 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2522 static unsigned int
2523 rest_of_handle_insert_vzeroupper (void)
2525 int i;
2527 /* vzeroupper instructions are inserted immediately after reload to
2528    account for possible spills from 256bit registers.  The pass
2529    reuses the mode switching infrastructure by re-running the mode
2530    insertion pass, so disable entities that have already been processed. */
2531 for (i = 0; i < MAX_386_ENTITIES; i++)
2532 ix86_optimize_mode_switching[i] = 0;
2534 ix86_optimize_mode_switching[AVX_U128] = 1;
2536 /* Call optimize_mode_switching. */
2537 g->get_passes ()->execute_pass_mode_switching ();
2538 return 0;
2541 namespace {
2543 const pass_data pass_data_insert_vzeroupper =
2545 RTL_PASS, /* type */
2546 "vzeroupper", /* name */
2547 OPTGROUP_NONE, /* optinfo_flags */
2548 TV_NONE, /* tv_id */
2549 0, /* properties_required */
2550 0, /* properties_provided */
2551 0, /* properties_destroyed */
2552 0, /* todo_flags_start */
2553 TODO_df_finish, /* todo_flags_finish */
2556 class pass_insert_vzeroupper : public rtl_opt_pass
2558 public:
2559 pass_insert_vzeroupper(gcc::context *ctxt)
2560 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2563 /* opt_pass methods: */
2564 virtual bool gate (function *)
2566 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2569 virtual unsigned int execute (function *)
2571 return rest_of_handle_insert_vzeroupper ();
2574 }; // class pass_insert_vzeroupper
2576 } // anon namespace
2578 rtl_opt_pass *
2579 make_pass_insert_vzeroupper (gcc::context *ctxt)
2581 return new pass_insert_vzeroupper (ctxt);
2584 /* Return true if a red-zone is in use. */
2586 static inline bool
2587 ix86_using_red_zone (void)
2589 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2592 /* Return a string that documents the current -m options. The caller is
2593 responsible for freeing the string. */
2595 static char *
2596 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2597 const char *tune, enum fpmath_unit fpmath,
2598 bool add_nl_p)
2600 struct ix86_target_opts
2602 const char *option; /* option string */
2603 HOST_WIDE_INT mask; /* isa mask options */
2606 /* This table is ordered so that options like -msse4.2 that imply
2607    other options are matched before the options they imply. */
2608 static struct ix86_target_opts isa_opts[] =
2610 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2611 { "-mfma", OPTION_MASK_ISA_FMA },
2612 { "-mxop", OPTION_MASK_ISA_XOP },
2613 { "-mlwp", OPTION_MASK_ISA_LWP },
2614 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2615 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2616 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2617 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2618 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2619 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2620 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2621 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2622 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2623 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2624 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2625 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2626 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2627 { "-msse3", OPTION_MASK_ISA_SSE3 },
2628 { "-msse2", OPTION_MASK_ISA_SSE2 },
2629 { "-msse", OPTION_MASK_ISA_SSE },
2630 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2631 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2632 { "-mmmx", OPTION_MASK_ISA_MMX },
2633 { "-mabm", OPTION_MASK_ISA_ABM },
2634 { "-mbmi", OPTION_MASK_ISA_BMI },
2635 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2636 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2637 { "-mhle", OPTION_MASK_ISA_HLE },
2638 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2639 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2640 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2641 { "-madx", OPTION_MASK_ISA_ADX },
2642 { "-mtbm", OPTION_MASK_ISA_TBM },
2643 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2644 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2645 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2646 { "-maes", OPTION_MASK_ISA_AES },
2647 { "-msha", OPTION_MASK_ISA_SHA },
2648 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2649 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2650 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2651 { "-mf16c", OPTION_MASK_ISA_F16C },
2652 { "-mrtm", OPTION_MASK_ISA_RTM },
2653 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2654 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2655 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2656 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2657 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2658 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2659 { "-mmpx", OPTION_MASK_ISA_MPX },
2660 { "-mclwb", OPTION_MASK_ISA_CLWB },
2661 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2664 /* Flag options. */
2665 static struct ix86_target_opts flag_opts[] =
2667 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2668 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2669 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2670 { "-m80387", MASK_80387 },
2671 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2672 { "-malign-double", MASK_ALIGN_DOUBLE },
2673 { "-mcld", MASK_CLD },
2674 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2675 { "-mieee-fp", MASK_IEEE_FP },
2676 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2677 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2678 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2679 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2680 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2681 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2682 { "-mno-red-zone", MASK_NO_RED_ZONE },
2683 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2684 { "-mrecip", MASK_RECIP },
2685 { "-mrtd", MASK_RTD },
2686 { "-msseregparm", MASK_SSEREGPARM },
2687 { "-mstack-arg-probe", MASK_STACK_PROBE },
2688 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2689 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2690 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2691 { "-mvzeroupper", MASK_VZEROUPPER },
2692 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2693 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2694 { "-mprefer-avx128", MASK_PREFER_AVX128},
2697 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2699 char isa_other[40];
2700 char target_other[40];
2701 unsigned num = 0;
2702 unsigned i, j;
2703 char *ret;
2704 char *ptr;
2705 size_t len;
2706 size_t line_len;
2707 size_t sep_len;
2708 const char *abi;
2710 memset (opts, '\0', sizeof (opts));
2712 /* Add -march= option. */
2713 if (arch)
2715 opts[num][0] = "-march=";
2716 opts[num++][1] = arch;
2719 /* Add -mtune= option. */
2720 if (tune)
2722 opts[num][0] = "-mtune=";
2723 opts[num++][1] = tune;
2726 /* Add -m32/-m64/-mx32. */
2727 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2729 if ((isa & OPTION_MASK_ABI_64) != 0)
2730 abi = "-m64";
2731 else
2732 abi = "-mx32";
2733 isa &= ~ (OPTION_MASK_ISA_64BIT
2734 | OPTION_MASK_ABI_64
2735 | OPTION_MASK_ABI_X32);
2737 else
2738 abi = "-m32";
2739 opts[num++][0] = abi;
2741 /* Pick out the options in isa options. */
2742 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2744 if ((isa & isa_opts[i].mask) != 0)
2746 opts[num++][0] = isa_opts[i].option;
2747 isa &= ~ isa_opts[i].mask;
2751 if (isa && add_nl_p)
2753 opts[num++][0] = isa_other;
2754 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2755 isa);
2758 /* Add flag options. */
2759 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2761 if ((flags & flag_opts[i].mask) != 0)
2763 opts[num++][0] = flag_opts[i].option;
2764 flags &= ~ flag_opts[i].mask;
2768 if (flags && add_nl_p)
2770 opts[num++][0] = target_other;
2771 sprintf (target_other, "(other flags: %#x)", flags);
2774 /* Add -fpmath= option. */
2775 if (fpmath)
2777 opts[num][0] = "-mfpmath=";
2778 switch ((int) fpmath)
2780 case FPMATH_387:
2781 opts[num++][1] = "387";
2782 break;
2784 case FPMATH_SSE:
2785 opts[num++][1] = "sse";
2786 break;
2788 case FPMATH_387 | FPMATH_SSE:
2789 opts[num++][1] = "sse+387";
2790 break;
2792 default:
2793 gcc_unreachable ();
2797 /* Any options? */
2798 if (num == 0)
2799 return NULL;
2801 gcc_assert (num < ARRAY_SIZE (opts));
2803 /* Size the string. */
2804 len = 0;
2805 sep_len = (add_nl_p) ? 3 : 1;
2806 for (i = 0; i < num; i++)
2808 len += sep_len;
2809 for (j = 0; j < 2; j++)
2810 if (opts[i][j])
2811 len += strlen (opts[i][j]);
2814 /* Build the string. */
2815 ret = ptr = (char *) xmalloc (len);
2816 line_len = 0;
2818 for (i = 0; i < num; i++)
2820 size_t len2[2];
2822 for (j = 0; j < 2; j++)
2823 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2825 if (i != 0)
2827 *ptr++ = ' ';
2828 line_len++;
2830 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2832 *ptr++ = '\\';
2833 *ptr++ = '\n';
2834 line_len = 0;
2838 for (j = 0; j < 2; j++)
2839 if (opts[i][j])
2841 memcpy (ptr, opts[i][j], len2[j]);
2842 ptr += len2[j];
2843 line_len += len2[j];
2847 *ptr = '\0';
2848 gcc_assert (ret + len >= ptr);
2850 return ret;
2853 /* Return true if profiling code should be emitted before the
2854    prologue, and false otherwise.
2855    Note: for x86 the "hotfix" case is rejected with sorry ().  */
2856 static bool
2857 ix86_profile_before_prologue (void)
2859 return flag_fentry != 0;
2862 /* Function that is callable from the debugger to print the current
2863 options. */
2864 void ATTRIBUTE_UNUSED
2865 ix86_debug_options (void)
2867 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2868 ix86_arch_string, ix86_tune_string,
2869 ix86_fpmath, true);
2871 if (opts)
2873 fprintf (stderr, "%s\n\n", opts);
2874 free (opts);
2876 else
2877 fputs ("<no options>\n\n", stderr);
2879 return;
2882 static const char *stringop_alg_names[] = {
2883 #define DEF_ENUM
2884 #define DEF_ALG(alg, name) #name,
2885 #include "stringop.def"
2886 #undef DEF_ENUM
2887 #undef DEF_ALG
2890 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2891    The string is of the following form (or a comma-separated list of such entries):
2893 strategy_alg:max_size:[align|noalign]
2895 where the full size range for the strategy is either [0, max_size] or
2896 [min_size, max_size], in which min_size is the max_size + 1 of the
2897 preceding range. The last size range must have max_size == -1.
2899 Examples:
2902 -mmemcpy-strategy=libcall:-1:noalign
2904 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2908 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2910    This tells the compiler to use the following strategy for memset:
2911 1) when the expected size is between [1, 16], use rep_8byte strategy;
2912 2) when the size is between [17, 2048], use vector_loop;
2913 3) when the size is > 2048, use libcall. */
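/* For illustration: parsing the -mmemset-strategy example above fills three
   stringop_size_range entries, roughly
     { max = 16,   alg = rep_8byte,   noalign = true  }
     { max = 2048, alg = vector_loop, noalign = false }
     { max = -1,   alg = libcall,     noalign = true  }
   which ix86_parse_stringop_strategy_string then copies over the default
   algs array.  */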
2915 struct stringop_size_range
2917 int max;
2918 stringop_alg alg;
2919 bool noalign;
2922 static void
2923 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2925 const struct stringop_algs *default_algs;
2926 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2927 char *curr_range_str, *next_range_str;
2928 int i = 0, n = 0;
2930 if (is_memset)
2931 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2932 else
2933 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2935 curr_range_str = strategy_str;
2939 int maxs;
2940 char alg_name[128];
2941 char align[16];
2942 next_range_str = strchr (curr_range_str, ',');
2943 if (next_range_str)
2944 *next_range_str++ = '\0';
2946 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2947 alg_name, &maxs, align))
2949 error ("wrong arg %s to option %s", curr_range_str,
2950 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2951 return;
2954 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2956 error ("size ranges of option %s should be increasing",
2957 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2958 return;
2961 for (i = 0; i < last_alg; i++)
2962 if (!strcmp (alg_name, stringop_alg_names[i]))
2963 break;
2965 if (i == last_alg)
2967 error ("wrong stringop strategy name %s specified for option %s",
2968 alg_name,
2969 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2970 return;
2973 input_ranges[n].max = maxs;
2974 input_ranges[n].alg = (stringop_alg) i;
2975 if (!strcmp (align, "align"))
2976 input_ranges[n].noalign = false;
2977 else if (!strcmp (align, "noalign"))
2978 input_ranges[n].noalign = true;
2979 else
2981 error ("unknown alignment %s specified for option %s",
2982 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2983 return;
2985 n++;
2986 curr_range_str = next_range_str;
2988 while (curr_range_str);
2990 if (input_ranges[n - 1].max != -1)
2992 error ("the max value for the last size range should be -1"
2993 " for option %s",
2994 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2995 return;
2998 if (n > MAX_STRINGOP_ALGS)
3000 error ("too many size ranges specified in option %s",
3001 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3002 return;
3005 /* Now override the default algs array. */
3006 for (i = 0; i < n; i++)
3008 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3009 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3010 = input_ranges[i].alg;
3011 *const_cast<int *>(&default_algs->size[i].noalign)
3012 = input_ranges[i].noalign;
3017 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3018    print the features that are explicitly set. */
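/* For illustration: the option takes a comma-separated list of feature names
   from x86-tune.def, with a leading '^' clearing a feature instead of
   setting it; e.g. a hypothetical
     -mtune-ctrl=feature_a,^feature_b
   (placeholder names) would set feature_a and clear feature_b.  */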
3020 static void
3021 parse_mtune_ctrl_str (bool dump)
3023 if (!ix86_tune_ctrl_string)
3024 return;
3026 char *next_feature_string = NULL;
3027 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3028 char *orig = curr_feature_string;
3029 int i;
3032 bool clear = false;
3034 next_feature_string = strchr (curr_feature_string, ',');
3035 if (next_feature_string)
3036 *next_feature_string++ = '\0';
3037 if (*curr_feature_string == '^')
3039 curr_feature_string++;
3040 clear = true;
3042 for (i = 0; i < X86_TUNE_LAST; i++)
3044 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3046 ix86_tune_features[i] = !clear;
3047 if (dump)
3048 fprintf (stderr, "Explicitly %s feature %s\n",
3049 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3050 break;
3053 if (i == X86_TUNE_LAST)
3054 error ("Unknown parameter to option -mtune-ctrl: %s",
3055 clear ? curr_feature_string - 1 : curr_feature_string);
3056 curr_feature_string = next_feature_string;
3058 while (curr_feature_string);
3059 free (orig);
3062 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3063 processor type. */
3065 static void
3066 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3068 unsigned int ix86_tune_mask = 1u << ix86_tune;
3069 int i;
3071 for (i = 0; i < X86_TUNE_LAST; ++i)
3073 if (ix86_tune_no_default)
3074 ix86_tune_features[i] = 0;
3075 else
3076 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3079 if (dump)
3081 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3082 for (i = 0; i < X86_TUNE_LAST; i++)
3083 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3084 ix86_tune_features[i] ? "on" : "off");
3087 parse_mtune_ctrl_str (dump);
3091 /* Override various settings based on options. If MAIN_ARGS_P, the
3092 options are from the command line, otherwise they are from
3093 attributes. */
3095 static void
3096 ix86_option_override_internal (bool main_args_p,
3097 struct gcc_options *opts,
3098 struct gcc_options *opts_set)
3100 int i;
3101 unsigned int ix86_arch_mask;
3102 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3103 const char *prefix;
3104 const char *suffix;
3105 const char *sw;
3107 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3108 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3109 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3110 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3111 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3112 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3113 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3114 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3115 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3116 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3117 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3118 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3119 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3120 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3121 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3122 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3123 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3124 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3125 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3126 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3127 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3128 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3129 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3130 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3131 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3132 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3133 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3134 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3135 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3136 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3137 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3138 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3139 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3140 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3141 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3142 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3143 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3144 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3145 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3146 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3147 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3148 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3149 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3150 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3151 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3152 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3153 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3154 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3155 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3156 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3157 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3158 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3159 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3160 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3161 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3162 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3163 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3165 #define PTA_CORE2 \
3166 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3167 | PTA_CX16 | PTA_FXSR)
3168 #define PTA_NEHALEM \
3169 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3170 #define PTA_WESTMERE \
3171 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3172 #define PTA_SANDYBRIDGE \
3173 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3174 #define PTA_IVYBRIDGE \
3175 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3176 #define PTA_HASWELL \
3177 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3178 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3179 #define PTA_BROADWELL \
3180 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3181 #define PTA_BONNELL \
3182 (PTA_CORE2 | PTA_MOVBE)
3183 #define PTA_SILVERMONT \
3184 (PTA_WESTMERE | PTA_MOVBE)
3186 /* If this reaches 64, we need to widen the flags field of struct pta below. */
3188 static struct pta
3190 const char *const name; /* processor name or nickname. */
3191 const enum processor_type processor;
3192 const enum attr_cpu schedule;
3193 const unsigned HOST_WIDE_INT flags;
3195 const processor_alias_table[] =
3197 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3198 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3199 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3200 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3201 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3202 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3203 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3204 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3205 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3206 PTA_MMX | PTA_SSE | PTA_FXSR},
3207 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3208 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3209 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3210 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3211 PTA_MMX | PTA_SSE | PTA_FXSR},
3212 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3213 PTA_MMX | PTA_SSE | PTA_FXSR},
3214 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3215 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3216 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3217 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3218 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3219 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3220 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3221 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3222 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3223 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3224 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3225 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3226 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3227 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3228 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3229 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3230 PTA_SANDYBRIDGE},
3231 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3232 PTA_SANDYBRIDGE},
3233 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3234 PTA_IVYBRIDGE},
3235 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3236 PTA_IVYBRIDGE},
3237 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3238 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3239 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3240 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3241 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3242 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3243 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3244 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3245 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3246 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3247 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3248 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3249 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3250 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3252 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3253 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3254 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3255 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3256 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3257 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3258 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3259 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3260 {"x86-64", PROCESSOR_K8, CPU_K8,
3261 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3262 {"k8", PROCESSOR_K8, CPU_K8,
3263 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3264 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3265 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3266 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3267 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3268 {"opteron", PROCESSOR_K8, CPU_K8,
3269 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3270 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3271 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3272 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3273 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3274 {"athlon64", PROCESSOR_K8, CPU_K8,
3275 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3276 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3278 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3279 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3280 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3281 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3282 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3283 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3285 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3286 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3288 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3289 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3290 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3291 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3292 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3293 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3294 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3295 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3296 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3297 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3298 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3299 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3300 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3301 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3302 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3303 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3304 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3305 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3306 | PTA_XSAVEOPT | PTA_FSGSBASE},
3307 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3308 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3309 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3310 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3311 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3312 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3313 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3314 | PTA_MOVBE},
3315 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3318 | PTA_FXSR | PTA_XSAVE},
3319 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3320 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3321 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3322 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3323 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3324 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3326 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3327 PTA_64BIT
3328 | PTA_HLE /* flags are only used for -march switch. */ },
3331 /* -mrecip options. */
3332 static struct
3334 const char *string; /* option name */
3335 unsigned int mask; /* mask bits to set */
3337 const recip_options[] =
3339 { "all", RECIP_MASK_ALL },
3340 { "none", RECIP_MASK_NONE },
3341 { "div", RECIP_MASK_DIV },
3342 { "sqrt", RECIP_MASK_SQRT },
3343 { "vec-div", RECIP_MASK_VEC_DIV },
3344 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3347 int const pta_size = ARRAY_SIZE (processor_alias_table);
3349 /* Set up prefix/suffix so the error messages refer to either the command
3350 line argument, or the attribute(target). */
3351 if (main_args_p)
3353 prefix = "-m";
3354 suffix = "";
3355 sw = "switch";
3357 else
3359 prefix = "option(\"";
3360 suffix = "\")";
3361 sw = "attribute";
3364 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3365 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3366 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3367 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3368 #ifdef TARGET_BI_ARCH
3369 else
3371 #if TARGET_BI_ARCH == 1
3372 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3373 is on and OPTION_MASK_ABI_X32 is off. We turn off
3374 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3375 -mx32. */
3376 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3377 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3378 #else
3379 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3380 on and OPTION_MASK_ABI_64 is off. We turn off
3381 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3382 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3383 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3384 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3386 #endif
3388 #endif
3390 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3392 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3393 OPTION_MASK_ABI_64 for TARGET_X32. */
3394 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3395 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3397 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3399 | OPTION_MASK_ABI_X32
3400 | OPTION_MASK_ABI_64);
3401 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3403 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3404 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3405 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3409 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3410 SUBTARGET_OVERRIDE_OPTIONS;
3411 #endif
3413 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3414 SUBSUBTARGET_OVERRIDE_OPTIONS;
3415 #endif
3417 /* -fPIC is the default for x86_64. */
3418 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_flag_pic = 2;
3421 /* Need to check -mtune=generic first. */
3422 if (opts->x_ix86_tune_string)
3424 /* As special support for cross compilers we read -mtune=native
3425 as -mtune=generic. With native compilers we won't see the
3426 -mtune=native, as it was changed by the driver. */
3427 if (!strcmp (opts->x_ix86_tune_string, "native"))
3429 opts->x_ix86_tune_string = "generic";
3431 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3432 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3433 "%stune=k8%s or %stune=generic%s instead as appropriate",
3434 prefix, suffix, prefix, suffix, prefix, suffix);
3436 else
3438 if (opts->x_ix86_arch_string)
3439 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3440 if (!opts->x_ix86_tune_string)
3442 opts->x_ix86_tune_string
3443 = processor_target_table[TARGET_CPU_DEFAULT].name;
3444 ix86_tune_defaulted = 1;
3447 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3448 or defaulted. We need to use a sensible tune option. */
3449 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3451 opts->x_ix86_tune_string = "generic";
3455 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3456 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3458 /* rep; movq isn't available in 32-bit code. */
3459 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3460 opts->x_ix86_stringop_alg = no_stringop;
3463 if (!opts->x_ix86_arch_string)
3464 opts->x_ix86_arch_string
3465 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3466 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3467 else
3468 ix86_arch_specified = 1;
3470 if (opts_set->x_ix86_pmode)
3472 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3473 && opts->x_ix86_pmode == PMODE_SI)
3474 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3475 && opts->x_ix86_pmode == PMODE_DI))
3476 error ("address mode %qs not supported in the %s bit mode",
3477 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3478 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3480 else
3481 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3482 ? PMODE_DI : PMODE_SI;
3484 if (!opts_set->x_ix86_abi)
3485 opts->x_ix86_abi = DEFAULT_ABI;
3487 /* For targets using the MS ABI, enable ms-extensions unless it is
3488 explicitly turned off. For non-MS ABI targets we turn this
3489 option off. */
3490 if (!opts_set->x_flag_ms_extensions)
3491 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3493 if (opts_set->x_ix86_cmodel)
3495 switch (opts->x_ix86_cmodel)
3497 case CM_SMALL:
3498 case CM_SMALL_PIC:
3499 if (opts->x_flag_pic)
3500 opts->x_ix86_cmodel = CM_SMALL_PIC;
3501 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3502 error ("code model %qs not supported in the %s bit mode",
3503 "small", "32");
3504 break;
3506 case CM_MEDIUM:
3507 case CM_MEDIUM_PIC:
3508 if (opts->x_flag_pic)
3509 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "medium", "32");
3513 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3514 error ("code model %qs not supported in x32 mode",
3515 "medium");
3516 break;
3518 case CM_LARGE:
3519 case CM_LARGE_PIC:
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_LARGE_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3524 "large", "32");
3525 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3526 error ("code model %qs not supported in x32 mode",
3527 "large");
3528 break;
3530 case CM_32:
3531 if (opts->x_flag_pic)
3532 error ("code model %s does not support PIC mode", "32");
3533 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3534 error ("code model %qs not supported in the %s bit mode",
3535 "32", "64");
3536 break;
3538 case CM_KERNEL:
3539 if (opts->x_flag_pic)
3541 error ("code model %s does not support PIC mode", "kernel");
3542 opts->x_ix86_cmodel = CM_32;
3544 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3545 error ("code model %qs not supported in the %s bit mode",
3546 "kernel", "32");
3547 break;
3549 default:
3550 gcc_unreachable ();
3553 else
3555 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3556 use of rip-relative addressing. This eliminates fixups that
3557 would otherwise be needed if this object is to be placed in a
3558 DLL, and is essentially just as efficient as direct addressing. */
3559 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3560 && (TARGET_RDOS || TARGET_PECOFF))
3561 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3562 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3563 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3564 else
3565 opts->x_ix86_cmodel = CM_32;
3567 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3569 error ("-masm=intel not supported in this configuration");
3570 opts->x_ix86_asm_dialect = ASM_ATT;
3572 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3573 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3574 sorry ("%i-bit mode not compiled in",
3575 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3577 for (i = 0; i < pta_size; i++)
3578 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3580 ix86_schedule = processor_alias_table[i].schedule;
3581 ix86_arch = processor_alias_table[i].processor;
3582 /* Default cpu tuning to the architecture. */
3583 ix86_tune = ix86_arch;
3585 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3586 && !(processor_alias_table[i].flags & PTA_64BIT))
3587 error ("CPU you selected does not support x86-64 "
3588 "instruction set");
3590 if (processor_alias_table[i].flags & PTA_MMX
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3593 if (processor_alias_table[i].flags & PTA_3DNOW
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3596 if (processor_alias_table[i].flags & PTA_3DNOW_A
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3599 if (processor_alias_table[i].flags & PTA_SSE
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3602 if (processor_alias_table[i].flags & PTA_SSE2
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3605 if (processor_alias_table[i].flags & PTA_SSE3
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3608 if (processor_alias_table[i].flags & PTA_SSSE3
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3611 if (processor_alias_table[i].flags & PTA_SSE4_1
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3614 if (processor_alias_table[i].flags & PTA_SSE4_2
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3617 if (processor_alias_table[i].flags & PTA_AVX
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3620 if (processor_alias_table[i].flags & PTA_AVX2
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3623 if (processor_alias_table[i].flags & PTA_FMA
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3626 if (processor_alias_table[i].flags & PTA_SSE4A
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3629 if (processor_alias_table[i].flags & PTA_FMA4
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3632 if (processor_alias_table[i].flags & PTA_XOP
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3635 if (processor_alias_table[i].flags & PTA_LWP
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3638 if (processor_alias_table[i].flags & PTA_ABM
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3641 if (processor_alias_table[i].flags & PTA_BMI
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3644 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3647 if (processor_alias_table[i].flags & PTA_TBM
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3650 if (processor_alias_table[i].flags & PTA_BMI2
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3653 if (processor_alias_table[i].flags & PTA_CX16
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3656 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3659 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3660 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3663 if (processor_alias_table[i].flags & PTA_MOVBE
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3666 if (processor_alias_table[i].flags & PTA_AES
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3669 if (processor_alias_table[i].flags & PTA_SHA
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3672 if (processor_alias_table[i].flags & PTA_PCLMUL
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3675 if (processor_alias_table[i].flags & PTA_FSGSBASE
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3678 if (processor_alias_table[i].flags & PTA_RDRND
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3681 if (processor_alias_table[i].flags & PTA_F16C
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3684 if (processor_alias_table[i].flags & PTA_RTM
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3687 if (processor_alias_table[i].flags & PTA_HLE
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3690 if (processor_alias_table[i].flags & PTA_PRFCHW
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3693 if (processor_alias_table[i].flags & PTA_RDSEED
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3696 if (processor_alias_table[i].flags & PTA_ADX
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3699 if (processor_alias_table[i].flags & PTA_FXSR
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3702 if (processor_alias_table[i].flags & PTA_XSAVE
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3705 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3708 if (processor_alias_table[i].flags & PTA_AVX512F
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3711 if (processor_alias_table[i].flags & PTA_AVX512ER
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3714 if (processor_alias_table[i].flags & PTA_AVX512PF
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3717 if (processor_alias_table[i].flags & PTA_AVX512CD
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3720 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3723 if (processor_alias_table[i].flags & PTA_PCOMMIT
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3726 if (processor_alias_table[i].flags & PTA_CLWB
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3729 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3732 if (processor_alias_table[i].flags & PTA_XSAVEC
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3735 if (processor_alias_table[i].flags & PTA_XSAVES
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3738 if (processor_alias_table[i].flags & PTA_AVX512DQ
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3741 if (processor_alias_table[i].flags & PTA_AVX512BW
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3744 if (processor_alias_table[i].flags & PTA_AVX512VL
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3747 if (processor_alias_table[i].flags & PTA_MPX
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3750 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3753 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3756 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3757 x86_prefetch_sse = true;
3759 break;
3762 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3763 error ("Intel MPX does not support x32");
3768 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3769 error ("generic CPU can be used only for %stune=%s %s",
3770 prefix, suffix, sw);
3771 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3772 error ("intel CPU can be used only for %stune=%s %s",
3773 prefix, suffix, sw);
3774 else if (i == pta_size)
3775 error ("bad value (%s) for %sarch=%s %s",
3776 opts->x_ix86_arch_string, prefix, suffix, sw);
3778 ix86_arch_mask = 1u << ix86_arch;
3779 for (i = 0; i < X86_ARCH_LAST; ++i)
3780 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3782 for (i = 0; i < pta_size; i++)
3783 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3785 ix86_schedule = processor_alias_table[i].schedule;
3786 ix86_tune = processor_alias_table[i].processor;
3787 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3789 if (!(processor_alias_table[i].flags & PTA_64BIT))
3791 if (ix86_tune_defaulted)
3793 opts->x_ix86_tune_string = "x86-64";
3794 for (i = 0; i < pta_size; i++)
3795 if (! strcmp (opts->x_ix86_tune_string,
3796 processor_alias_table[i].name))
3797 break;
3798 ix86_schedule = processor_alias_table[i].schedule;
3799 ix86_tune = processor_alias_table[i].processor;
3801 else
3802 error ("CPU you selected does not support x86-64 "
3803 "instruction set");
3806 /* Intel CPUs have always interpreted SSE prefetch instructions as
3807 NOPs; so, we can enable SSE prefetch instructions even when
3808 -mtune (rather than -march) points us to a processor that has them.
3809 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3810 higher processors. */
3811 if (TARGET_CMOV
3812 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3813 x86_prefetch_sse = true;
3814 break;
3817 if (ix86_tune_specified && i == pta_size)
3818 error ("bad value (%s) for %stune=%s %s",
3819 opts->x_ix86_tune_string, prefix, suffix, sw);
3821 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3823 #ifndef USE_IX86_FRAME_POINTER
3824 #define USE_IX86_FRAME_POINTER 0
3825 #endif
3827 #ifndef USE_X86_64_FRAME_POINTER
3828 #define USE_X86_64_FRAME_POINTER 0
3829 #endif
3831 /* Set the default values for switches whose default depends on TARGET_64BIT
3832 in case they weren't overwritten by command line options. */
3833 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3835 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3836 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3837 if (opts->x_flag_asynchronous_unwind_tables
3838 && !opts_set->x_flag_unwind_tables
3839 && TARGET_64BIT_MS_ABI)
3840 opts->x_flag_unwind_tables = 1;
3841 if (opts->x_flag_asynchronous_unwind_tables == 2)
3842 opts->x_flag_unwind_tables
3843 = opts->x_flag_asynchronous_unwind_tables = 1;
3844 if (opts->x_flag_pcc_struct_return == 2)
3845 opts->x_flag_pcc_struct_return = 0;
3847 else
3849 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3850 opts->x_flag_omit_frame_pointer
3851 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3852 if (opts->x_flag_asynchronous_unwind_tables == 2)
3853 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3854 if (opts->x_flag_pcc_struct_return == 2)
3855 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3858 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3859 if (opts->x_optimize_size)
3860 ix86_cost = &ix86_size_cost;
3861 else
3862 ix86_cost = ix86_tune_cost;
3864 /* Arrange to set up i386_stack_locals for all functions. */
3865 init_machine_status = ix86_init_machine_status;
3867 /* Validate -mregparm= value. */
3868 if (opts_set->x_ix86_regparm)
3870 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3871 warning (0, "-mregparm is ignored in 64-bit mode");
3872 if (opts->x_ix86_regparm > REGPARM_MAX)
3874 error ("-mregparm=%d is not between 0 and %d",
3875 opts->x_ix86_regparm, REGPARM_MAX);
3876 opts->x_ix86_regparm = 0;
3879 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3880 opts->x_ix86_regparm = REGPARM_MAX;
3882 /* Default align_* from the processor table. */
3883 if (opts->x_align_loops == 0)
3885 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3886 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3888 if (opts->x_align_jumps == 0)
3890 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3891 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3893 if (opts->x_align_functions == 0)
3895 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3898 /* Provide default for -mbranch-cost= value. */
3899 if (!opts_set->x_ix86_branch_cost)
3900 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3904 opts->x_target_flags
3905 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3907 /* Enable by default the SSE and MMX builtins. Do allow the user to
3908 explicitly disable any of these. In particular, disabling SSE and
3909 MMX for kernel code is extremely useful. */
3910 if (!ix86_arch_specified)
3911 opts->x_ix86_isa_flags
3912 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3913 | TARGET_SUBTARGET64_ISA_DEFAULT)
3914 & ~opts->x_ix86_isa_flags_explicit);
3916 if (TARGET_RTD_P (opts->x_target_flags))
3917 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3919 else
3921 opts->x_target_flags
3922 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3924 if (!ix86_arch_specified)
3925 opts->x_ix86_isa_flags
3926 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3928 /* The i386 ABI does not specify a red zone. It still makes sense to use
3929 one when the programmer takes care to keep the stack from being destroyed. */
3930 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3931 opts->x_target_flags |= MASK_NO_RED_ZONE;
3934 /* Keep nonleaf frame pointers. */
3935 if (opts->x_flag_omit_frame_pointer)
3936 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3937 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3938 opts->x_flag_omit_frame_pointer = 1;
3940 /* If we're doing fast math, we don't care about comparison order
3941 wrt NaNs. This lets us use a shorter comparison sequence. */
3942 if (opts->x_flag_finite_math_only)
3943 opts->x_target_flags &= ~MASK_IEEE_FP;
3945 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3946 since the insns won't need emulation. */
3947 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3948 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3950 /* Likewise, if the target doesn't have a 387, or we've specified
3951 software floating point, don't use 387 inline intrinsics. */
3952 if (!TARGET_80387_P (opts->x_target_flags))
3953 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3955 /* Turn on MMX builtins for -msse. */
3956 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3957 opts->x_ix86_isa_flags
3958 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3960 /* Enable SSE prefetch. */
3961 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3962 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3963 x86_prefetch_sse = true;
3965 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3966 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3967 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3968 opts->x_ix86_isa_flags
3969 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3971 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3972 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3973 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3974 opts->x_ix86_isa_flags
3975 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3977 /* Enable lzcnt instruction for -mabm. */
3978 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3982 /* Validate -mpreferred-stack-boundary= value or default it to
3983 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3984 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3985 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3987 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3988 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3989 int max = (TARGET_SEH ? 4 : 12);
3991 if (opts->x_ix86_preferred_stack_boundary_arg < min
3992 || opts->x_ix86_preferred_stack_boundary_arg > max)
3994 if (min == max)
3995 error ("-mpreferred-stack-boundary is not supported "
3996 "for this target");
3997 else
3998 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3999 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4001 else
4002 ix86_preferred_stack_boundary
4003 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4006 /* Set the default value for -mstackrealign. */
4007 if (opts->x_ix86_force_align_arg_pointer == -1)
4008 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4010 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4012 /* Validate -mincoming-stack-boundary= value or default it to
4013 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4014 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4015 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4017 if (opts->x_ix86_incoming_stack_boundary_arg
4018 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4019 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4020 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4021 opts->x_ix86_incoming_stack_boundary_arg,
4022 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4023 else
4025 ix86_user_incoming_stack_boundary
4026 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4027 ix86_incoming_stack_boundary
4028 = ix86_user_incoming_stack_boundary;
4032 #ifndef NO_PROFILE_COUNTERS
4033 if (flag_nop_mcount)
4034 error ("-mnop-mcount is not compatible with this target");
4035 #endif
4036 if (flag_nop_mcount && flag_pic)
4037 error ("-mnop-mcount is not implemented for -fPIC");
4039 /* Accept -msseregparm only if at least SSE support is enabled. */
4040 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4041 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4042 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4044 if (opts_set->x_ix86_fpmath)
4046 if (opts->x_ix86_fpmath & FPMATH_SSE)
4048 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4050 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4051 opts->x_ix86_fpmath = FPMATH_387;
4053 else if ((opts->x_ix86_fpmath & FPMATH_387)
4054 && !TARGET_80387_P (opts->x_target_flags))
4056 warning (0, "387 instruction set disabled, using SSE arithmetics");
4057 opts->x_ix86_fpmath = FPMATH_SSE;
4061 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4062 -mfpmath=387. The latter is nevertheless the default on many targets,
4063 since the extra 80-bit precision of temporaries is considered part of
4064 the ABI. Override the default at least for -ffast-math.
4065 TODO: -mfpmath=both seems to produce code that performs the same but
4066 with slightly smaller binaries. It is however not clear whether register
4067 allocation is ready for this setting.
4068 Also, -mfpmath=387 code is overall a lot more compact (about 4-5%) than
4069 SSE codegen. We may switch to 387 with -ffast-math for size-optimized
4070 functions. */
4071 else if (fast_math_flags_set_p (&global_options)
4072 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4073 opts->x_ix86_fpmath = FPMATH_SSE;
4074 else
4075 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4077 /* If the i387 is disabled, then do not return values in it. */
4078 if (!TARGET_80387_P (opts->x_target_flags))
4079 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4081 /* Use external vectorized library in vectorizing intrinsics. */
4082 if (opts_set->x_ix86_veclibabi_type)
4083 switch (opts->x_ix86_veclibabi_type)
4085 case ix86_veclibabi_type_svml:
4086 ix86_veclib_handler = ix86_veclibabi_svml;
4087 break;
4089 case ix86_veclibabi_type_acml:
4090 ix86_veclib_handler = ix86_veclibabi_acml;
4091 break;
4093 default:
4094 gcc_unreachable ();
4097 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4098 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4099 && !opts->x_optimize_size)
4100 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4102 /* If stack probes are required, the space used for large function
4103 arguments on the stack must also be probed, so enable
4104 -maccumulate-outgoing-args so this happens in the prologue. */
4105 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4106 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4108 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4109 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4110 "for correctness", prefix, suffix);
4111 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4114 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4116 char *p;
4117 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4118 p = strchr (internal_label_prefix, 'X');
4119 internal_label_prefix_len = p - internal_label_prefix;
4120 *p = '\0';
4123 /* When scheduling description is not available, disable scheduler pass
4124 so it won't slow down the compilation and make x87 code slower. */
4125 if (!TARGET_SCHEDULE)
4126 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4128 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4129 ix86_tune_cost->simultaneous_prefetches,
4130 opts->x_param_values,
4131 opts_set->x_param_values);
4132 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4133 ix86_tune_cost->prefetch_block,
4134 opts->x_param_values,
4135 opts_set->x_param_values);
4136 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4137 ix86_tune_cost->l1_cache_size,
4138 opts->x_param_values,
4139 opts_set->x_param_values);
4140 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4141 ix86_tune_cost->l2_cache_size,
4142 opts->x_param_values,
4143 opts_set->x_param_values);
4145 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4146 if (opts->x_flag_prefetch_loop_arrays < 0
4147 && HAVE_prefetch
4148 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4149 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4150 opts->x_flag_prefetch_loop_arrays = 1;
4152 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4153 can be optimized to ap = __builtin_next_arg (0). */
4154 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4155 targetm.expand_builtin_va_start = NULL;
4157 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4159 ix86_gen_leave = gen_leave_rex64;
4160 if (Pmode == DImode)
4162 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4163 ix86_gen_tls_local_dynamic_base_64
4164 = gen_tls_local_dynamic_base_64_di;
4166 else
4168 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4169 ix86_gen_tls_local_dynamic_base_64
4170 = gen_tls_local_dynamic_base_64_si;
4173 else
4174 ix86_gen_leave = gen_leave;
4176 if (Pmode == DImode)
4178 ix86_gen_add3 = gen_adddi3;
4179 ix86_gen_sub3 = gen_subdi3;
4180 ix86_gen_sub3_carry = gen_subdi3_carry;
4181 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4182 ix86_gen_andsp = gen_anddi3;
4183 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4184 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4185 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4186 ix86_gen_monitor = gen_sse3_monitor_di;
4188 else
4190 ix86_gen_add3 = gen_addsi3;
4191 ix86_gen_sub3 = gen_subsi3;
4192 ix86_gen_sub3_carry = gen_subsi3_carry;
4193 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4194 ix86_gen_andsp = gen_andsi3;
4195 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4196 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4197 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4198 ix86_gen_monitor = gen_sse3_monitor_si;
4201 #ifdef USE_IX86_CLD
4202 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4203 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4204 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4205 #endif
4207 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4209 if (opts->x_flag_fentry > 0)
4210 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4211 "with -fpic");
4212 opts->x_flag_fentry = 0;
4214 else if (TARGET_SEH)
4216 if (opts->x_flag_fentry == 0)
4217 sorry ("-mno-fentry isn%'t compatible with SEH");
4218 opts->x_flag_fentry = 1;
4220 else if (opts->x_flag_fentry < 0)
4222 #if defined(PROFILE_BEFORE_PROLOGUE)
4223 opts->x_flag_fentry = 1;
4224 #else
4225 opts->x_flag_fentry = 0;
4226 #endif
4229 /* When not optimizing for size, enable vzeroupper optimization for
4230 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4231 AVX unaligned load/store. */
4232 if (!opts->x_optimize_size)
4234 if (flag_expensive_optimizations
4235 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4236 opts->x_target_flags |= MASK_VZEROUPPER;
4237 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4238 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4239 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4240 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4241 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4242 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4243 /* Enable 128-bit AVX instruction generation
4244 for the auto-vectorizer. */
4245 if (TARGET_AVX128_OPTIMAL
4246 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4247 opts->x_target_flags |= MASK_PREFER_AVX128;
4250 if (opts->x_ix86_recip_name)
4252 char *p = ASTRDUP (opts->x_ix86_recip_name);
4253 char *q;
4254 unsigned int mask, i;
4255 bool invert;
4257 while ((q = strtok (p, ",")) != NULL)
4259 p = NULL;
4260 if (*q == '!')
4262 invert = true;
4263 q++;
4265 else
4266 invert = false;
4268 if (!strcmp (q, "default"))
4269 mask = RECIP_MASK_ALL;
4270 else
4272 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4273 if (!strcmp (q, recip_options[i].string))
4275 mask = recip_options[i].mask;
4276 break;
4279 if (i == ARRAY_SIZE (recip_options))
4281 error ("unknown option for -mrecip=%s", q);
4282 invert = false;
4283 mask = RECIP_MASK_NONE;
4287 opts->x_recip_mask_explicit |= mask;
4288 if (invert)
4289 opts->x_recip_mask &= ~mask;
4290 else
4291 opts->x_recip_mask |= mask;
4295 if (TARGET_RECIP_P (opts->x_target_flags))
4296 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4297 else if (opts_set->x_target_flags & MASK_RECIP)
4298 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
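/* For example, -mrecip=all,!sqrt first enables every reciprocal
   approximation and then clears the scalar sqrt bit: the tokens are
   processed left to right and a leading '!' inverts the named mask
   (a sketch of the parsing above, using only names from recip_options).  */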
4300 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4301 for 64-bit Bionic. */
4302 if (TARGET_HAS_BIONIC
4303 && !(opts_set->x_target_flags
4304 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4305 opts->x_target_flags |= (TARGET_64BIT
4306 ? MASK_LONG_DOUBLE_128
4307 : MASK_LONG_DOUBLE_64);
4309 /* Only one of them can be active. */
4310 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4311 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4313 /* Save the initial options in case the user does function specific
4314 options. */
4315 if (main_args_p)
4316 target_option_default_node = target_option_current_node
4317 = build_target_option_node (opts);
4319 /* Handle stack protector */
4320 if (!opts_set->x_ix86_stack_protector_guard)
4321 opts->x_ix86_stack_protector_guard
4322 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4324 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4325 if (opts->x_ix86_tune_memcpy_strategy)
4327 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4328 ix86_parse_stringop_strategy_string (str, false);
4329 free (str);
4332 if (opts->x_ix86_tune_memset_strategy)
4334 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4335 ix86_parse_stringop_strategy_string (str, true);
4336 free (str);
4340 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4342 static void
4343 ix86_option_override (void)
4345 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4346 struct register_pass_info insert_vzeroupper_info
4347 = { pass_insert_vzeroupper, "reload",
4348 1, PASS_POS_INSERT_AFTER
4351 ix86_option_override_internal (true, &global_options, &global_options_set);
4354 /* This needs to be done at start up. It's convenient to do it here. */
4355 register_pass (&insert_vzeroupper_info);
4358 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4359 static char *
4360 ix86_offload_options (void)
4362 if (TARGET_LP64)
4363 return xstrdup ("-foffload-abi=lp64");
4364 return xstrdup ("-foffload-abi=ilp32");
4367 /* Update register usage after having seen the compiler flags. */
4369 static void
4370 ix86_conditional_register_usage (void)
4372 int i, c_mask;
4373 unsigned int j;
4375 /* The PIC register, if it exists, is fixed. */
4376 j = PIC_OFFSET_TABLE_REGNUM;
4377 if (j != INVALID_REGNUM)
4378 fixed_regs[j] = call_used_regs[j] = 1;
4380 /* For 32-bit targets, squash the REX registers. */
4381 if (! TARGET_64BIT)
4383 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4384 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4385 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4386 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4387 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4388 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4391 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4392 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4393 : TARGET_64BIT ? (1 << 2)
4394 : (1 << 1));
4396 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4398 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4400 /* Set/reset conditionally defined registers from
4401 CALL_USED_REGISTERS initializer. */
4402 if (call_used_regs[i] > 1)
4403 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4405 /* Calculate registers of CLOBBERED_REGS register set
4406 as call used registers from GENERAL_REGS register set. */
4407 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4408 && call_used_regs[i])
4409 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4412 /* If MMX is disabled, squash the registers. */
4413 if (! TARGET_MMX)
4414 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4415 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4416 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4418 /* If SSE is disabled, squash the registers. */
4419 if (! TARGET_SSE)
4420 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4421 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4422 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4424 /* If the FPU is disabled, squash the registers. */
4425 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4426 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4427 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4428 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4430 /* If AVX512F is disabled, squash the registers. */
4431 if (! TARGET_AVX512F)
4433 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4434 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4436 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4437 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4440 /* If MPX is disabled, squash the registers. */
4441 if (! TARGET_MPX)
4442 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4443 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4447 /* Save the current options */
4449 static void
4450 ix86_function_specific_save (struct cl_target_option *ptr,
4451 struct gcc_options *opts)
4453 ptr->arch = ix86_arch;
4454 ptr->schedule = ix86_schedule;
4455 ptr->tune = ix86_tune;
4456 ptr->branch_cost = ix86_branch_cost;
4457 ptr->tune_defaulted = ix86_tune_defaulted;
4458 ptr->arch_specified = ix86_arch_specified;
4459 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4460 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4461 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4462 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4463 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4464 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4465 ptr->x_ix86_abi = opts->x_ix86_abi;
4466 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4467 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4468 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4469 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4470 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4471 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4472 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4473 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4474 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4475 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4476 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4477 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4478 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4479 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4480 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4481 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4482 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4483 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4484 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4485 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4487 /* The fields are char but the variables are not; make sure the
4488 values fit in the fields. */
4489 gcc_assert (ptr->arch == ix86_arch);
4490 gcc_assert (ptr->schedule == ix86_schedule);
4491 gcc_assert (ptr->tune == ix86_tune);
4492 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4495 /* Restore the current options */
4497 static void
4498 ix86_function_specific_restore (struct gcc_options *opts,
4499 struct cl_target_option *ptr)
4501 enum processor_type old_tune = ix86_tune;
4502 enum processor_type old_arch = ix86_arch;
4503 unsigned int ix86_arch_mask;
4504 int i;
4506 /* We don't change -fPIC. */
4507 opts->x_flag_pic = flag_pic;
4509 ix86_arch = (enum processor_type) ptr->arch;
4510 ix86_schedule = (enum attr_cpu) ptr->schedule;
4511 ix86_tune = (enum processor_type) ptr->tune;
4512 opts->x_ix86_branch_cost = ptr->branch_cost;
4513 ix86_tune_defaulted = ptr->tune_defaulted;
4514 ix86_arch_specified = ptr->arch_specified;
4515 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4516 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4517 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4518 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4519 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4520 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4521 opts->x_ix86_abi = ptr->x_ix86_abi;
4522 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4523 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4524 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4525 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4526 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4527 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4528 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4529 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4530 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4531 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4532 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4533 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4534 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4535 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4536 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4537 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4538 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4539 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4540 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4541 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4543 /* Recreate the arch feature tests if the arch changed */
4544 if (old_arch != ix86_arch)
4546 ix86_arch_mask = 1u << ix86_arch;
4547 for (i = 0; i < X86_ARCH_LAST; ++i)
4548 ix86_arch_features[i]
4549 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4552 /* Recreate the tune optimization tests */
4553 if (old_tune != ix86_tune)
4554 set_ix86_tune_features (ix86_tune, false);
4557 /* Print the current options */
4559 static void
4560 ix86_function_specific_print (FILE *file, int indent,
4561 struct cl_target_option *ptr)
4563 char *target_string
4564 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4565 NULL, NULL, ptr->x_ix86_fpmath, false);
4567 gcc_assert (ptr->arch < PROCESSOR_max);
4568 fprintf (file, "%*sarch = %d (%s)\n",
4569 indent, "",
4570 ptr->arch, processor_target_table[ptr->arch].name);
4572 gcc_assert (ptr->tune < PROCESSOR_max);
4573 fprintf (file, "%*stune = %d (%s)\n",
4574 indent, "",
4575 ptr->tune, processor_target_table[ptr->tune].name);
4577 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4579 if (target_string)
4581 fprintf (file, "%*s%s\n", indent, "", target_string);
4582 free (target_string);
4587 /* Inner function to process the attribute((target(...))), take an argument and
4588 set the current options from the argument. If we have a list, recursively go
4589 over the list. */
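/* Illustrative sketch (an assumption, not part of the original sources):
   a declaration such as

       int foo (void) __attribute__((target ("arch=haswell,avx2,no-sse4a")));

   reaches this function either as a single STRING_CST or as a TREE_LIST
   of such strings; each comma-separated token ("arch=haswell", "avx2",
   "no-sse4a") is matched against the attrs[] table below, and a leading
   "no-" inverts the option.  */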
4591 static bool
4592 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4593 struct gcc_options *opts,
4594 struct gcc_options *opts_set,
4595 struct gcc_options *enum_opts_set)
4597 char *next_optstr;
4598 bool ret = true;
4600 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4601 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4602 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4603 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4604 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4606 enum ix86_opt_type
4608 ix86_opt_unknown,
4609 ix86_opt_yes,
4610 ix86_opt_no,
4611 ix86_opt_str,
4612 ix86_opt_enum,
4613 ix86_opt_isa
4616 static const struct
4618 const char *string;
4619 size_t len;
4620 enum ix86_opt_type type;
4621 int opt;
4622 int mask;
4623 } attrs[] = {
4624 /* isa options */
4625 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4626 IX86_ATTR_ISA ("abm", OPT_mabm),
4627 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4628 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4629 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4630 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4631 IX86_ATTR_ISA ("aes", OPT_maes),
4632 IX86_ATTR_ISA ("sha", OPT_msha),
4633 IX86_ATTR_ISA ("avx", OPT_mavx),
4634 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4635 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4636 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4637 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4638 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4639 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4640 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4641 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4642 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4643 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4644 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4645 IX86_ATTR_ISA ("sse", OPT_msse),
4646 IX86_ATTR_ISA ("sse2", OPT_msse2),
4647 IX86_ATTR_ISA ("sse3", OPT_msse3),
4648 IX86_ATTR_ISA ("sse4", OPT_msse4),
4649 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4650 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4651 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4652 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4653 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4654 IX86_ATTR_ISA ("fma", OPT_mfma),
4655 IX86_ATTR_ISA ("xop", OPT_mxop),
4656 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4657 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4658 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4659 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4660 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4661 IX86_ATTR_ISA ("hle", OPT_mhle),
4662 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4663 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4664 IX86_ATTR_ISA ("adx", OPT_madx),
4665 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4666 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4667 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4668 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4669 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4670 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4671 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4672 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4673 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4674 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4675 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4677 /* enum options */
4678 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4680 /* string options */
4681 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4682 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4684 /* flag options */
4685 IX86_ATTR_YES ("cld",
4686 OPT_mcld,
4687 MASK_CLD),
4689 IX86_ATTR_NO ("fancy-math-387",
4690 OPT_mfancy_math_387,
4691 MASK_NO_FANCY_MATH_387),
4693 IX86_ATTR_YES ("ieee-fp",
4694 OPT_mieee_fp,
4695 MASK_IEEE_FP),
4697 IX86_ATTR_YES ("inline-all-stringops",
4698 OPT_minline_all_stringops,
4699 MASK_INLINE_ALL_STRINGOPS),
4701 IX86_ATTR_YES ("inline-stringops-dynamically",
4702 OPT_minline_stringops_dynamically,
4703 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4705 IX86_ATTR_NO ("align-stringops",
4706 OPT_mno_align_stringops,
4707 MASK_NO_ALIGN_STRINGOPS),
4709 IX86_ATTR_YES ("recip",
4710 OPT_mrecip,
4711 MASK_RECIP),
4715 /* If this is a list, recurse to get the options. */
4716 if (TREE_CODE (args) == TREE_LIST)
4718 bool ret = true;
4720 for (; args; args = TREE_CHAIN (args))
4721 if (TREE_VALUE (args)
4722 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4723 p_strings, opts, opts_set,
4724 enum_opts_set))
4725 ret = false;
4727 return ret;
4730 else if (TREE_CODE (args) != STRING_CST)
4732 error ("attribute %<target%> argument not a string");
4733 return false;
4736 /* Handle multiple arguments separated by commas. */
4737 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4739 while (next_optstr && *next_optstr != '\0')
4741 char *p = next_optstr;
4742 char *orig_p = p;
4743 char *comma = strchr (next_optstr, ',');
4744 const char *opt_string;
4745 size_t len, opt_len;
4746 int opt;
4747 bool opt_set_p;
4748 char ch;
4749 unsigned i;
4750 enum ix86_opt_type type = ix86_opt_unknown;
4751 int mask = 0;
4753 if (comma)
4755 *comma = '\0';
4756 len = comma - next_optstr;
4757 next_optstr = comma + 1;
4759 else
4761 len = strlen (p);
4762 next_optstr = NULL;
4765 /* Recognize no-xxx. */
4766 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4768 opt_set_p = false;
4769 p += 3;
4770 len -= 3;
4772 else
4773 opt_set_p = true;
4775 /* Find the option. */
4776 ch = *p;
4777 opt = N_OPTS;
4778 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4780 type = attrs[i].type;
4781 opt_len = attrs[i].len;
4782 if (ch == attrs[i].string[0]
4783 && ((type != ix86_opt_str && type != ix86_opt_enum)
4784 ? len == opt_len
4785 : len > opt_len)
4786 && memcmp (p, attrs[i].string, opt_len) == 0)
4788 opt = attrs[i].opt;
4789 mask = attrs[i].mask;
4790 opt_string = attrs[i].string;
4791 break;
4795 /* Process the option. */
4796 if (opt == N_OPTS)
4798 error ("attribute(target(\"%s\")) is unknown", orig_p);
4799 ret = false;
4802 else if (type == ix86_opt_isa)
4804 struct cl_decoded_option decoded;
4806 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4807 ix86_handle_option (opts, opts_set,
4808 &decoded, input_location);
4811 else if (type == ix86_opt_yes || type == ix86_opt_no)
4813 if (type == ix86_opt_no)
4814 opt_set_p = !opt_set_p;
4816 if (opt_set_p)
4817 opts->x_target_flags |= mask;
4818 else
4819 opts->x_target_flags &= ~mask;
4822 else if (type == ix86_opt_str)
4824 if (p_strings[opt])
4826 error ("option(\"%s\") was already specified", opt_string);
4827 ret = false;
4829 else
4830 p_strings[opt] = xstrdup (p + opt_len);
4833 else if (type == ix86_opt_enum)
4835 bool arg_ok;
4836 int value;
4838 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4839 if (arg_ok)
4840 set_option (opts, enum_opts_set, opt, value,
4841 p + opt_len, DK_UNSPECIFIED, input_location,
4842 global_dc);
4843 else
4845 error ("attribute(target(\"%s\")) is unknown", orig_p);
4846 ret = false;
4850 else
4851 gcc_unreachable ();
4854 return ret;
4857 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4859 tree
4860 ix86_valid_target_attribute_tree (tree args,
4861 struct gcc_options *opts,
4862 struct gcc_options *opts_set)
4864 const char *orig_arch_string = opts->x_ix86_arch_string;
4865 const char *orig_tune_string = opts->x_ix86_tune_string;
4866 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4867 int orig_tune_defaulted = ix86_tune_defaulted;
4868 int orig_arch_specified = ix86_arch_specified;
4869 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4870 tree t = NULL_TREE;
4871 int i;
4872 struct cl_target_option *def
4873 = TREE_TARGET_OPTION (target_option_default_node);
4874 struct gcc_options enum_opts_set;
4876 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4878 /* Process each of the options on the chain. */
4879 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4880 opts_set, &enum_opts_set))
4881 return error_mark_node;
4883 /* If the changed options are different from the default, rerun
4884 ix86_option_override_internal, and then save the options away.
4885 The string options are attribute options, and will be undone
4886 when we copy the save structure. */
4887 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4888 || opts->x_target_flags != def->x_target_flags
4889 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4890 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4891 || enum_opts_set.x_ix86_fpmath)
4893 /* If we are using the default tune= or arch=, undo the string assigned,
4894 and use the default. */
4895 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4896 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4897 else if (!orig_arch_specified)
4898 opts->x_ix86_arch_string = NULL;
4900 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4901 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4902 else if (orig_tune_defaulted)
4903 opts->x_ix86_tune_string = NULL;
4905 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4906 if (enum_opts_set.x_ix86_fpmath)
4907 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4908 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4909 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4911 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4912 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4915 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4916 ix86_option_override_internal (false, opts, opts_set);
4918 /* Add any builtin functions with the new isa if any. */
4919 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4921 /* Save the current options unless we are validating options for
4922 #pragma. */
4923 t = build_target_option_node (opts);
4925 opts->x_ix86_arch_string = orig_arch_string;
4926 opts->x_ix86_tune_string = orig_tune_string;
4927 opts_set->x_ix86_fpmath = orig_fpmath_set;
4929 /* Free up memory allocated to hold the strings */
4930 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4931 free (option_strings[i]);
4934 return t;
4937 /* Hook to validate attribute((target("string"))). */
4939 static bool
4940 ix86_valid_target_attribute_p (tree fndecl,
4941 tree ARG_UNUSED (name),
4942 tree args,
4943 int ARG_UNUSED (flags))
4945 struct gcc_options func_options;
4946 tree new_target, new_optimize;
4947 bool ret = true;
4949 /* attribute((target("default"))) does nothing, beyond
4950 affecting multi-versioning. */
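/* Illustrative example (an assumption, not part of the original sources):
   with function multi-versioning the "default" variant is only the
   run-time fallback and needs no per-function target options:

       __attribute__((target ("default"))) int foo (void) { return 0; }
       __attribute__((target ("avx2")))    int foo (void) { return 2; }

   Only the non-default variants go through the option processing later
   in this function.  */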
4951 if (TREE_VALUE (args)
4952 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4953 && TREE_CHAIN (args) == NULL_TREE
4954 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4955 return true;
4957 tree old_optimize = build_optimization_node (&global_options);
4959 /* Get the optimization options of the current function. */
4960 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4962 if (!func_optimize)
4963 func_optimize = old_optimize;
4965 /* Init func_options. */
4966 memset (&func_options, 0, sizeof (func_options));
4967 init_options_struct (&func_options, NULL);
4968 lang_hooks.init_options_struct (&func_options);
4970 cl_optimization_restore (&func_options,
4971 TREE_OPTIMIZATION (func_optimize));
4973 /* Initialize func_options to the default before its target options can
4974 be set. */
4975 cl_target_option_restore (&func_options,
4976 TREE_TARGET_OPTION (target_option_default_node));
4978 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4979 &global_options_set);
4981 new_optimize = build_optimization_node (&func_options);
4983 if (new_target == error_mark_node)
4984 ret = false;
4986 else if (fndecl && new_target)
4988 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4990 if (old_optimize != new_optimize)
4991 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4994 return ret;
4998 /* Hook to determine if one function can safely inline another. */
5000 static bool
5001 ix86_can_inline_p (tree caller, tree callee)
5003 bool ret = false;
5004 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5005 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5007 /* If callee has no option attributes, then it is ok to inline. */
5008 if (!callee_tree)
5009 ret = true;
5011 /* If caller has no option attributes, but callee does, then it is not ok to
5012 inline. */
5013 else if (!caller_tree)
5014 ret = false;
5016 else
5018 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5019 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5021 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
5022 function can inline an SSE2 function but an SSE2 function can't inline
5023 an SSE4 function. */
5024 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5025 != callee_opts->x_ix86_isa_flags)
5026 ret = false;
5028 /* See if we have the same non-isa options. */
5029 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5030 ret = false;
5032 /* See if arch, tune, etc. are the same. */
5033 else if (caller_opts->arch != callee_opts->arch)
5034 ret = false;
5036 else if (caller_opts->tune != callee_opts->tune)
5037 ret = false;
5039 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5040 ret = false;
5042 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5043 ret = false;
5045 else
5046 ret = true;
5049 return ret;
5053 /* Remember the last target of ix86_set_current_function. */
5054 static GTY(()) tree ix86_previous_fndecl;
5056 /* Set target globals to default. */
5058 static void
5059 ix86_reset_to_default_globals (void)
5061 tree old_tree = (ix86_previous_fndecl
5062 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5063 : NULL_TREE);
5065 if (old_tree)
5067 tree new_tree = target_option_current_node;
5068 cl_target_option_restore (&global_options,
5069 TREE_TARGET_OPTION (new_tree));
5070 if (TREE_TARGET_GLOBALS (new_tree))
5071 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5072 else if (new_tree == target_option_default_node)
5073 restore_target_globals (&default_target_globals);
5074 else
5075 TREE_TARGET_GLOBALS (new_tree)
5076 = save_target_globals_default_opts ();
5080 /* Invalidate ix86_previous_fndecl cache. */
5081 void
5082 ix86_reset_previous_fndecl (void)
5084 ix86_reset_to_default_globals ();
5085 ix86_previous_fndecl = NULL_TREE;
5088 /* Establish appropriate back-end context for processing the function
5089 FNDECL. The argument might be NULL to indicate processing at top
5090 level, outside of any function scope. */
5091 static void
5092 ix86_set_current_function (tree fndecl)
5094 /* Only change the context if the function changes. This hook is called
5095 several times in the course of compiling a function, and we don't want to
5096 slow things down too much or call target_reinit when it isn't safe. */
5097 if (fndecl && fndecl != ix86_previous_fndecl)
5099 tree old_tree = (ix86_previous_fndecl
5100 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5101 : NULL_TREE);
5103 tree new_tree = (fndecl
5104 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5105 : NULL_TREE);
5107 if (old_tree == new_tree)
5110 else if (new_tree && new_tree != target_option_default_node)
5112 cl_target_option_restore (&global_options,
5113 TREE_TARGET_OPTION (new_tree));
5114 if (TREE_TARGET_GLOBALS (new_tree))
5115 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5116 else
5117 TREE_TARGET_GLOBALS (new_tree)
5118 = save_target_globals_default_opts ();
5121 else if (old_tree && old_tree != target_option_default_node)
5122 ix86_reset_to_default_globals ();
5123 ix86_previous_fndecl = fndecl;
5128 /* Return true if this goes in large data/bss. */
5130 static bool
5131 ix86_in_large_data_p (tree exp)
5133 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5134 return false;
5136 /* Functions are never large data. */
5137 if (TREE_CODE (exp) == FUNCTION_DECL)
5138 return false;
5140 /* Automatic variables are never large data. */
5141 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5142 return false;
5144 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5146 const char *section = DECL_SECTION_NAME (exp);
5147 if (strcmp (section, ".ldata") == 0
5148 || strcmp (section, ".lbss") == 0)
5149 return true;
5150 return false;
5152 else
5154 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5156 /* If this is an incomplete type with size 0, then we can't put it
5157 in data because it might be too big when completed. Also,
5158 int_size_in_bytes returns -1 if the size can vary or is larger than
5159 an integer, in which case it is also safer to assume that it goes in
5160 large data. */
5161 if (size <= 0 || size > ix86_section_threshold)
5162 return true;
5165 return false;
5168 /* Switch to the appropriate section for output of DECL.
5169 DECL is either a `VAR_DECL' node or a constant of some sort.
5170 RELOC indicates whether forming the initial value of DECL requires
5171 link-time relocations. */
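/* Illustrative note (an assumption, not part of the original sources):
   with -mcmodel=medium a writable global larger than the
   -mlarge-data-threshold limit is placed by the switch below into
   ".ldata" (or ".lbss" when uninitialized) instead of the default
   ".data"/".bss" sections.  */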
5173 ATTRIBUTE_UNUSED static section *
5174 x86_64_elf_select_section (tree decl, int reloc,
5175 unsigned HOST_WIDE_INT align)
5177 if (ix86_in_large_data_p (decl))
5179 const char *sname = NULL;
5180 unsigned int flags = SECTION_WRITE;
5181 switch (categorize_decl_for_section (decl, reloc))
5183 case SECCAT_DATA:
5184 sname = ".ldata";
5185 break;
5186 case SECCAT_DATA_REL:
5187 sname = ".ldata.rel";
5188 break;
5189 case SECCAT_DATA_REL_LOCAL:
5190 sname = ".ldata.rel.local";
5191 break;
5192 case SECCAT_DATA_REL_RO:
5193 sname = ".ldata.rel.ro";
5194 break;
5195 case SECCAT_DATA_REL_RO_LOCAL:
5196 sname = ".ldata.rel.ro.local";
5197 break;
5198 case SECCAT_BSS:
5199 sname = ".lbss";
5200 flags |= SECTION_BSS;
5201 break;
5202 case SECCAT_RODATA:
5203 case SECCAT_RODATA_MERGE_STR:
5204 case SECCAT_RODATA_MERGE_STR_INIT:
5205 case SECCAT_RODATA_MERGE_CONST:
5206 sname = ".lrodata";
5207 flags = 0;
5208 break;
5209 case SECCAT_SRODATA:
5210 case SECCAT_SDATA:
5211 case SECCAT_SBSS:
5212 gcc_unreachable ();
5213 case SECCAT_TEXT:
5214 case SECCAT_TDATA:
5215 case SECCAT_TBSS:
5216 /* We don't split these for the medium model. Place them into
5217 default sections and hope for the best. */
5218 break;
5220 if (sname)
5222 /* We might get called with string constants, but get_named_section
5223 doesn't like them as they are not DECLs. Also, we need to set
5224 flags in that case. */
5225 if (!DECL_P (decl))
5226 return get_section (sname, flags, NULL);
5227 return get_named_section (decl, sname, reloc);
5230 return default_elf_select_section (decl, reloc, align);
5233 /* Select a set of attributes for section NAME based on the properties
5234 of DECL and whether or not RELOC indicates that DECL's initializer
5235 might contain runtime relocations. */
5237 static unsigned int ATTRIBUTE_UNUSED
5238 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5240 unsigned int flags = default_section_type_flags (decl, name, reloc);
5242 if (decl == NULL_TREE
5243 && (strcmp (name, ".ldata.rel.ro") == 0
5244 || strcmp (name, ".ldata.rel.ro.local") == 0))
5245 flags |= SECTION_RELRO;
5247 if (strcmp (name, ".lbss") == 0
5248 || strncmp (name, ".lbss.", 5) == 0
5249 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5250 flags |= SECTION_BSS;
5252 return flags;
5255 /* Build up a unique section name, expressed as a
5256 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5257 RELOC indicates whether the initial value of EXP requires
5258 link-time relocations. */
5260 static void ATTRIBUTE_UNUSED
5261 x86_64_elf_unique_section (tree decl, int reloc)
5263 if (ix86_in_large_data_p (decl))
5265 const char *prefix = NULL;
5266 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5267 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5269 switch (categorize_decl_for_section (decl, reloc))
5271 case SECCAT_DATA:
5272 case SECCAT_DATA_REL:
5273 case SECCAT_DATA_REL_LOCAL:
5274 case SECCAT_DATA_REL_RO:
5275 case SECCAT_DATA_REL_RO_LOCAL:
5276 prefix = one_only ? ".ld" : ".ldata";
5277 break;
5278 case SECCAT_BSS:
5279 prefix = one_only ? ".lb" : ".lbss";
5280 break;
5281 case SECCAT_RODATA:
5282 case SECCAT_RODATA_MERGE_STR:
5283 case SECCAT_RODATA_MERGE_STR_INIT:
5284 case SECCAT_RODATA_MERGE_CONST:
5285 prefix = one_only ? ".lr" : ".lrodata";
5286 break;
5287 case SECCAT_SRODATA:
5288 case SECCAT_SDATA:
5289 case SECCAT_SBSS:
5290 gcc_unreachable ();
5291 case SECCAT_TEXT:
5292 case SECCAT_TDATA:
5293 case SECCAT_TBSS:
5294 /* We don't split these for the medium model. Place them into
5295 default sections and hope for the best. */
5296 break;
5298 if (prefix)
5300 const char *name, *linkonce;
5301 char *string;
5303 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5304 name = targetm.strip_name_encoding (name);
5306 /* If we're using one_only, then there needs to be a .gnu.linkonce
5307 prefix to the section name. */
5308 linkonce = one_only ? ".gnu.linkonce" : "";
5310 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5312 set_decl_section_name (decl, string);
5313 return;
5316 default_unique_section (decl, reloc);
5319 #ifdef COMMON_ASM_OP
5320 /* This says how to output assembler code to declare an
5321 uninitialized external linkage data object.
5323 For medium model x86-64 we need to use the .largecomm directive for
5324 large objects. */
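/* Illustrative example (an assumption, not part of the original sources):
   with -mcmodel=medium and a size above -mlarge-data-threshold, an
   uninitialized common object such as

       char buf[1 << 20];

   is emitted as ".largecomm buf,1048576,<align>" rather than through the
   usual COMMON_ASM_OP (".comm") directive, so it ends up in the large
   data area.  */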
5325 void
5326 x86_elf_aligned_common (FILE *file,
5327 const char *name, unsigned HOST_WIDE_INT size,
5328 int align)
5330 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5331 && size > (unsigned int)ix86_section_threshold)
5332 fputs ("\t.largecomm\t", file);
5333 else
5334 fputs (COMMON_ASM_OP, file);
5335 assemble_name (file, name);
5336 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5337 size, align / BITS_PER_UNIT);
5339 #endif
5341 /* Utility function for targets to use in implementing
5342 ASM_OUTPUT_ALIGNED_BSS. */
5344 void
5345 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5346 unsigned HOST_WIDE_INT size, int align)
5348 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5349 && size > (unsigned int)ix86_section_threshold)
5350 switch_to_section (get_named_section (decl, ".lbss", 0));
5351 else
5352 switch_to_section (bss_section);
5353 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5354 #ifdef ASM_DECLARE_OBJECT_NAME
5355 last_assemble_variable_decl = decl;
5356 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5357 #else
5358 /* The standard thing is to just output a label for the object. */
5359 ASM_OUTPUT_LABEL (file, name);
5360 #endif /* ASM_DECLARE_OBJECT_NAME */
5361 ASM_OUTPUT_SKIP (file, size ? size : 1);
5364 /* Decide whether we must probe the stack before any space allocation
5365 on this target. It's essentially TARGET_STACK_PROBE except when
5366 -fstack-check causes the stack to be already probed differently. */
5368 bool
5369 ix86_target_stack_probe (void)
5371 /* Do not probe the stack twice if static stack checking is enabled. */
5372 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5373 return false;
5375 return TARGET_STACK_PROBE;
5378 /* Decide whether we can make a sibling call to a function. DECL is the
5379 declaration of the function being targeted by the call and EXP is the
5380 CALL_EXPR representing the call. */
5382 static bool
5383 ix86_function_ok_for_sibcall (tree decl, tree exp)
5385 tree type, decl_or_type;
5386 rtx a, b;
5388 /* If we are generating position-independent code, we cannot sibcall
5389 optimize any indirect call, or a direct call to a global function,
5390 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5391 if (!TARGET_MACHO
5392 && !TARGET_64BIT
5393 && flag_pic
5394 && (!decl || !targetm.binds_local_p (decl)))
5395 return false;
5397 /* If we need to align the outgoing stack, then sibcalling would
5398 unalign the stack, which may break the called function. */
5399 if (ix86_minimum_incoming_stack_boundary (true)
5400 < PREFERRED_STACK_BOUNDARY)
5401 return false;
5403 if (decl)
5405 decl_or_type = decl;
5406 type = TREE_TYPE (decl);
5408 else
5410 /* We're looking at the CALL_EXPR, we need the type of the function. */
5411 type = CALL_EXPR_FN (exp); /* pointer expression */
5412 type = TREE_TYPE (type); /* pointer type */
5413 type = TREE_TYPE (type); /* function type */
5414 decl_or_type = type;
5417 /* Check that the return value locations are the same. Like
5418 if we are returning floats on the 80387 register stack, we cannot
5419 make a sibcall from a function that doesn't return a float to a
5420 function that does or, conversely, from a function that does return
5421 a float to a function that doesn't; the necessary stack adjustment
5422 would not be executed. This is also the place we notice
5423 differences in the return value ABI. Note that it is ok for one
5424 of the functions to have void return type as long as the return
5425 value of the other is passed in a register. */
5426 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5427 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5428 cfun->decl, false);
5429 if (STACK_REG_P (a) || STACK_REG_P (b))
5431 if (!rtx_equal_p (a, b))
5432 return false;
5434 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5436 else if (!rtx_equal_p (a, b))
5437 return false;
5439 if (TARGET_64BIT)
5441 /* The SYSV ABI has more call-clobbered registers;
5442 disallow sibcalls from MS to SYSV. */
5443 if (cfun->machine->call_abi == MS_ABI
5444 && ix86_function_type_abi (type) == SYSV_ABI)
5445 return false;
5447 else
5449 /* If this call is indirect, we'll need to be able to use a
5450 call-clobbered register for the address of the target function.
5451 Make sure that all such registers are not used for passing
5452 parameters. Note that DLLIMPORT functions are indirect. */
5453 if (!decl
5454 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5456 if (ix86_function_regparm (type, NULL) >= 3)
5458 /* ??? Need to count the actual number of registers to be used,
5459 not the possible number of registers. Fix later. */
5460 return false;
5465 /* Otherwise okay. That also includes certain types of indirect calls. */
5466 return true;
5469 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5470 and "sseregparm" calling convention attributes;
5471 arguments as in struct attribute_spec.handler. */
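/* Illustrative usage (an assumption, not part of the original sources):

       int __attribute__((regparm (3))) f (int a, int b, int c);
       int __attribute__((fastcall))    g (int a, int b);
       int __attribute__((stdcall))     h (int a);

   The handler below diagnoses incompatible combinations such as
   fastcall + regparm or stdcall + cdecl on the same type.  */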
5473 static tree
5474 ix86_handle_cconv_attribute (tree *node, tree name,
5475 tree args,
5476 int,
5477 bool *no_add_attrs)
5479 if (TREE_CODE (*node) != FUNCTION_TYPE
5480 && TREE_CODE (*node) != METHOD_TYPE
5481 && TREE_CODE (*node) != FIELD_DECL
5482 && TREE_CODE (*node) != TYPE_DECL)
5484 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5485 name);
5486 *no_add_attrs = true;
5487 return NULL_TREE;
5490 /* Can combine regparm with all attributes but fastcall and thiscall. */
5491 if (is_attribute_p ("regparm", name))
5493 tree cst;
5495 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5497 error ("fastcall and regparm attributes are not compatible");
5500 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5502 error ("regparam and thiscall attributes are not compatible");
5505 cst = TREE_VALUE (args);
5506 if (TREE_CODE (cst) != INTEGER_CST)
5508 warning (OPT_Wattributes,
5509 "%qE attribute requires an integer constant argument",
5510 name);
5511 *no_add_attrs = true;
5513 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5515 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5516 name, REGPARM_MAX);
5517 *no_add_attrs = true;
5520 return NULL_TREE;
5523 if (TARGET_64BIT)
5525 /* Do not warn when emulating the MS ABI. */
5526 if ((TREE_CODE (*node) != FUNCTION_TYPE
5527 && TREE_CODE (*node) != METHOD_TYPE)
5528 || ix86_function_type_abi (*node) != MS_ABI)
5529 warning (OPT_Wattributes, "%qE attribute ignored",
5530 name);
5531 *no_add_attrs = true;
5532 return NULL_TREE;
5535 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5536 if (is_attribute_p ("fastcall", name))
5538 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5540 error ("fastcall and cdecl attributes are not compatible");
5542 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5544 error ("fastcall and stdcall attributes are not compatible");
5546 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5548 error ("fastcall and regparm attributes are not compatible");
5550 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5552 error ("fastcall and thiscall attributes are not compatible");
5556 /* Can combine stdcall with fastcall (redundant), regparm and
5557 sseregparm. */
5558 else if (is_attribute_p ("stdcall", name))
5560 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5562 error ("stdcall and cdecl attributes are not compatible");
5564 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5566 error ("stdcall and fastcall attributes are not compatible");
5568 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5570 error ("stdcall and thiscall attributes are not compatible");
5574 /* Can combine cdecl with regparm and sseregparm. */
5575 else if (is_attribute_p ("cdecl", name))
5577 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5579 error ("stdcall and cdecl attributes are not compatible");
5581 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5583 error ("fastcall and cdecl attributes are not compatible");
5585 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5587 error ("cdecl and thiscall attributes are not compatible");
5590 else if (is_attribute_p ("thiscall", name))
5592 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5593 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5594 name);
5595 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5597 error ("stdcall and thiscall attributes are not compatible");
5599 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5601 error ("fastcall and thiscall attributes are not compatible");
5603 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5605 error ("cdecl and thiscall attributes are not compatible");
5609 /* Can combine sseregparm with all attributes. */
5611 return NULL_TREE;
5614 /* The transactional memory builtins are implicitly regparm or fastcall
5615 depending on the ABI. Override the generic do-nothing attribute that
5616 these builtins were declared with, and replace it with one of the two
5617 attributes that we expect elsewhere. */
5619 static tree
5620 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5621 int flags, bool *no_add_attrs)
5623 tree alt;
5625 /* In no case do we want to add the placeholder attribute. */
5626 *no_add_attrs = true;
5628 /* The 64-bit ABI is unchanged for transactional memory. */
5629 if (TARGET_64BIT)
5630 return NULL_TREE;
5632 /* ??? Is there a better way to validate 32-bit windows? We have
5633 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5634 if (CHECK_STACK_LIMIT > 0)
5635 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5636 else
5638 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5639 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5641 decl_attributes (node, alt, flags);
5643 return NULL_TREE;
5646 /* This function determines from TYPE the calling-convention. */
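/* Illustrative note (an assumption, not part of the original sources):
   for a 32-bit declaration such as

       void __attribute__((stdcall)) f (int);

   this returns IX86_CALLCVT_STDCALL, while an unattributed prototype
   yields IX86_CALLCVT_CDECL, unless -mrtd is in effect and the function
   is not variadic, in which case stdcall becomes the default.  */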
5648 unsigned int
5649 ix86_get_callcvt (const_tree type)
5651 unsigned int ret = 0;
5652 bool is_stdarg;
5653 tree attrs;
5655 if (TARGET_64BIT)
5656 return IX86_CALLCVT_CDECL;
5658 attrs = TYPE_ATTRIBUTES (type);
5659 if (attrs != NULL_TREE)
5661 if (lookup_attribute ("cdecl", attrs))
5662 ret |= IX86_CALLCVT_CDECL;
5663 else if (lookup_attribute ("stdcall", attrs))
5664 ret |= IX86_CALLCVT_STDCALL;
5665 else if (lookup_attribute ("fastcall", attrs))
5666 ret |= IX86_CALLCVT_FASTCALL;
5667 else if (lookup_attribute ("thiscall", attrs))
5668 ret |= IX86_CALLCVT_THISCALL;
5670 /* Regparm isn't allowed for thiscall and fastcall. */
5671 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5673 if (lookup_attribute ("regparm", attrs))
5674 ret |= IX86_CALLCVT_REGPARM;
5675 if (lookup_attribute ("sseregparm", attrs))
5676 ret |= IX86_CALLCVT_SSEREGPARM;
5679 if (IX86_BASE_CALLCVT(ret) != 0)
5680 return ret;
5683 is_stdarg = stdarg_p (type);
5684 if (TARGET_RTD && !is_stdarg)
5685 return IX86_CALLCVT_STDCALL | ret;
5687 if (ret != 0
5688 || is_stdarg
5689 || TREE_CODE (type) != METHOD_TYPE
5690 || ix86_function_type_abi (type) != MS_ABI)
5691 return IX86_CALLCVT_CDECL | ret;
5693 return IX86_CALLCVT_THISCALL;
5696 /* Return 0 if the attributes for two types are incompatible, 1 if they
5697 are compatible, and 2 if they are nearly compatible (which causes a
5698 warning to be generated). */
5700 static int
5701 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5703 unsigned int ccvt1, ccvt2;
5705 if (TREE_CODE (type1) != FUNCTION_TYPE
5706 && TREE_CODE (type1) != METHOD_TYPE)
5707 return 1;
5709 ccvt1 = ix86_get_callcvt (type1);
5710 ccvt2 = ix86_get_callcvt (type2);
5711 if (ccvt1 != ccvt2)
5712 return 0;
5713 if (ix86_function_regparm (type1, NULL)
5714 != ix86_function_regparm (type2, NULL))
5715 return 0;
5717 return 1;
5720 /* Return the regparm value for a function with the indicated TYPE and DECL.
5721 DECL may be NULL when calling a function indirectly
5722 or considering a libcall. */
5724 static int
5725 ix86_function_regparm (const_tree type, const_tree decl)
5727 tree attr;
5728 int regparm;
5729 unsigned int ccvt;
5731 if (TARGET_64BIT)
5732 return (ix86_function_type_abi (type) == SYSV_ABI
5733 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5734 ccvt = ix86_get_callcvt (type);
5735 regparm = ix86_regparm;
5737 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5739 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5740 if (attr)
5742 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5743 return regparm;
5746 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5747 return 2;
5748 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5749 return 1;
5751 /* Use register calling convention for local functions when possible. */
5752 if (decl
5753 && TREE_CODE (decl) == FUNCTION_DECL
5754 /* Caller and callee must agree on the calling convention, so checking
5755 just the optimize flag here would mean that with
5756 __attribute__((optimize (...))) the caller could use the regparm
5757 convention and the callee not, or vice versa. Instead look at whether
5758 the callee itself is optimized or not. */
5759 && opt_for_fn (decl, optimize)
5760 && !(profile_flag && !flag_fentry))
5762 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5763 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5764 if (i && i->local && i->can_change_signature)
5766 int local_regparm, globals = 0, regno;
5768 /* Make sure no regparm register is taken by a
5769 fixed register variable. */
5770 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5771 if (fixed_regs[local_regparm])
5772 break;
5774 /* We don't want to use regparm(3) for nested functions as
5775 these use a static chain pointer in the third argument. */
5776 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5777 local_regparm = 2;
5779 /* In 32-bit mode save a register for the split stack. */
5780 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5781 local_regparm = 2;
5783 /* Each fixed register usage increases register pressure,
5784 so fewer registers should be used for argument passing.
5785 This functionality can be overridden by an explicit
5786 regparm value. */
5787 for (regno = AX_REG; regno <= DI_REG; regno++)
5788 if (fixed_regs[regno])
5789 globals++;
5791 local_regparm
5792 = globals < local_regparm ? local_regparm - globals : 0;
5794 if (local_regparm > regparm)
5795 regparm = local_regparm;
5799 return regparm;
5802 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5803 DFmode (2) arguments in SSE registers for a function with the
5804 indicated TYPE and DECL. DECL may be NULL when calling a function
5805 indirectly or considering a libcall. Otherwise return 0. */
5807 static int
5808 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5810 gcc_assert (!TARGET_64BIT);
5812 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5813 by the sseregparm attribute. */
5814 if (TARGET_SSEREGPARM
5815 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5817 if (!TARGET_SSE)
5819 if (warn)
5821 if (decl)
5822 error ("calling %qD with attribute sseregparm without "
5823 "SSE/SSE2 enabled", decl);
5824 else
5825 error ("calling %qT with attribute sseregparm without "
5826 "SSE/SSE2 enabled", type);
5828 return 0;
5831 return 2;
5834 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5835 (and DFmode for SSE2) arguments in SSE registers. */
5836 if (decl && TARGET_SSE_MATH && optimize
5837 && !(profile_flag && !flag_fentry))
5839 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5840 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5841 if (i && i->local && i->can_change_signature)
5842 return TARGET_SSE2 ? 2 : 1;
5845 return 0;
5848 /* Return true if EAX is live at the start of the function. Used by
5849 ix86_expand_prologue to determine if we need special help before
5850 calling allocate_stack_worker. */
5852 static bool
5853 ix86_eax_live_at_start_p (void)
5855 /* Cheat. Don't bother working forward from ix86_function_regparm
5856 to the function type to whether an actual argument is located in
5857 eax. Instead just look at cfg info, which is still close enough
5858 to correct at this point. This gives false positives for broken
5859 functions that might use uninitialized data that happens to be
5860 allocated in eax, but who cares? */
5861 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5864 static bool
5865 ix86_keep_aggregate_return_pointer (tree fntype)
5867 tree attr;
5869 if (!TARGET_64BIT)
5871 attr = lookup_attribute ("callee_pop_aggregate_return",
5872 TYPE_ATTRIBUTES (fntype));
5873 if (attr)
5874 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5876 /* For 32-bit MS-ABI the default is to keep aggregate
5877 return pointer. */
5878 if (ix86_function_type_abi (fntype) == MS_ABI)
5879 return true;
5881 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5884 /* Value is the number of bytes of arguments automatically
5885 popped when returning from a subroutine call.
5886 FUNDECL is the declaration node of the function (as a tree),
5887 FUNTYPE is the data type of the function (as a tree),
5888 or for a library call it is an identifier node for the subroutine name.
5889 SIZE is the number of bytes of arguments passed on the stack.
5891 On the 80386, the RTD insn may be used to pop them if the number
5892 of args is fixed, but if the number is variable then the caller
5893 must pop them all. RTD can't be used for library calls now
5894 because the library is compiled with the Unix compiler.
5895 Use of RTD is a selectable option, since it is incompatible with
5896 standard Unix calling sequences. If the option is not selected,
5897 the caller must always pop the args.
5899 The attribute stdcall is equivalent to RTD on a per module basis. */
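/* Illustrative example (an assumption, not part of the original sources):
   for the 32-bit declaration

       void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 and this hook returns 8, so the callee pops its own
   arguments (e.g. with "ret $8"); for a plain cdecl function it returns
   0 and the caller pops the arguments instead.  */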
5901 static int
5902 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5904 unsigned int ccvt;
5906 /* None of the 64-bit ABIs pop arguments. */
5907 if (TARGET_64BIT)
5908 return 0;
5910 ccvt = ix86_get_callcvt (funtype);
5912 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5913 | IX86_CALLCVT_THISCALL)) != 0
5914 && ! stdarg_p (funtype))
5915 return size;
5917 /* Lose any fake structure return argument if it is passed on the stack. */
5918 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5919 && !ix86_keep_aggregate_return_pointer (funtype))
5921 int nregs = ix86_function_regparm (funtype, fundecl);
5922 if (nregs == 0)
5923 return GET_MODE_SIZE (Pmode);
5926 return 0;
5929 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5931 static bool
5932 ix86_legitimate_combined_insn (rtx_insn *insn)
5934 /* Check operand constraints in case hard registers were propagated
5935 into insn pattern. This check prevents combine pass from
5936 generating insn patterns with invalid hard register operands.
5937 These invalid insns can eventually confuse reload to error out
5938 with a spill failure. See also PRs 46829 and 46843. */
5939 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5941 int i;
5943 extract_insn (insn);
5944 preprocess_constraints (insn);
5946 int n_operands = recog_data.n_operands;
5947 int n_alternatives = recog_data.n_alternatives;
5948 for (i = 0; i < n_operands; i++)
5950 rtx op = recog_data.operand[i];
5951 machine_mode mode = GET_MODE (op);
5952 const operand_alternative *op_alt;
5953 int offset = 0;
5954 bool win;
5955 int j;
5957 /* For pre-AVX disallow unaligned loads/stores where the
5958 instructions don't support it. */
5959 if (!TARGET_AVX
5960 && VECTOR_MODE_P (GET_MODE (op))
5961 && misaligned_operand (op, GET_MODE (op)))
5963 int min_align = get_attr_ssememalign (insn);
5964 if (min_align == 0)
5965 return false;
5968 /* A unary operator may be accepted by the predicate, but it
5969 is irrelevant for matching constraints. */
5970 if (UNARY_P (op))
5971 op = XEXP (op, 0);
5973 if (GET_CODE (op) == SUBREG)
5975 if (REG_P (SUBREG_REG (op))
5976 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5977 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5978 GET_MODE (SUBREG_REG (op)),
5979 SUBREG_BYTE (op),
5980 GET_MODE (op));
5981 op = SUBREG_REG (op);
5984 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5985 continue;
5987 op_alt = recog_op_alt;
5989 /* Operand has no constraints, anything is OK. */
5990 win = !n_alternatives;
5992 alternative_mask preferred = get_preferred_alternatives (insn);
5993 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5995 if (!TEST_BIT (preferred, j))
5996 continue;
5997 if (op_alt[i].anything_ok
5998 || (op_alt[i].matches != -1
5999 && operands_match_p
6000 (recog_data.operand[i],
6001 recog_data.operand[op_alt[i].matches]))
6002 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6004 win = true;
6005 break;
6009 if (!win)
6010 return false;
6014 return true;
6017 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
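/* Illustrative note (an assumption, not part of the original sources):
   AddressSanitizer maps an address A to its shadow byte at
   (A >> 3) + ix86_asan_shadow_offset (), so e.g. for LP64 Linux targets
   the shadow of A lives at (A >> 3) + 0x7fff8000.  */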
6019 static unsigned HOST_WIDE_INT
6020 ix86_asan_shadow_offset (void)
6022 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6023 : HOST_WIDE_INT_C (0x7fff8000))
6024 : (HOST_WIDE_INT_1 << 29);
6027 /* Argument support functions. */
6029 /* Return true when register may be used to pass function parameters. */
6030 bool
6031 ix86_function_arg_regno_p (int regno)
6033 int i;
6034 const int *parm_regs;
6036 if (!TARGET_64BIT)
6038 if (TARGET_MACHO)
6039 return (regno < REGPARM_MAX
6040 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6041 else
6042 return (regno < REGPARM_MAX
6043 || (TARGET_MMX && MMX_REGNO_P (regno)
6044 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6045 || (TARGET_SSE && SSE_REGNO_P (regno)
6046 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6049 if (TARGET_SSE && SSE_REGNO_P (regno)
6050 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6051 return true;
6053 /* TODO: The function should depend on current function ABI but
6054 builtins.c would need updating then. Therefore we use the
6055 default ABI. */
6057 /* RAX is used as hidden argument to va_arg functions. */
6058 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6059 return true;
6061 if (ix86_abi == MS_ABI)
6062 parm_regs = x86_64_ms_abi_int_parameter_registers;
6063 else
6064 parm_regs = x86_64_int_parameter_registers;
6065 for (i = 0; i < (ix86_abi == MS_ABI
6066 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6067 if (regno == parm_regs[i])
6068 return true;
6069 return false;
6072 /* Return true if we do not know how to pass TYPE solely in registers. */
6074 static bool
6075 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6077 if (must_pass_in_stack_var_size_or_pad (mode, type))
6078 return true;
6080 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6081 The layout_type routine is crafty and tries to trick us into passing
6082 currently unsupported vector types on the stack by using TImode. */
6083 return (!TARGET_64BIT && mode == TImode
6084 && type && TREE_CODE (type) != VECTOR_TYPE);
6087 /* Return the size, in bytes, of the area reserved for arguments passed
6088 in registers for the function represented by FNDECL, depending on the
6089 ABI format used. */
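/* Illustrative note (an assumption, not part of the original sources):
   the 64-bit MS ABI requires the caller to reserve a 32-byte register
   parameter ("home") area on the stack for the four register-passed
   arguments, hence the value 32 below; the SysV ABIs reserve nothing.  */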
6091 ix86_reg_parm_stack_space (const_tree fndecl)
6093 enum calling_abi call_abi = SYSV_ABI;
6094 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6095 call_abi = ix86_function_abi (fndecl);
6096 else
6097 call_abi = ix86_function_type_abi (fndecl);
6098 if (TARGET_64BIT && call_abi == MS_ABI)
6099 return 32;
6100 return 0;
6103 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6104 call ABI used. */
6105 enum calling_abi
6106 ix86_function_type_abi (const_tree fntype)
6108 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6110 enum calling_abi abi = ix86_abi;
6111 if (abi == SYSV_ABI)
6113 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6114 abi = MS_ABI;
6116 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6117 abi = SYSV_ABI;
6118 return abi;
6120 return ix86_abi;
6123 /* We add this as a workaround in order to use libc_has_function
6124 hook in i386.md. */
6125 bool
6126 ix86_libc_has_function (enum function_class fn_class)
6128 return targetm.libc_has_function (fn_class);
6131 static bool
6132 ix86_function_ms_hook_prologue (const_tree fn)
6134 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6136 if (decl_function_context (fn) != NULL_TREE)
6137 error_at (DECL_SOURCE_LOCATION (fn),
6138 "ms_hook_prologue is not compatible with nested function");
6139 else
6140 return true;
6142 return false;
6145 static enum calling_abi
6146 ix86_function_abi (const_tree fndecl)
6148 if (! fndecl)
6149 return ix86_abi;
6150 return ix86_function_type_abi (TREE_TYPE (fndecl));
6153 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6154 call ABI used. */
6155 enum calling_abi
6156 ix86_cfun_abi (void)
6158 if (! cfun)
6159 return ix86_abi;
6160 return cfun->machine->call_abi;
6163 /* Write the extra assembler code needed to declare a function properly. */
6165 void
6166 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6167 tree decl)
6169 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6171 if (is_ms_hook)
6173 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6174 unsigned int filler_cc = 0xcccccccc;
6176 for (i = 0; i < filler_count; i += 4)
6177 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6180 #ifdef SUBTARGET_ASM_UNWIND_INIT
6181 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6182 #endif
6184 ASM_OUTPUT_LABEL (asm_out_file, fname);
6186 /* Output magic byte marker, if hot-patch attribute is set. */
6187 if (is_ms_hook)
6189 if (TARGET_64BIT)
6191 /* leaq [%rsp + 0], %rsp */
6192 asm_fprintf (asm_out_file, ASM_BYTE
6193 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6195 else
6197 /* movl.s %edi, %edi
6198 push %ebp
6199 movl.s %esp, %ebp */
6200 asm_fprintf (asm_out_file, ASM_BYTE
6201 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6206 /* regclass.c */
6207 extern void init_regs (void);
6209 /* Implementation of the call ABI switching target hook. The call
6210 register sets specific to FNDECL are set up here. See also
6211 ix86_conditional_register_usage for more details. */
6212 void
6213 ix86_call_abi_override (const_tree fndecl)
6215 if (fndecl == NULL_TREE)
6216 cfun->machine->call_abi = ix86_abi;
6217 else
6218 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6221 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6222 Avoid expensive re-initialization of init_regs each time we switch function
6223 context, since this is needed only during RTL expansion. */
6224 static void
6225 ix86_maybe_switch_abi (void)
6227 if (TARGET_64BIT
6228 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6229 reinit_regs ();
6232 /* Return true if a pseudo register should be created and used to hold
6233 the GOT address for PIC code. */
6234 static bool
6235 ix86_use_pseudo_pic_reg (void)
6237 if ((TARGET_64BIT
6238 && (ix86_cmodel == CM_SMALL_PIC
6239 || TARGET_PECOFF))
6240 || !flag_pic)
6241 return false;
6242 return true;
6245 /* Initialize large model PIC register. */
6247 static void
6248 ix86_init_large_pic_reg (unsigned int tmp_regno)
6250 rtx_code_label *label;
6251 rtx tmp_reg;
6253 gcc_assert (Pmode == DImode);
6254 label = gen_label_rtx ();
6255 emit_label (label);
6256 LABEL_PRESERVE_P (label) = 1;
6257 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6258 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6259 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6260 label));
6261 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6262 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6263 pic_offset_table_rtx, tmp_reg));
6266 /* Create and initialize PIC register if required. */
6267 static void
6268 ix86_init_pic_reg (void)
6270 edge entry_edge;
6271 rtx_insn *seq;
6273 if (!ix86_use_pseudo_pic_reg ())
6274 return;
6276 start_sequence ();
6278 if (TARGET_64BIT)
6280 if (ix86_cmodel == CM_LARGE_PIC)
6281 ix86_init_large_pic_reg (R11_REG);
6282 else
6283 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6285 else
6287 /* If there is a future mcount call in the function, it is more profitable
6288 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6289 rtx reg = crtl->profile
6290 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6291 : pic_offset_table_rtx;
6292 rtx insn = emit_insn (gen_set_got (reg));
6293 RTX_FRAME_RELATED_P (insn) = 1;
6294 if (crtl->profile)
6295 emit_move_insn (pic_offset_table_rtx, reg);
6296 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6299 seq = get_insns ();
6300 end_sequence ();
6302 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6303 insert_insn_on_edge (seq, entry_edge);
6304 commit_one_edge_insertion (entry_edge);
6307 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6308 for a call to a function whose data type is FNTYPE.
6309 For a library call, FNTYPE is 0. */
6311 void
6312 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6313 tree fntype, /* tree ptr for function decl */
6314 rtx libname, /* SYMBOL_REF of library name or 0 */
6315 tree fndecl,
6316 int caller)
6318 struct cgraph_local_info *i;
6320 memset (cum, 0, sizeof (*cum));
6322 if (fndecl)
6324 i = cgraph_node::local_info (fndecl);
6325 cum->call_abi = ix86_function_abi (fndecl);
6327 else
6329 i = NULL;
6330 cum->call_abi = ix86_function_type_abi (fntype);
6333 cum->caller = caller;
6335 /* Set up the number of registers to use for passing arguments. */
6336 cum->nregs = ix86_regparm;
6337 if (TARGET_64BIT)
6339 cum->nregs = (cum->call_abi == SYSV_ABI
6340 ? X86_64_REGPARM_MAX
6341 : X86_64_MS_REGPARM_MAX);
6343 if (TARGET_SSE)
6345 cum->sse_nregs = SSE_REGPARM_MAX;
6346 if (TARGET_64BIT)
6348 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6349 ? X86_64_SSE_REGPARM_MAX
6350 : X86_64_MS_SSE_REGPARM_MAX);
6353 if (TARGET_MMX)
6354 cum->mmx_nregs = MMX_REGPARM_MAX;
6355 cum->warn_avx512f = true;
6356 cum->warn_avx = true;
6357 cum->warn_sse = true;
6358 cum->warn_mmx = true;
6360 /* Because the type might mismatch between caller and callee, we need to
6361 use the actual type of the function for local calls.
6362 FIXME: cgraph_analyze can be told to actually record if a function uses
6363 va_start, so for local functions maybe_vaarg can be made more aggressive,
6364 helping K&R code.
6365 FIXME: once the type system is fixed, we won't need this code anymore. */
6366 if (i && i->local && i->can_change_signature)
6367 fntype = TREE_TYPE (fndecl);
6368 cum->stdarg = stdarg_p (fntype);
6369 cum->maybe_vaarg = (fntype
6370 ? (!prototype_p (fntype) || stdarg_p (fntype))
6371 : !libname);
6373 cum->bnd_regno = FIRST_BND_REG;
6374 cum->bnds_in_bt = 0;
6375 cum->force_bnd_pass = 0;
6377 if (!TARGET_64BIT)
6379 /* If there are variable arguments, then we won't pass anything
6380 in registers in 32-bit mode. */
6381 if (stdarg_p (fntype))
6383 cum->nregs = 0;
6384 cum->sse_nregs = 0;
6385 cum->mmx_nregs = 0;
6386 cum->warn_avx512f = false;
6387 cum->warn_avx = false;
6388 cum->warn_sse = false;
6389 cum->warn_mmx = false;
6390 return;
6393 /* Use ecx and edx registers if function has fastcall attribute,
6394 else look for regparm information. */
6395 if (fntype)
6397 unsigned int ccvt = ix86_get_callcvt (fntype);
6398 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6400 cum->nregs = 1;
6401 cum->fastcall = 1; /* Same first register as in fastcall. */
6403 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6405 cum->nregs = 2;
6406 cum->fastcall = 1;
6408 else
6409 cum->nregs = ix86_function_regparm (fntype, fndecl);
6412 /* Set up the number of SSE registers used for passing SFmode
6413 and DFmode arguments. Warn for mismatching ABI. */
6414 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6418 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6419 But in the case of vector types, it is some vector mode.
6421 When we have only some of our vector isa extensions enabled, then there
6422 are some modes for which vector_mode_supported_p is false. For these
6423 modes, the generic vector support in gcc will choose some non-vector mode
6424 in order to implement the type. By computing the natural mode, we'll
6425 select the proper ABI location for the operand and not depend on whatever
6426 the middle-end decides to do with these vector types.
6428 The middle-end can't deal with vector types larger than 16 bytes. In this
6429 case, we return the original mode and warn about the ABI change if CUM isn't
6430 NULL.
6432 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6433 available for function return value. */
6435 static machine_mode
6436 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6437 bool in_return)
6439 machine_mode mode = TYPE_MODE (type);
6441 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6443 HOST_WIDE_INT size = int_size_in_bytes (type);
6444 if ((size == 8 || size == 16 || size == 32 || size == 64)
6445 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6446 && TYPE_VECTOR_SUBPARTS (type) > 1)
6448 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6450 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6451 mode = MIN_MODE_VECTOR_FLOAT;
6452 else
6453 mode = MIN_MODE_VECTOR_INT;
6455 /* Get the mode which has this inner mode and number of units. */
6456 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6457 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6458 && GET_MODE_INNER (mode) == innermode)
6460 if (size == 64 && !TARGET_AVX512F)
6462 static bool warnedavx512f;
6463 static bool warnedavx512f_ret;
6465 if (cum && cum->warn_avx512f && !warnedavx512f)
6467 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6468 "without AVX512F enabled changes the ABI"))
6469 warnedavx512f = true;
6471 else if (in_return && !warnedavx512f_ret)
6473 if (warning (OPT_Wpsabi, "AVX512F vector return "
6474 "without AVX512F enabled changes the ABI"))
6475 warnedavx512f_ret = true;
6478 return TYPE_MODE (type);
6480 else if (size == 32 && !TARGET_AVX)
6482 static bool warnedavx;
6483 static bool warnedavx_ret;
6485 if (cum && cum->warn_avx && !warnedavx)
6487 if (warning (OPT_Wpsabi, "AVX vector argument "
6488 "without AVX enabled changes the ABI"))
6489 warnedavx = true;
6491 else if (in_return && !warnedavx_ret)
6493 if (warning (OPT_Wpsabi, "AVX vector return "
6494 "without AVX enabled changes the ABI"))
6495 warnedavx_ret = true;
6498 return TYPE_MODE (type);
6500 else if (((size == 8 && TARGET_64BIT) || size == 16)
6501 && !TARGET_SSE)
6503 static bool warnedsse;
6504 static bool warnedsse_ret;
6506 if (cum && cum->warn_sse && !warnedsse)
6508 if (warning (OPT_Wpsabi, "SSE vector argument "
6509 "without SSE enabled changes the ABI"))
6510 warnedsse = true;
6512 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6514 if (warning (OPT_Wpsabi, "SSE vector return "
6515 "without SSE enabled changes the ABI"))
6516 warnedsse_ret = true;
6519 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6521 static bool warnedmmx;
6522 static bool warnedmmx_ret;
6524 if (cum && cum->warn_mmx && !warnedmmx)
6526 if (warning (OPT_Wpsabi, "MMX vector argument "
6527 "without MMX enabled changes the ABI"))
6528 warnedmmx = true;
6530 else if (in_return && !warnedmmx_ret)
6532 if (warning (OPT_Wpsabi, "MMX vector return "
6533 "without MMX enabled changes the ABI"))
6534 warnedmmx_ret = true;
6537 return mode;
6540 gcc_unreachable ();
6544 return mode;
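/* As a worked example of the natural-mode computation above (an
   illustrative sketch, not part of the original comments): for a user
   vector type such as

     typedef float v8sf __attribute__ ((vector_size (32)));

   TYPE_MODE is V8SFmode when AVX is enabled.  Without AVX, the generic
   vector support falls back to a non-vector mode, so type_natural_mode
   recomputes V8SFmode from the element type and the number of units,
   notices that a 32-byte vector needs AVX, emits the -Wpsabi warning
   above and returns TYPE_MODE (type) instead.  */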
6547 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6548 this may not agree with the mode that the type system has chosen for the
6549 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6550 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6552 static rtx
6553 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6554 unsigned int regno)
6556 rtx tmp;
6558 if (orig_mode != BLKmode)
6559 tmp = gen_rtx_REG (orig_mode, regno);
6560 else
6562 tmp = gen_rtx_REG (mode, regno);
6563 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6564 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6567 return tmp;
6570 /* x86-64 register passing implementation. See the x86-64 PS ABI for details. The goal
6571 of this code is to classify each 8-byte chunk of the incoming argument by its
6572 register class and assign registers accordingly. */
6574 /* Return the union class of CLASS1 and CLASS2.
6575 See the x86-64 PS ABI for details. */
6577 static enum x86_64_reg_class
6578 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6580 /* Rule #1: If both classes are equal, this is the resulting class. */
6581 if (class1 == class2)
6582 return class1;
6584 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6585 the other class. */
6586 if (class1 == X86_64_NO_CLASS)
6587 return class2;
6588 if (class2 == X86_64_NO_CLASS)
6589 return class1;
6591 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6592 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6593 return X86_64_MEMORY_CLASS;
6595 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6596 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6597 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6598 return X86_64_INTEGERSI_CLASS;
6599 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6600 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6601 return X86_64_INTEGER_CLASS;
6603 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6604 MEMORY is used. */
6605 if (class1 == X86_64_X87_CLASS
6606 || class1 == X86_64_X87UP_CLASS
6607 || class1 == X86_64_COMPLEX_X87_CLASS
6608 || class2 == X86_64_X87_CLASS
6609 || class2 == X86_64_X87UP_CLASS
6610 || class2 == X86_64_COMPLEX_X87_CLASS)
6611 return X86_64_MEMORY_CLASS;
6613 /* Rule #6: Otherwise class SSE is used. */
6614 return X86_64_SSE_CLASS;
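/* A small sketch of how the rules above combine: for a union such as

     union u { int i; float f; };

   the single eightbyte gets X86_64_INTEGERSI_CLASS from the int and
   X86_64_SSESF_CLASS from the float; rule #4 resolves the merge to
   X86_64_INTEGERSI_CLASS, so the union is passed in an integer register.
   Any X87 class in the mix would instead force X86_64_MEMORY_CLASS by
   rule #5.  */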
6617 /* Classify the argument of type TYPE and mode MODE.
6618 CLASSES will be filled by the register class used to pass each word
6619 of the operand. The number of words is returned. In case the parameter
6620 should be passed in memory, 0 is returned. As a special case for zero
6621 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6623 BIT_OFFSET is used internally for handling records; it specifies the
6624 offset in bits modulo 512 to avoid overflow cases.
6626 See the x86-64 PS ABI for details. */
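/* For illustration (an added example, not part of the original source),
   a 16-byte structure such as

     struct s { double d; long l; };

   is split into two eightbytes: the first (the double) is classified as
   X86_64_SSEDF_CLASS and the second (the long) as X86_64_INTEGER_CLASS,
   so classify_argument returns 2 and the value ends up in one SSE and
   one integer register.  A structure larger than 64 bytes is classified
   as memory, i.e. the return value is 0.  */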
6629 static int
6630 classify_argument (machine_mode mode, const_tree type,
6631 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6633 HOST_WIDE_INT bytes =
6634 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6635 int words
6636 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6638 /* Variable sized entities are always passed/returned in memory. */
6639 if (bytes < 0)
6640 return 0;
6642 if (mode != VOIDmode
6643 && targetm.calls.must_pass_in_stack (mode, type))
6644 return 0;
6646 if (type && AGGREGATE_TYPE_P (type))
6648 int i;
6649 tree field;
6650 enum x86_64_reg_class subclasses[MAX_CLASSES];
6652 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6653 if (bytes > 64)
6654 return 0;
6656 for (i = 0; i < words; i++)
6657 classes[i] = X86_64_NO_CLASS;
6659 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6660 signal the memory class, so handle it as a special case. */
6661 if (!words)
6663 classes[0] = X86_64_NO_CLASS;
6664 return 1;
6667 /* Classify each field of record and merge classes. */
6668 switch (TREE_CODE (type))
6670 case RECORD_TYPE:
6671 /* And now merge the fields of structure. */
6672 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6674 if (TREE_CODE (field) == FIELD_DECL)
6676 int num;
6678 if (TREE_TYPE (field) == error_mark_node)
6679 continue;
6681 /* Bitfields are always classified as integer. Handle them
6682 early, since later code would consider them to be
6683 misaligned integers. */
6684 if (DECL_BIT_FIELD (field))
6686 for (i = (int_bit_position (field)
6687 + (bit_offset % 64)) / 8 / 8;
6688 i < ((int_bit_position (field) + (bit_offset % 64))
6689 + tree_to_shwi (DECL_SIZE (field))
6690 + 63) / 8 / 8; i++)
6691 classes[i] =
6692 merge_classes (X86_64_INTEGER_CLASS,
6693 classes[i]);
6695 else
6697 int pos;
6699 type = TREE_TYPE (field);
6701 /* Flexible array member is ignored. */
6702 if (TYPE_MODE (type) == BLKmode
6703 && TREE_CODE (type) == ARRAY_TYPE
6704 && TYPE_SIZE (type) == NULL_TREE
6705 && TYPE_DOMAIN (type) != NULL_TREE
6706 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6707 == NULL_TREE))
6709 static bool warned;
6711 if (!warned && warn_psabi)
6713 warned = true;
6714 inform (input_location,
6715 "the ABI of passing struct with"
6716 " a flexible array member has"
6717 " changed in GCC 4.4");
6719 continue;
6721 num = classify_argument (TYPE_MODE (type), type,
6722 subclasses,
6723 (int_bit_position (field)
6724 + bit_offset) % 512);
6725 if (!num)
6726 return 0;
6727 pos = (int_bit_position (field)
6728 + (bit_offset % 64)) / 8 / 8;
6729 for (i = 0; i < num && (i + pos) < words; i++)
6730 classes[i + pos] =
6731 merge_classes (subclasses[i], classes[i + pos]);
6735 break;
6737 case ARRAY_TYPE:
6738 /* Arrays are handled as small records. */
6740 int num;
6741 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6742 TREE_TYPE (type), subclasses, bit_offset);
6743 if (!num)
6744 return 0;
6746 /* The partial classes are now full classes. */
6747 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6748 subclasses[0] = X86_64_SSE_CLASS;
6749 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6750 && !((bit_offset % 64) == 0 && bytes == 4))
6751 subclasses[0] = X86_64_INTEGER_CLASS;
6753 for (i = 0; i < words; i++)
6754 classes[i] = subclasses[i % num];
6756 break;
6758 case UNION_TYPE:
6759 case QUAL_UNION_TYPE:
6760 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6762 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6764 if (TREE_CODE (field) == FIELD_DECL)
6766 int num;
6768 if (TREE_TYPE (field) == error_mark_node)
6769 continue;
6771 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6772 TREE_TYPE (field), subclasses,
6773 bit_offset);
6774 if (!num)
6775 return 0;
6776 for (i = 0; i < num && i < words; i++)
6777 classes[i] = merge_classes (subclasses[i], classes[i]);
6780 break;
6782 default:
6783 gcc_unreachable ();
6786 if (words > 2)
6788 /* When the size is > 16 bytes, if the first class isn't
6789 X86_64_SSE_CLASS or any of the others isn't
6790 X86_64_SSEUP_CLASS, everything should be passed in
6791 memory. */
6792 if (classes[0] != X86_64_SSE_CLASS)
6793 return 0;
6795 for (i = 1; i < words; i++)
6796 if (classes[i] != X86_64_SSEUP_CLASS)
6797 return 0;
6800 /* Final merger cleanup. */
6801 for (i = 0; i < words; i++)
6803 /* If one class is MEMORY, everything should be passed in
6804 memory. */
6805 if (classes[i] == X86_64_MEMORY_CLASS)
6806 return 0;
6808 /* The X86_64_SSEUP_CLASS should always be preceded by
6809 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6810 if (classes[i] == X86_64_SSEUP_CLASS
6811 && classes[i - 1] != X86_64_SSE_CLASS
6812 && classes[i - 1] != X86_64_SSEUP_CLASS)
6814 /* The first one should never be X86_64_SSEUP_CLASS. */
6815 gcc_assert (i != 0);
6816 classes[i] = X86_64_SSE_CLASS;
6819 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6820 everything should be passed in memory. */
6821 if (classes[i] == X86_64_X87UP_CLASS
6822 && (classes[i - 1] != X86_64_X87_CLASS))
6824 static bool warned;
6826 /* The first one should never be X86_64_X87UP_CLASS. */
6827 gcc_assert (i != 0);
6828 if (!warned && warn_psabi)
6830 warned = true;
6831 inform (input_location,
6832 "the ABI of passing union with long double"
6833 " has changed in GCC 4.4");
6835 return 0;
6838 return words;
6841 /* Compute the alignment needed. We align all types to their natural boundaries, with
6842 the exception of XFmode, which is aligned to 64 bits. */
6843 if (mode != VOIDmode && mode != BLKmode)
6845 int mode_alignment = GET_MODE_BITSIZE (mode);
6847 if (mode == XFmode)
6848 mode_alignment = 128;
6849 else if (mode == XCmode)
6850 mode_alignment = 256;
6851 if (COMPLEX_MODE_P (mode))
6852 mode_alignment /= 2;
6853 /* Misaligned fields are always returned in memory. */
6854 if (bit_offset % mode_alignment)
6855 return 0;
6858 /* For V1xx modes, just use the base mode. */
6859 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6860 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6861 mode = GET_MODE_INNER (mode);
6863 /* Classification of atomic types. */
6864 switch (mode)
6866 case SDmode:
6867 case DDmode:
6868 classes[0] = X86_64_SSE_CLASS;
6869 return 1;
6870 case TDmode:
6871 classes[0] = X86_64_SSE_CLASS;
6872 classes[1] = X86_64_SSEUP_CLASS;
6873 return 2;
6874 case DImode:
6875 case SImode:
6876 case HImode:
6877 case QImode:
6878 case CSImode:
6879 case CHImode:
6880 case CQImode:
6882 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6884 /* Analyze last 128 bits only. */
6885 size = (size - 1) & 0x7f;
6887 if (size < 32)
6889 classes[0] = X86_64_INTEGERSI_CLASS;
6890 return 1;
6892 else if (size < 64)
6894 classes[0] = X86_64_INTEGER_CLASS;
6895 return 1;
6897 else if (size < 64+32)
6899 classes[0] = X86_64_INTEGER_CLASS;
6900 classes[1] = X86_64_INTEGERSI_CLASS;
6901 return 2;
6903 else if (size < 64+64)
6905 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6906 return 2;
6908 else
6909 gcc_unreachable ();
6911 case CDImode:
6912 case TImode:
6913 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6914 return 2;
6915 case COImode:
6916 case OImode:
6917 /* OImode shouldn't be used directly. */
6918 gcc_unreachable ();
6919 case CTImode:
6920 return 0;
6921 case SFmode:
6922 if (!(bit_offset % 64))
6923 classes[0] = X86_64_SSESF_CLASS;
6924 else
6925 classes[0] = X86_64_SSE_CLASS;
6926 return 1;
6927 case DFmode:
6928 classes[0] = X86_64_SSEDF_CLASS;
6929 return 1;
6930 case XFmode:
6931 classes[0] = X86_64_X87_CLASS;
6932 classes[1] = X86_64_X87UP_CLASS;
6933 return 2;
6934 case TFmode:
6935 classes[0] = X86_64_SSE_CLASS;
6936 classes[1] = X86_64_SSEUP_CLASS;
6937 return 2;
6938 case SCmode:
6939 classes[0] = X86_64_SSE_CLASS;
6940 if (!(bit_offset % 64))
6941 return 1;
6942 else
6944 static bool warned;
6946 if (!warned && warn_psabi)
6948 warned = true;
6949 inform (input_location,
6950 "the ABI of passing structure with complex float"
6951 " member has changed in GCC 4.4");
6953 classes[1] = X86_64_SSESF_CLASS;
6954 return 2;
6956 case DCmode:
6957 classes[0] = X86_64_SSEDF_CLASS;
6958 classes[1] = X86_64_SSEDF_CLASS;
6959 return 2;
6960 case XCmode:
6961 classes[0] = X86_64_COMPLEX_X87_CLASS;
6962 return 1;
6963 case TCmode:
6964 /* This mode is larger than 16 bytes. */
6965 return 0;
6966 case V8SFmode:
6967 case V8SImode:
6968 case V32QImode:
6969 case V16HImode:
6970 case V4DFmode:
6971 case V4DImode:
6972 classes[0] = X86_64_SSE_CLASS;
6973 classes[1] = X86_64_SSEUP_CLASS;
6974 classes[2] = X86_64_SSEUP_CLASS;
6975 classes[3] = X86_64_SSEUP_CLASS;
6976 return 4;
6977 case V8DFmode:
6978 case V16SFmode:
6979 case V8DImode:
6980 case V16SImode:
6981 case V32HImode:
6982 case V64QImode:
6983 classes[0] = X86_64_SSE_CLASS;
6984 classes[1] = X86_64_SSEUP_CLASS;
6985 classes[2] = X86_64_SSEUP_CLASS;
6986 classes[3] = X86_64_SSEUP_CLASS;
6987 classes[4] = X86_64_SSEUP_CLASS;
6988 classes[5] = X86_64_SSEUP_CLASS;
6989 classes[6] = X86_64_SSEUP_CLASS;
6990 classes[7] = X86_64_SSEUP_CLASS;
6991 return 8;
6992 case V4SFmode:
6993 case V4SImode:
6994 case V16QImode:
6995 case V8HImode:
6996 case V2DFmode:
6997 case V2DImode:
6998 classes[0] = X86_64_SSE_CLASS;
6999 classes[1] = X86_64_SSEUP_CLASS;
7000 return 2;
7001 case V1TImode:
7002 case V1DImode:
7003 case V2SFmode:
7004 case V2SImode:
7005 case V4HImode:
7006 case V8QImode:
7007 classes[0] = X86_64_SSE_CLASS;
7008 return 1;
7009 case BLKmode:
7010 case VOIDmode:
7011 return 0;
7012 default:
7013 gcc_assert (VECTOR_MODE_P (mode));
7015 if (bytes > 16)
7016 return 0;
7018 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7020 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7021 classes[0] = X86_64_INTEGERSI_CLASS;
7022 else
7023 classes[0] = X86_64_INTEGER_CLASS;
7024 classes[1] = X86_64_INTEGER_CLASS;
7025 return 1 + (bytes > 8);
7029 /* Examine the argument and return the number of registers required in each
7030 class. Return true iff the parameter should be passed in memory. */
7032 static bool
7033 examine_argument (machine_mode mode, const_tree type, int in_return,
7034 int *int_nregs, int *sse_nregs)
7036 enum x86_64_reg_class regclass[MAX_CLASSES];
7037 int n = classify_argument (mode, type, regclass, 0);
7039 *int_nregs = 0;
7040 *sse_nregs = 0;
7042 if (!n)
7043 return true;
7044 for (n--; n >= 0; n--)
7045 switch (regclass[n])
7047 case X86_64_INTEGER_CLASS:
7048 case X86_64_INTEGERSI_CLASS:
7049 (*int_nregs)++;
7050 break;
7051 case X86_64_SSE_CLASS:
7052 case X86_64_SSESF_CLASS:
7053 case X86_64_SSEDF_CLASS:
7054 (*sse_nregs)++;
7055 break;
7056 case X86_64_NO_CLASS:
7057 case X86_64_SSEUP_CLASS:
7058 break;
7059 case X86_64_X87_CLASS:
7060 case X86_64_X87UP_CLASS:
7061 case X86_64_COMPLEX_X87_CLASS:
7062 if (!in_return)
7063 return true;
7064 break;
7065 case X86_64_MEMORY_CLASS:
7066 gcc_unreachable ();
7069 return false;
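/* Example of the distinction made above (illustrative): for

     struct s { long double x; };

   classify_argument yields { X86_64_X87_CLASS, X86_64_X87UP_CLASS }, so
   examine_argument returns true for an argument -- long double
   aggregates are passed in memory -- but false for a return value,
   where the x87 stack registers are legitimate.  */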
7072 /* Construct container for the argument used by GCC interface. See
7073 FUNCTION_ARG for the detailed description. */
7075 static rtx
7076 construct_container (machine_mode mode, machine_mode orig_mode,
7077 const_tree type, int in_return, int nintregs, int nsseregs,
7078 const int *intreg, int sse_regno)
7080 /* The following variables hold the static issued_error state. */
7081 static bool issued_sse_arg_error;
7082 static bool issued_sse_ret_error;
7083 static bool issued_x87_ret_error;
7085 machine_mode tmpmode;
7086 int bytes =
7087 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7088 enum x86_64_reg_class regclass[MAX_CLASSES];
7089 int n;
7090 int i;
7091 int nexps = 0;
7092 int needed_sseregs, needed_intregs;
7093 rtx exp[MAX_CLASSES];
7094 rtx ret;
7096 n = classify_argument (mode, type, regclass, 0);
7097 if (!n)
7098 return NULL;
7099 if (examine_argument (mode, type, in_return, &needed_intregs,
7100 &needed_sseregs))
7101 return NULL;
7102 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7103 return NULL;
7105 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7106 some less clueful developer tries to use floating-point anyway. */
7107 if (needed_sseregs && !TARGET_SSE)
7109 if (in_return)
7111 if (!issued_sse_ret_error)
7113 error ("SSE register return with SSE disabled");
7114 issued_sse_ret_error = true;
7117 else if (!issued_sse_arg_error)
7119 error ("SSE register argument with SSE disabled");
7120 issued_sse_arg_error = true;
7122 return NULL;
7125 /* Likewise, error if the ABI requires us to return values in the
7126 x87 registers and the user specified -mno-80387. */
7127 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7128 for (i = 0; i < n; i++)
7129 if (regclass[i] == X86_64_X87_CLASS
7130 || regclass[i] == X86_64_X87UP_CLASS
7131 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7133 if (!issued_x87_ret_error)
7135 error ("x87 register return with x87 disabled");
7136 issued_x87_ret_error = true;
7138 return NULL;
7141 /* First construct the simple cases. Avoid SCmode, since we want to use
7142 a single register to pass this type. */
7143 if (n == 1 && mode != SCmode)
7144 switch (regclass[0])
7146 case X86_64_INTEGER_CLASS:
7147 case X86_64_INTEGERSI_CLASS:
7148 return gen_rtx_REG (mode, intreg[0]);
7149 case X86_64_SSE_CLASS:
7150 case X86_64_SSESF_CLASS:
7151 case X86_64_SSEDF_CLASS:
7152 if (mode != BLKmode)
7153 return gen_reg_or_parallel (mode, orig_mode,
7154 SSE_REGNO (sse_regno));
7155 break;
7156 case X86_64_X87_CLASS:
7157 case X86_64_COMPLEX_X87_CLASS:
7158 return gen_rtx_REG (mode, FIRST_STACK_REG);
7159 case X86_64_NO_CLASS:
7160 /* Zero sized array, struct or class. */
7161 return NULL;
7162 default:
7163 gcc_unreachable ();
7165 if (n == 2
7166 && regclass[0] == X86_64_SSE_CLASS
7167 && regclass[1] == X86_64_SSEUP_CLASS
7168 && mode != BLKmode)
7169 return gen_reg_or_parallel (mode, orig_mode,
7170 SSE_REGNO (sse_regno));
7171 if (n == 4
7172 && regclass[0] == X86_64_SSE_CLASS
7173 && regclass[1] == X86_64_SSEUP_CLASS
7174 && regclass[2] == X86_64_SSEUP_CLASS
7175 && regclass[3] == X86_64_SSEUP_CLASS
7176 && mode != BLKmode)
7177 return gen_reg_or_parallel (mode, orig_mode,
7178 SSE_REGNO (sse_regno));
7179 if (n == 8
7180 && regclass[0] == X86_64_SSE_CLASS
7181 && regclass[1] == X86_64_SSEUP_CLASS
7182 && regclass[2] == X86_64_SSEUP_CLASS
7183 && regclass[3] == X86_64_SSEUP_CLASS
7184 && regclass[4] == X86_64_SSEUP_CLASS
7185 && regclass[5] == X86_64_SSEUP_CLASS
7186 && regclass[6] == X86_64_SSEUP_CLASS
7187 && regclass[7] == X86_64_SSEUP_CLASS
7188 && mode != BLKmode)
7189 return gen_reg_or_parallel (mode, orig_mode,
7190 SSE_REGNO (sse_regno));
7191 if (n == 2
7192 && regclass[0] == X86_64_X87_CLASS
7193 && regclass[1] == X86_64_X87UP_CLASS)
7194 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7196 if (n == 2
7197 && regclass[0] == X86_64_INTEGER_CLASS
7198 && regclass[1] == X86_64_INTEGER_CLASS
7199 && (mode == CDImode || mode == TImode)
7200 && intreg[0] + 1 == intreg[1])
7201 return gen_rtx_REG (mode, intreg[0]);
7203 /* Otherwise figure out the entries of the PARALLEL. */
7204 for (i = 0; i < n; i++)
7206 int pos;
7208 switch (regclass[i])
7210 case X86_64_NO_CLASS:
7211 break;
7212 case X86_64_INTEGER_CLASS:
7213 case X86_64_INTEGERSI_CLASS:
7214 /* Merge TImodes on aligned occasions here too. */
7215 if (i * 8 + 8 > bytes)
7216 tmpmode
7217 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7218 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7219 tmpmode = SImode;
7220 else
7221 tmpmode = DImode;
7222 /* We've requested 24 bytes for which we
7223 don't have a mode. Use DImode. */
7224 if (tmpmode == BLKmode)
7225 tmpmode = DImode;
7226 exp [nexps++]
7227 = gen_rtx_EXPR_LIST (VOIDmode,
7228 gen_rtx_REG (tmpmode, *intreg),
7229 GEN_INT (i*8));
7230 intreg++;
7231 break;
7232 case X86_64_SSESF_CLASS:
7233 exp [nexps++]
7234 = gen_rtx_EXPR_LIST (VOIDmode,
7235 gen_rtx_REG (SFmode,
7236 SSE_REGNO (sse_regno)),
7237 GEN_INT (i*8));
7238 sse_regno++;
7239 break;
7240 case X86_64_SSEDF_CLASS:
7241 exp [nexps++]
7242 = gen_rtx_EXPR_LIST (VOIDmode,
7243 gen_rtx_REG (DFmode,
7244 SSE_REGNO (sse_regno)),
7245 GEN_INT (i*8));
7246 sse_regno++;
7247 break;
7248 case X86_64_SSE_CLASS:
7249 pos = i;
7250 switch (n)
7252 case 1:
7253 tmpmode = DImode;
7254 break;
7255 case 2:
7256 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7258 tmpmode = TImode;
7259 i++;
7261 else
7262 tmpmode = DImode;
7263 break;
7264 case 4:
7265 gcc_assert (i == 0
7266 && regclass[1] == X86_64_SSEUP_CLASS
7267 && regclass[2] == X86_64_SSEUP_CLASS
7268 && regclass[3] == X86_64_SSEUP_CLASS);
7269 tmpmode = OImode;
7270 i += 3;
7271 break;
7272 case 8:
7273 gcc_assert (i == 0
7274 && regclass[1] == X86_64_SSEUP_CLASS
7275 && regclass[2] == X86_64_SSEUP_CLASS
7276 && regclass[3] == X86_64_SSEUP_CLASS
7277 && regclass[4] == X86_64_SSEUP_CLASS
7278 && regclass[5] == X86_64_SSEUP_CLASS
7279 && regclass[6] == X86_64_SSEUP_CLASS
7280 && regclass[7] == X86_64_SSEUP_CLASS);
7281 tmpmode = XImode;
7282 i += 7;
7283 break;
7284 default:
7285 gcc_unreachable ();
7287 exp [nexps++]
7288 = gen_rtx_EXPR_LIST (VOIDmode,
7289 gen_rtx_REG (tmpmode,
7290 SSE_REGNO (sse_regno)),
7291 GEN_INT (pos*8));
7292 sse_regno++;
7293 break;
7294 default:
7295 gcc_unreachable ();
7299 /* Empty aligned struct, union or class. */
7300 if (nexps == 0)
7301 return NULL;
7303 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7304 for (i = 0; i < nexps; i++)
7305 XVECEXP (ret, 0, i) = exp [i];
7306 return ret;
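/* Sketch of the result for the struct { double d; long l; } example used
   with classify_argument: assuming the first integer and SSE argument
   slots are still free, the PARALLEL built above looks roughly like

     (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                    (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi, each annotated
   with its byte offset within the structure.  */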
7309 /* Update the data in CUM to advance over an argument of mode MODE
7310 and data type TYPE. (TYPE is null for libcalls where that information
7311 may not be available.)
7313 Return the number of integer registers advanced over. */
7315 static int
7316 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7317 const_tree type, HOST_WIDE_INT bytes,
7318 HOST_WIDE_INT words)
7320 int res = 0;
7322 switch (mode)
7324 default:
7325 break;
7327 case BLKmode:
7328 if (bytes < 0)
7329 break;
7330 /* FALLTHRU */
7332 case DImode:
7333 case SImode:
7334 case HImode:
7335 case QImode:
7336 cum->words += words;
7337 cum->nregs -= words;
7338 cum->regno += words;
7339 if (cum->nregs >= 0)
7340 res = words;
7341 if (cum->nregs <= 0)
7343 cum->nregs = 0;
7344 cum->regno = 0;
7346 break;
7348 case OImode:
7349 /* OImode shouldn't be used directly. */
7350 gcc_unreachable ();
7352 case DFmode:
7353 if (cum->float_in_sse < 2)
7354 break;
7355 case SFmode:
7356 if (cum->float_in_sse < 1)
7357 break;
7358 /* FALLTHRU */
7360 case V8SFmode:
7361 case V8SImode:
7362 case V64QImode:
7363 case V32HImode:
7364 case V16SImode:
7365 case V8DImode:
7366 case V16SFmode:
7367 case V8DFmode:
7368 case V32QImode:
7369 case V16HImode:
7370 case V4DFmode:
7371 case V4DImode:
7372 case TImode:
7373 case V16QImode:
7374 case V8HImode:
7375 case V4SImode:
7376 case V2DImode:
7377 case V4SFmode:
7378 case V2DFmode:
7379 if (!type || !AGGREGATE_TYPE_P (type))
7381 cum->sse_words += words;
7382 cum->sse_nregs -= 1;
7383 cum->sse_regno += 1;
7384 if (cum->sse_nregs <= 0)
7386 cum->sse_nregs = 0;
7387 cum->sse_regno = 0;
7390 break;
7392 case V8QImode:
7393 case V4HImode:
7394 case V2SImode:
7395 case V2SFmode:
7396 case V1TImode:
7397 case V1DImode:
7398 if (!type || !AGGREGATE_TYPE_P (type))
7400 cum->mmx_words += words;
7401 cum->mmx_nregs -= 1;
7402 cum->mmx_regno += 1;
7403 if (cum->mmx_nregs <= 0)
7405 cum->mmx_nregs = 0;
7406 cum->mmx_regno = 0;
7409 break;
7412 return res;
7415 static int
7416 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7417 const_tree type, HOST_WIDE_INT words, bool named)
7419 int int_nregs, sse_nregs;
7421 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7422 if (!named && (VALID_AVX512F_REG_MODE (mode)
7423 || VALID_AVX256_REG_MODE (mode)))
7424 return 0;
7426 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7427 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7429 cum->nregs -= int_nregs;
7430 cum->sse_nregs -= sse_nregs;
7431 cum->regno += int_nregs;
7432 cum->sse_regno += sse_nregs;
7433 return int_nregs;
7435 else
7437 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7438 cum->words = (cum->words + align - 1) & ~(align - 1);
7439 cum->words += words;
7440 return 0;
7444 static int
7445 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7446 HOST_WIDE_INT words)
7448 /* Otherwise, this should be passed indirect. */
7449 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7451 cum->words += words;
7452 if (cum->nregs > 0)
7454 cum->nregs -= 1;
7455 cum->regno += 1;
7456 return 1;
7458 return 0;
7461 /* Update the data in CUM to advance over an argument of mode MODE and
7462 data type TYPE. (TYPE is null for libcalls where that information
7463 may not be available.) */
7465 static void
7466 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7467 const_tree type, bool named)
7469 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7470 HOST_WIDE_INT bytes, words;
7471 int nregs;
7473 if (mode == BLKmode)
7474 bytes = int_size_in_bytes (type);
7475 else
7476 bytes = GET_MODE_SIZE (mode);
7477 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7479 if (type)
7480 mode = type_natural_mode (type, NULL, false);
7482 if ((type && POINTER_BOUNDS_TYPE_P (type))
7483 || POINTER_BOUNDS_MODE_P (mode))
7485 /* If we pass bounds in the Bounds Table then just update the remaining bounds count. */
7486 if (cum->bnds_in_bt)
7488 cum->bnds_in_bt--;
7489 return;
7492 /* Update the remaining number of bounds to force. */
7493 if (cum->force_bnd_pass)
7494 cum->force_bnd_pass--;
7496 cum->bnd_regno++;
7498 return;
7501 /* The first arg not going to Bounds Tables resets this counter. */
7502 cum->bnds_in_bt = 0;
7503 /* For unnamed args we always pass bounds to avoid a bounds mess when
7504 the passed and received types do not match. If bounds do not follow an
7505 unnamed arg, still pretend the required number of bounds was passed. */
7506 if (cum->force_bnd_pass)
7508 cum->bnd_regno += cum->force_bnd_pass;
7509 cum->force_bnd_pass = 0;
7512 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7513 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7514 else if (TARGET_64BIT)
7515 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7516 else
7517 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7519 /* For stdarg we expect bounds to be passed for each value passed
7520 in register. */
7521 if (cum->stdarg)
7522 cum->force_bnd_pass = nregs;
7523 /* For pointers passed in memory we expect bounds passed in Bounds
7524 Table. */
7525 if (!nregs)
7526 cum->bnds_in_bt = chkp_type_bounds_count (type);
7529 /* Define where to put the arguments to a function.
7530 Value is zero to push the argument on the stack,
7531 or a hard register in which to store the argument.
7533 MODE is the argument's machine mode.
7534 TYPE is the data type of the argument (as a tree).
7535 This is null for libcalls where that information may
7536 not be available.
7537 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7538 the preceding args and about the function being called.
7539 NAMED is nonzero if this argument is a named parameter
7540 (otherwise it is an extra parameter matching an ellipsis). */
7542 static rtx
7543 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7544 machine_mode orig_mode, const_tree type,
7545 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7547 /* Avoid the AL settings for the Unix64 ABI. */
7548 if (mode == VOIDmode)
7549 return constm1_rtx;
7551 switch (mode)
7553 default:
7554 break;
7556 case BLKmode:
7557 if (bytes < 0)
7558 break;
7559 /* FALLTHRU */
7560 case DImode:
7561 case SImode:
7562 case HImode:
7563 case QImode:
7564 if (words <= cum->nregs)
7566 int regno = cum->regno;
7568 /* Fastcall allocates the first two DWORD (SImode) or
7569 smaller arguments to ECX and EDX if the argument isn't an
7570 aggregate type. */
7571 if (cum->fastcall)
7573 if (mode == BLKmode
7574 || mode == DImode
7575 || (type && AGGREGATE_TYPE_P (type)))
7576 break;
7578 /* ECX, not EAX, is the first allocated register. */
7579 if (regno == AX_REG)
7580 regno = CX_REG;
7582 return gen_rtx_REG (mode, regno);
7584 break;
7586 case DFmode:
7587 if (cum->float_in_sse < 2)
7588 break;
7589 case SFmode:
7590 if (cum->float_in_sse < 1)
7591 break;
7592 /* FALLTHRU */
7593 case TImode:
7594 /* In 32bit, we pass TImode in xmm registers. */
7595 case V16QImode:
7596 case V8HImode:
7597 case V4SImode:
7598 case V2DImode:
7599 case V4SFmode:
7600 case V2DFmode:
7601 if (!type || !AGGREGATE_TYPE_P (type))
7603 if (cum->sse_nregs)
7604 return gen_reg_or_parallel (mode, orig_mode,
7605 cum->sse_regno + FIRST_SSE_REG);
7607 break;
7609 case OImode:
7610 case XImode:
7611 /* OImode and XImode shouldn't be used directly. */
7612 gcc_unreachable ();
7614 case V64QImode:
7615 case V32HImode:
7616 case V16SImode:
7617 case V8DImode:
7618 case V16SFmode:
7619 case V8DFmode:
7620 case V8SFmode:
7621 case V8SImode:
7622 case V32QImode:
7623 case V16HImode:
7624 case V4DFmode:
7625 case V4DImode:
7626 if (!type || !AGGREGATE_TYPE_P (type))
7628 if (cum->sse_nregs)
7629 return gen_reg_or_parallel (mode, orig_mode,
7630 cum->sse_regno + FIRST_SSE_REG);
7632 break;
7634 case V8QImode:
7635 case V4HImode:
7636 case V2SImode:
7637 case V2SFmode:
7638 case V1TImode:
7639 case V1DImode:
7640 if (!type || !AGGREGATE_TYPE_P (type))
7642 if (cum->mmx_nregs)
7643 return gen_reg_or_parallel (mode, orig_mode,
7644 cum->mmx_regno + FIRST_MMX_REG);
7646 break;
7649 return NULL_RTX;
7652 static rtx
7653 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7654 machine_mode orig_mode, const_tree type, bool named)
7656 /* Handle a hidden AL argument containing the number of SSE registers
7657 used by a varargs x86-64 function. */
7658 if (mode == VOIDmode)
7659 return GEN_INT (cum->maybe_vaarg
7660 ? (cum->sse_nregs < 0
7661 ? X86_64_SSE_REGPARM_MAX
7662 : cum->sse_regno)
7663 : -1);
7665 switch (mode)
7667 default:
7668 break;
7670 case V8SFmode:
7671 case V8SImode:
7672 case V32QImode:
7673 case V16HImode:
7674 case V4DFmode:
7675 case V4DImode:
7676 case V16SFmode:
7677 case V16SImode:
7678 case V64QImode:
7679 case V32HImode:
7680 case V8DFmode:
7681 case V8DImode:
7682 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7683 if (!named)
7684 return NULL;
7685 break;
7688 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7689 cum->sse_nregs,
7690 &x86_64_int_parameter_registers [cum->regno],
7691 cum->sse_regno);
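/* The VOIDmode case above implements the SysV AMD64 convention of passing
   the number of vector registers used by a varargs call in %al.  For
   instance, a call such as

     printf ("%f", 3.14);

   ends up loading 1 into %al because one SSE register (%xmm0) carries an
   argument; a zero in %al lets the callee's prologue skip the SSE save
   loop emitted by setup_incoming_varargs_64.  */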
7694 static rtx
7695 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7696 machine_mode orig_mode, bool named,
7697 HOST_WIDE_INT bytes)
7699 unsigned int regno;
7701 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7702 We use a value of -2 to specify that the current function call is MS_ABI. */
7703 if (mode == VOIDmode)
7704 return GEN_INT (-2);
7706 /* If we've run out of registers, it goes on the stack. */
7707 if (cum->nregs == 0)
7708 return NULL_RTX;
7710 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7712 /* Only floating point modes are passed in anything but integer regs. */
7713 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7715 if (named)
7716 regno = cum->regno + FIRST_SSE_REG;
7717 else
7719 rtx t1, t2;
7721 /* Unnamed floating parameters are passed in both the
7722 SSE and integer registers. */
7723 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7724 t2 = gen_rtx_REG (mode, regno);
7725 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7726 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7727 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7730 /* Handle aggregate types passed in a register. */
7731 if (orig_mode == BLKmode)
7733 if (bytes > 0 && bytes <= 8)
7734 mode = (bytes > 4 ? DImode : SImode);
7735 if (mode == BLKmode)
7736 mode = DImode;
7739 return gen_reg_or_parallel (mode, orig_mode, regno);
7742 /* Return where to put the arguments to a function.
7743 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7745 MODE is the argument's machine mode. TYPE is the data type of the
7746 argument. It is null for libcalls where that information may not be
7747 available. CUM gives information about the preceding args and about
7748 the function being called. NAMED is nonzero if this argument is a
7749 named parameter (otherwise it is an extra parameter matching an
7750 ellipsis). */
7752 static rtx
7753 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7754 const_tree type, bool named)
7756 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7757 machine_mode mode = omode;
7758 HOST_WIDE_INT bytes, words;
7759 rtx arg;
7761 /* All pointer bounds arguments are handled separately here. */
7762 if ((type && POINTER_BOUNDS_TYPE_P (type))
7763 || POINTER_BOUNDS_MODE_P (mode))
7765 /* Return NULL if bounds are forced to go in Bounds Table. */
7766 if (cum->bnds_in_bt)
7767 arg = NULL;
7768 /* Return the next available bound reg if any. */
7769 else if (cum->bnd_regno <= LAST_BND_REG)
7770 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7771 /* Return the next special slot number otherwise. */
7772 else
7773 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7775 return arg;
7778 if (mode == BLKmode)
7779 bytes = int_size_in_bytes (type);
7780 else
7781 bytes = GET_MODE_SIZE (mode);
7782 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7784 /* To simplify the code below, represent vector types with a vector mode
7785 even if MMX/SSE are not active. */
7786 if (type && TREE_CODE (type) == VECTOR_TYPE)
7787 mode = type_natural_mode (type, cum, false);
7789 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7790 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7791 else if (TARGET_64BIT)
7792 arg = function_arg_64 (cum, mode, omode, type, named);
7793 else
7794 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7796 return arg;
7799 /* A C expression that indicates when an argument must be passed by
7800 reference. If nonzero for an argument, a copy of that argument is
7801 made in memory and a pointer to the argument is passed instead of
7802 the argument itself. The pointer is passed in whatever way is
7803 appropriate for passing a pointer to that type. */
7805 static bool
7806 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7807 const_tree type, bool)
7809 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7811 /* See Windows x64 Software Convention. */
7812 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7814 int msize = (int) GET_MODE_SIZE (mode);
7815 if (type)
7817 /* Arrays are passed by reference. */
7818 if (TREE_CODE (type) == ARRAY_TYPE)
7819 return true;
7821 if (AGGREGATE_TYPE_P (type))
7823 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7824 are passed by reference. */
7825 msize = int_size_in_bytes (type);
7829 /* __m128 is passed by reference. */
7830 switch (msize) {
7831 case 1: case 2: case 4: case 8:
7832 break;
7833 default:
7834 return true;
7837 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7838 return true;
7840 return false;
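/* Rough summary of the Win64 rule implemented above: an argument is
   passed by reference unless its size is exactly 1, 2, 4 or 8 bytes.
   So, for example, an __m128 value or a 12-byte

     struct s { int a, b, c; };

   is passed via a pointer to a caller-made copy, while a plain int or
   an 8-byte struct travels directly in a register or stack slot.  */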
7843 /* Return true when TYPE should be 128bit aligned for 32bit argument
7844 passing ABI. XXX: This function is obsolete and is only used for
7845 checking psABI compatibility with previous versions of GCC. */
7847 static bool
7848 ix86_compat_aligned_value_p (const_tree type)
7850 machine_mode mode = TYPE_MODE (type);
7851 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7852 || mode == TDmode
7853 || mode == TFmode
7854 || mode == TCmode)
7855 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7856 return true;
7857 if (TYPE_ALIGN (type) < 128)
7858 return false;
7860 if (AGGREGATE_TYPE_P (type))
7862 /* Walk the aggregates recursively. */
7863 switch (TREE_CODE (type))
7865 case RECORD_TYPE:
7866 case UNION_TYPE:
7867 case QUAL_UNION_TYPE:
7869 tree field;
7871 /* Walk all the structure fields. */
7872 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7874 if (TREE_CODE (field) == FIELD_DECL
7875 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7876 return true;
7878 break;
7881 case ARRAY_TYPE:
7882 /* Just for use if some languages pass arrays by value. */
7883 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7884 return true;
7885 break;
7887 default:
7888 gcc_unreachable ();
7891 return false;
7894 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7895 XXX: This function is obsolete and is only used for checking psABI
7896 compatibility with previous versions of GCC. */
7898 static unsigned int
7899 ix86_compat_function_arg_boundary (machine_mode mode,
7900 const_tree type, unsigned int align)
7902 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7903 natural boundaries. */
7904 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7906 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7907 make an exception for SSE modes since these require 128bit
7908 alignment.
7910 The handling here differs from field_alignment. ICC aligns MMX
7911 arguments to 4 byte boundaries, while structure fields are aligned
7912 to 8 byte boundaries. */
7913 if (!type)
7915 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7916 align = PARM_BOUNDARY;
7918 else
7920 if (!ix86_compat_aligned_value_p (type))
7921 align = PARM_BOUNDARY;
7924 if (align > BIGGEST_ALIGNMENT)
7925 align = BIGGEST_ALIGNMENT;
7926 return align;
7929 /* Return true when TYPE should be 128bit aligned for 32bit argument
7930 passing ABI. */
7932 static bool
7933 ix86_contains_aligned_value_p (const_tree type)
7935 machine_mode mode = TYPE_MODE (type);
7937 if (mode == XFmode || mode == XCmode)
7938 return false;
7940 if (TYPE_ALIGN (type) < 128)
7941 return false;
7943 if (AGGREGATE_TYPE_P (type))
7945 /* Walk the aggregates recursively. */
7946 switch (TREE_CODE (type))
7948 case RECORD_TYPE:
7949 case UNION_TYPE:
7950 case QUAL_UNION_TYPE:
7952 tree field;
7954 /* Walk all the structure fields. */
7955 for (field = TYPE_FIELDS (type);
7956 field;
7957 field = DECL_CHAIN (field))
7959 if (TREE_CODE (field) == FIELD_DECL
7960 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7961 return true;
7963 break;
7966 case ARRAY_TYPE:
7967 /* Just for use if some languages pass arrays by value. */
7968 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7969 return true;
7970 break;
7972 default:
7973 gcc_unreachable ();
7976 else
7977 return TYPE_ALIGN (type) >= 128;
7979 return false;
7982 /* Gives the alignment boundary, in bits, of an argument with the
7983 specified mode and type. */
7985 static unsigned int
7986 ix86_function_arg_boundary (machine_mode mode, const_tree type)
7988 unsigned int align;
7989 if (type)
7991 /* Since the main variant type is used for the call, convert the
7992 type to its main variant. */
7993 type = TYPE_MAIN_VARIANT (type);
7994 align = TYPE_ALIGN (type);
7996 else
7997 align = GET_MODE_ALIGNMENT (mode);
7998 if (align < PARM_BOUNDARY)
7999 align = PARM_BOUNDARY;
8000 else
8002 static bool warned;
8003 unsigned int saved_align = align;
8005 if (!TARGET_64BIT)
8007 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8008 if (!type)
8010 if (mode == XFmode || mode == XCmode)
8011 align = PARM_BOUNDARY;
8013 else if (!ix86_contains_aligned_value_p (type))
8014 align = PARM_BOUNDARY;
8016 if (align < 128)
8017 align = PARM_BOUNDARY;
8020 if (warn_psabi
8021 && !warned
8022 && align != ix86_compat_function_arg_boundary (mode, type,
8023 saved_align))
8025 warned = true;
8026 inform (input_location,
8027 "The ABI for passing parameters with %d-byte"
8028 " alignment has changed in GCC 4.6",
8029 align / BITS_PER_UNIT);
8033 return align;
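/* Example of the boundary computation above under the 32-bit rules
   (illustrative): a plain int or double argument gets PARM_BOUNDARY
   (32-bit) alignment, while an __m128 argument, or a struct with an
   __m128 member, keeps its 128-bit alignment; the -Wpsabi note fires
   once when that answer differs from what
   ix86_compat_function_arg_boundary (the pre-GCC 4.6 behaviour) would
   have produced.  */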
8036 /* Return true if N is a possible register number of function value. */
8038 static bool
8039 ix86_function_value_regno_p (const unsigned int regno)
8041 switch (regno)
8043 case AX_REG:
8044 return true;
8045 case DX_REG:
8046 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8047 case DI_REG:
8048 case SI_REG:
8049 return TARGET_64BIT && ix86_abi != MS_ABI;
8051 case FIRST_BND_REG:
8052 return chkp_function_instrumented_p (current_function_decl);
8054 /* Complex values are returned in %st(0)/%st(1) pair. */
8055 case ST0_REG:
8056 case ST1_REG:
8057 /* TODO: The function should depend on current function ABI but
8058 builtins.c would need updating then. Therefore we use the
8059 default ABI. */
8060 if (TARGET_64BIT && ix86_abi == MS_ABI)
8061 return false;
8062 return TARGET_FLOAT_RETURNS_IN_80387;
8064 /* Complex values are returned in %xmm0/%xmm1 pair. */
8065 case XMM0_REG:
8066 case XMM1_REG:
8067 return TARGET_SSE;
8069 case MM0_REG:
8070 if (TARGET_MACHO || TARGET_64BIT)
8071 return false;
8072 return TARGET_MMX;
8075 return false;
8078 /* Define how to find the value returned by a function.
8079 VALTYPE is the data type of the value (as a tree).
8080 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8081 otherwise, FUNC is 0. */
8083 static rtx
8084 function_value_32 (machine_mode orig_mode, machine_mode mode,
8085 const_tree fntype, const_tree fn)
8087 unsigned int regno;
8089 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8090 we normally prevent this case when mmx is not available. However
8091 some ABIs may require the result to be returned like DImode. */
8092 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8093 regno = FIRST_MMX_REG;
8095 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8096 we prevent this case when sse is not available. However some ABIs
8097 may require the result to be returned like integer TImode. */
8098 else if (mode == TImode
8099 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8100 regno = FIRST_SSE_REG;
8102 /* 32-byte vector modes in %ymm0. */
8103 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8104 regno = FIRST_SSE_REG;
8106 /* 64-byte vector modes in %zmm0. */
8107 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8108 regno = FIRST_SSE_REG;
8110 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8111 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8112 regno = FIRST_FLOAT_REG;
8113 else
8114 /* Most things go in %eax. */
8115 regno = AX_REG;
8117 /* Override FP return register with %xmm0 for local functions when
8118 SSE math is enabled or for functions with sseregparm attribute. */
8119 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8121 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8122 if ((sse_level >= 1 && mode == SFmode)
8123 || (sse_level == 2 && mode == DFmode))
8124 regno = FIRST_SSE_REG;
8127 /* OImode shouldn't be used directly. */
8128 gcc_assert (mode != OImode);
8130 return gen_rtx_REG (orig_mode, regno);
8133 static rtx
8134 function_value_64 (machine_mode orig_mode, machine_mode mode,
8135 const_tree valtype)
8137 rtx ret;
8139 /* Handle libcalls, which don't provide a type node. */
8140 if (valtype == NULL)
8142 unsigned int regno;
8144 switch (mode)
8146 case SFmode:
8147 case SCmode:
8148 case DFmode:
8149 case DCmode:
8150 case TFmode:
8151 case SDmode:
8152 case DDmode:
8153 case TDmode:
8154 regno = FIRST_SSE_REG;
8155 break;
8156 case XFmode:
8157 case XCmode:
8158 regno = FIRST_FLOAT_REG;
8159 break;
8160 case TCmode:
8161 return NULL;
8162 default:
8163 regno = AX_REG;
8166 return gen_rtx_REG (mode, regno);
8168 else if (POINTER_TYPE_P (valtype))
8170 /* Pointers are always returned in word_mode. */
8171 mode = word_mode;
8174 ret = construct_container (mode, orig_mode, valtype, 1,
8175 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8176 x86_64_int_return_registers, 0);
8178 /* For zero-sized structures, construct_container returns NULL, but we
8179 need to keep the rest of the compiler happy by returning a meaningful value. */
8180 if (!ret)
8181 ret = gen_rtx_REG (orig_mode, AX_REG);
8183 return ret;
8186 static rtx
8187 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8188 const_tree valtype)
8190 unsigned int regno = AX_REG;
8192 if (TARGET_SSE)
8194 switch (GET_MODE_SIZE (mode))
8196 case 16:
8197 if (valtype != NULL_TREE
8198 && !VECTOR_INTEGER_TYPE_P (valtype)
8200 && !INTEGRAL_TYPE_P (valtype)
8201 && !VECTOR_FLOAT_TYPE_P (valtype))
8202 break;
8203 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8204 && !COMPLEX_MODE_P (mode))
8205 regno = FIRST_SSE_REG;
8206 break;
8207 case 8:
8208 case 4:
8209 if (mode == SFmode || mode == DFmode)
8210 regno = FIRST_SSE_REG;
8211 break;
8212 default:
8213 break;
8216 return gen_rtx_REG (orig_mode, regno);
8219 static rtx
8220 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8221 machine_mode orig_mode, machine_mode mode)
8223 const_tree fn, fntype;
8225 fn = NULL_TREE;
8226 if (fntype_or_decl && DECL_P (fntype_or_decl))
8227 fn = fntype_or_decl;
8228 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8230 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8231 || POINTER_BOUNDS_MODE_P (mode))
8232 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8233 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8234 return function_value_ms_64 (orig_mode, mode, valtype);
8235 else if (TARGET_64BIT)
8236 return function_value_64 (orig_mode, mode, valtype);
8237 else
8238 return function_value_32 (orig_mode, mode, fntype, fn);
8241 static rtx
8242 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8244 machine_mode mode, orig_mode;
8246 orig_mode = TYPE_MODE (valtype);
8247 mode = type_natural_mode (valtype, NULL, true);
8248 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8251 /* Return an RTX representing a place where a function returns
8252 or receives pointer bounds, or NULL if no bounds are returned.
8254 VALTYPE is a data type of a value returned by the function.
8256 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8257 or FUNCTION_TYPE of the function.
8259 If OUTGOING is false, return a place in which the caller will
8260 see the return value. Otherwise, return a place where a
8261 function returns a value. */
8263 static rtx
8264 ix86_function_value_bounds (const_tree valtype,
8265 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8266 bool outgoing ATTRIBUTE_UNUSED)
8268 rtx res = NULL_RTX;
8270 if (BOUNDED_TYPE_P (valtype))
8271 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8272 else if (chkp_type_has_pointer (valtype))
8274 bitmap slots;
8275 rtx bounds[2];
8276 bitmap_iterator bi;
8277 unsigned i, bnd_no = 0;
8279 bitmap_obstack_initialize (NULL);
8280 slots = BITMAP_ALLOC (NULL);
8281 chkp_find_bound_slots (valtype, slots);
8283 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8285 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8286 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8287 gcc_assert (bnd_no < 2);
8288 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8291 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8293 BITMAP_FREE (slots);
8294 bitmap_obstack_release (NULL);
8296 else
8297 res = NULL_RTX;
8299 return res;
8302 /* Pointer function arguments and return values are promoted to
8303 word_mode. */
8305 static machine_mode
8306 ix86_promote_function_mode (const_tree type, machine_mode mode,
8307 int *punsignedp, const_tree fntype,
8308 int for_return)
8310 if (type != NULL_TREE && POINTER_TYPE_P (type))
8312 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8313 return word_mode;
8315 return default_promote_function_mode (type, mode, punsignedp, fntype,
8316 for_return);
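/* For instance, a 'char *' parameter or return value is promoted to
   word_mode and marked POINTERS_EXTEND_UNSIGNED, i.e. zero-extended.
   On 64-bit targets word_mode is DImode; the promotion mostly matters
   for the x32 ABI, where pointers are SImode but still travel
   zero-extended in 64-bit registers.  (Illustrative note; non-pointer
   types fall through to the default promotion rules.)  */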
8319 /* Return true if a structure, union or array with MODE containing FIELD
8320 should be accessed using BLKmode. */
8322 static bool
8323 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8325 /* Union with XFmode must be in BLKmode. */
8326 return (mode == XFmode
8327 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8328 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8332 ix86_libcall_value (machine_mode mode)
8334 return ix86_function_value_1 (NULL, NULL, mode, mode);
8337 /* Return true iff type is returned in memory. */
8339 static bool
8340 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8342 #ifdef SUBTARGET_RETURN_IN_MEMORY
8343 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8344 #else
8345 const machine_mode mode = type_natural_mode (type, NULL, true);
8346 HOST_WIDE_INT size;
8348 if (POINTER_BOUNDS_TYPE_P (type))
8349 return false;
8351 if (TARGET_64BIT)
8353 if (ix86_function_type_abi (fntype) == MS_ABI)
8355 size = int_size_in_bytes (type);
8357 /* __m128 is returned in xmm0. */
8358 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8359 || INTEGRAL_TYPE_P (type)
8360 || VECTOR_FLOAT_TYPE_P (type))
8361 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8362 && !COMPLEX_MODE_P (mode)
8363 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8364 return false;
8366 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8367 return size != 1 && size != 2 && size != 4 && size != 8;
8369 else
8371 int needed_intregs, needed_sseregs;
8373 return examine_argument (mode, type, 1,
8374 &needed_intregs, &needed_sseregs);
8377 else
8379 if (mode == BLKmode)
8380 return true;
8382 size = int_size_in_bytes (type);
8384 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8385 return false;
8387 if (VECTOR_MODE_P (mode) || mode == TImode)
8389 /* User-created vectors small enough to fit in EAX. */
8390 if (size < 8)
8391 return false;
8393 /* Unless the ABI prescribes otherwise,
8394 MMX/3dNow values are returned in MM0 if available. */
8396 if (size == 8)
8397 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8399 /* SSE values are returned in XMM0 if available. */
8400 if (size == 16)
8401 return !TARGET_SSE;
8403 /* AVX values are returned in YMM0 if available. */
8404 if (size == 32)
8405 return !TARGET_AVX;
8407 /* AVX512F values are returned in ZMM0 if available. */
8408 if (size == 64)
8409 return !TARGET_AVX512F;
8412 if (mode == XFmode)
8413 return false;
8415 if (size > 12)
8416 return true;
8418 /* OImode shouldn't be used directly. */
8419 gcc_assert (mode != OImode);
8421 return false;
8423 #endif
8427 /* Create the va_list data type. */
8429 /* Returns the calling convention specific va_list data type.
8430 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8432 static tree
8433 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8435 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8437 /* For i386 we use a plain pointer to the argument area. */
8438 if (!TARGET_64BIT || abi == MS_ABI)
8439 return build_pointer_type (char_type_node);
8441 record = lang_hooks.types.make_type (RECORD_TYPE);
8442 type_decl = build_decl (BUILTINS_LOCATION,
8443 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8445 f_gpr = build_decl (BUILTINS_LOCATION,
8446 FIELD_DECL, get_identifier ("gp_offset"),
8447 unsigned_type_node);
8448 f_fpr = build_decl (BUILTINS_LOCATION,
8449 FIELD_DECL, get_identifier ("fp_offset"),
8450 unsigned_type_node);
8451 f_ovf = build_decl (BUILTINS_LOCATION,
8452 FIELD_DECL, get_identifier ("overflow_arg_area"),
8453 ptr_type_node);
8454 f_sav = build_decl (BUILTINS_LOCATION,
8455 FIELD_DECL, get_identifier ("reg_save_area"),
8456 ptr_type_node);
8458 va_list_gpr_counter_field = f_gpr;
8459 va_list_fpr_counter_field = f_fpr;
8461 DECL_FIELD_CONTEXT (f_gpr) = record;
8462 DECL_FIELD_CONTEXT (f_fpr) = record;
8463 DECL_FIELD_CONTEXT (f_ovf) = record;
8464 DECL_FIELD_CONTEXT (f_sav) = record;
8466 TYPE_STUB_DECL (record) = type_decl;
8467 TYPE_NAME (record) = type_decl;
8468 TYPE_FIELDS (record) = f_gpr;
8469 DECL_CHAIN (f_gpr) = f_fpr;
8470 DECL_CHAIN (f_fpr) = f_ovf;
8471 DECL_CHAIN (f_ovf) = f_sav;
8473 layout_type (record);
8475 /* The correct type is an array type of one element. */
8476 return build_array_type (record, build_index_type (size_zero_node));
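/* The record built above follows the va_list layout mandated by the
   SysV AMD64 ABI, roughly equivalent to the C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   while 32-bit and MS-ABI targets use a plain 'char *' cursor instead.  */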
8479 /* Setup the builtin va_list data type and for 64-bit the additional
8480 calling convention specific va_list data types. */
8482 static tree
8483 ix86_build_builtin_va_list (void)
8485 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8487 /* Initialize abi specific va_list builtin types. */
8488 if (TARGET_64BIT)
8490 tree t;
8491 if (ix86_abi == MS_ABI)
8493 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8494 if (TREE_CODE (t) != RECORD_TYPE)
8495 t = build_variant_type_copy (t);
8496 sysv_va_list_type_node = t;
8498 else
8500 t = ret;
8501 if (TREE_CODE (t) != RECORD_TYPE)
8502 t = build_variant_type_copy (t);
8503 sysv_va_list_type_node = t;
8505 if (ix86_abi != MS_ABI)
8507 t = ix86_build_builtin_va_list_abi (MS_ABI);
8508 if (TREE_CODE (t) != RECORD_TYPE)
8509 t = build_variant_type_copy (t);
8510 ms_va_list_type_node = t;
8512 else
8514 t = ret;
8515 if (TREE_CODE (t) != RECORD_TYPE)
8516 t = build_variant_type_copy (t);
8517 ms_va_list_type_node = t;
8521 return ret;
8524 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8526 static void
8527 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8529 rtx save_area, mem;
8530 alias_set_type set;
8531 int i, max;
8533 /* GPR size of varargs save area. */
8534 if (cfun->va_list_gpr_size)
8535 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8536 else
8537 ix86_varargs_gpr_size = 0;
8539 /* FPR size of varargs save area. We don't need it if we don't pass
8540 anything in SSE registers. */
8541 if (TARGET_SSE && cfun->va_list_fpr_size)
8542 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8543 else
8544 ix86_varargs_fpr_size = 0;
8546 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8547 return;
8549 save_area = frame_pointer_rtx;
8550 set = get_varargs_alias_set ();
8552 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8553 if (max > X86_64_REGPARM_MAX)
8554 max = X86_64_REGPARM_MAX;
8556 for (i = cum->regno; i < max; i++)
8558 mem = gen_rtx_MEM (word_mode,
8559 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8560 MEM_NOTRAP_P (mem) = 1;
8561 set_mem_alias_set (mem, set);
8562 emit_move_insn (mem,
8563 gen_rtx_REG (word_mode,
8564 x86_64_int_parameter_registers[i]));
8567 if (ix86_varargs_fpr_size)
8569 machine_mode smode;
8570 rtx_code_label *label;
8571 rtx test;
8573 /* Now emit code to save SSE registers. The AX parameter contains the
8574 number of SSE parameter registers used to call this function, though
8575 all we actually check here is the zero/non-zero status. */
8577 label = gen_label_rtx ();
8578 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8579 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8580 label));
8582 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8583 we used movdqa (i.e. TImode) instead? Perhaps even better would
8584 be if we could determine the real mode of the data, via a hook
8585 into pass_stdarg. Ignore all that for now. */
8586 smode = V4SFmode;
8587 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8588 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8590 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8591 if (max > X86_64_SSE_REGPARM_MAX)
8592 max = X86_64_SSE_REGPARM_MAX;
8594 for (i = cum->sse_regno; i < max; ++i)
8596 mem = plus_constant (Pmode, save_area,
8597 i * 16 + ix86_varargs_gpr_size);
8598 mem = gen_rtx_MEM (smode, mem);
8599 MEM_NOTRAP_P (mem) = 1;
8600 set_mem_alias_set (mem, set);
8601 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8603 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8606 emit_label (label);
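/* A rough sketch of the save area laid out above, relative to the frame
   pointer (this is what the later va_arg expansion assumes):

       fp +  0:  rdi  rsi  rdx  rcx  r8  r9        6 x 8  bytes of GPRs
       fp + 48:  xmm0 ... xmm7                     8 x 16 bytes of SSE regs

   The SSE stores are skipped at run time when %al is zero.  */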
8610 static void
8611 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8613 alias_set_type set = get_varargs_alias_set ();
8614 int i;
8616 /* Reset to zero, as there might be a SysV va_arg used
8617 before. */
8618 ix86_varargs_gpr_size = 0;
8619 ix86_varargs_fpr_size = 0;
8621 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8623 rtx reg, mem;
8625 mem = gen_rtx_MEM (Pmode,
8626 plus_constant (Pmode, virtual_incoming_args_rtx,
8627 i * UNITS_PER_WORD));
8628 MEM_NOTRAP_P (mem) = 1;
8629 set_mem_alias_set (mem, set);
8631 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8632 emit_move_insn (mem, reg);
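/* Roughly: for the MS ABI the loop above spills the integer argument
   registers (rcx, rdx, r8, r9) into their slots in the caller-allocated
   home area of the incoming argument space, so va_arg can then walk all
   arguments linearly on the stack.  */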
8636 static void
8637 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8638 tree type, int *, int no_rtl)
8640 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8641 CUMULATIVE_ARGS next_cum;
8642 tree fntype;
8644 /* This argument doesn't appear to be used anymore, which is good,
8645 because the old code here didn't suppress rtl generation. */
8646 gcc_assert (!no_rtl);
8648 if (!TARGET_64BIT)
8649 return;
8651 fntype = TREE_TYPE (current_function_decl);
8653 /* For varargs, we do not want to skip the dummy va_dcl argument.
8654 For stdargs, we do want to skip the last named argument. */
8655 next_cum = *cum;
8656 if (stdarg_p (fntype))
8657 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8658 true);
8660 if (cum->call_abi == MS_ABI)
8661 setup_incoming_varargs_ms_64 (&next_cum);
8662 else
8663 setup_incoming_varargs_64 (&next_cum);
8666 static void
8667 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8668 enum machine_mode mode,
8669 tree type,
8670 int *pretend_size ATTRIBUTE_UNUSED,
8671 int no_rtl)
8673 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8674 CUMULATIVE_ARGS next_cum;
8675 tree fntype;
8676 rtx save_area;
8677 int bnd_reg, i, max;
8679 gcc_assert (!no_rtl);
8681 /* Do nothing if we use a plain pointer to the argument area. */
8682 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8683 return;
8685 fntype = TREE_TYPE (current_function_decl);
8687 /* For varargs, we do not want to skip the dummy va_dcl argument.
8688 For stdargs, we do want to skip the last named argument. */
8689 next_cum = *cum;
8690 if (stdarg_p (fntype))
8691 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8692 true);
8693 save_area = frame_pointer_rtx;
8695 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8696 if (max > X86_64_REGPARM_MAX)
8697 max = X86_64_REGPARM_MAX;
8699 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8700 if (chkp_function_instrumented_p (current_function_decl))
8701 for (i = cum->regno; i < max; i++)
8703 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8704 rtx reg = gen_rtx_REG (DImode,
8705 x86_64_int_parameter_registers[i]);
8706 rtx ptr = reg;
8707 rtx bounds;
8709 if (bnd_reg <= LAST_BND_REG)
8710 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8711 else
8713 rtx ldx_addr =
8714 plus_constant (Pmode, arg_pointer_rtx,
8715 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8716 bounds = gen_reg_rtx (BNDmode);
8717 emit_insn (BNDmode == BND64mode
8718 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8719 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8722 emit_insn (BNDmode == BND64mode
8723 ? gen_bnd64_stx (addr, ptr, bounds)
8724 : gen_bnd32_stx (addr, ptr, bounds));
8726 bnd_reg++;
8731 /* Return true if TYPE is a va_list of the plain char * kind. */
8733 static bool
8734 is_va_list_char_pointer (tree type)
8736 tree canonic;
8738 /* For 32-bit it is always true. */
8739 if (!TARGET_64BIT)
8740 return true;
8741 canonic = ix86_canonical_va_list_type (type);
8742 return (canonic == ms_va_list_type_node
8743 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8746 /* Implement va_start. */
8748 static void
8749 ix86_va_start (tree valist, rtx nextarg)
8751 HOST_WIDE_INT words, n_gpr, n_fpr;
8752 tree f_gpr, f_fpr, f_ovf, f_sav;
8753 tree gpr, fpr, ovf, sav, t;
8754 tree type;
8755 rtx ovf_rtx;
8757 if (flag_split_stack
8758 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8760 unsigned int scratch_regno;
8762 /* When we are splitting the stack, we can't refer to the stack
8763 arguments using internal_arg_pointer, because they may be on
8764 the old stack. The split stack prologue will arrange to
8765 leave a pointer to the old stack arguments in a scratch
8766 register, which we here copy to a pseudo-register. The split
8767 stack prologue can't set the pseudo-register directly because
8768 it (the prologue) runs before any registers have been saved. */
8770 scratch_regno = split_stack_prologue_scratch_regno ();
8771 if (scratch_regno != INVALID_REGNUM)
8773 rtx reg;
8774 rtx_insn *seq;
8776 reg = gen_reg_rtx (Pmode);
8777 cfun->machine->split_stack_varargs_pointer = reg;
8779 start_sequence ();
8780 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8781 seq = get_insns ();
8782 end_sequence ();
8784 push_topmost_sequence ();
8785 emit_insn_after (seq, entry_of_function ());
8786 pop_topmost_sequence ();
8790 /* Only the 64-bit target needs something special. */
8791 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8793 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8794 std_expand_builtin_va_start (valist, nextarg);
8795 else
8797 rtx va_r, next;
8799 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8800 next = expand_binop (ptr_mode, add_optab,
8801 cfun->machine->split_stack_varargs_pointer,
8802 crtl->args.arg_offset_rtx,
8803 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8804 convert_move (va_r, next, 0);
8806 /* Store zero bounds for va_list. */
8807 if (chkp_function_instrumented_p (current_function_decl))
8808 chkp_expand_bounds_reset_for_mem (valist,
8809 make_tree (TREE_TYPE (valist),
8810 next));
8813 return;
8816 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8817 f_fpr = DECL_CHAIN (f_gpr);
8818 f_ovf = DECL_CHAIN (f_fpr);
8819 f_sav = DECL_CHAIN (f_ovf);
8821 valist = build_simple_mem_ref (valist);
8822 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8823 /* The following should be folded into the MEM_REF offset. */
8824 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8825 f_gpr, NULL_TREE);
8826 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8827 f_fpr, NULL_TREE);
8828 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8829 f_ovf, NULL_TREE);
8830 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8831 f_sav, NULL_TREE);
8833 /* Count number of gp and fp argument registers used. */
8834 words = crtl->args.info.words;
8835 n_gpr = crtl->args.info.regno;
8836 n_fpr = crtl->args.info.sse_regno;
8838 if (cfun->va_list_gpr_size)
8840 type = TREE_TYPE (gpr);
8841 t = build2 (MODIFY_EXPR, type,
8842 gpr, build_int_cst (type, n_gpr * 8));
8843 TREE_SIDE_EFFECTS (t) = 1;
8844 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8847 if (TARGET_SSE && cfun->va_list_fpr_size)
8849 type = TREE_TYPE (fpr);
8850 t = build2 (MODIFY_EXPR, type, fpr,
8851 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8852 TREE_SIDE_EFFECTS (t) = 1;
8853 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8856 /* Find the overflow area. */
8857 type = TREE_TYPE (ovf);
8858 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8859 ovf_rtx = crtl->args.internal_arg_pointer;
8860 else
8861 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8862 t = make_tree (type, ovf_rtx);
8863 if (words != 0)
8864 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8866 /* Store zero bounds for overflow area pointer. */
8867 if (chkp_function_instrumented_p (current_function_decl))
8868 chkp_expand_bounds_reset_for_mem (ovf, t);
8870 t = build2 (MODIFY_EXPR, type, ovf, t);
8871 TREE_SIDE_EFFECTS (t) = 1;
8872 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8874 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8876 /* Find the register save area.
8877 The function prologue saves it right above the stack frame. */
8878 type = TREE_TYPE (sav);
8879 t = make_tree (type, frame_pointer_rtx);
8880 if (!ix86_varargs_gpr_size)
8881 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8883 /* Store zero bounds for save area pointer. */
8884 if (chkp_function_instrumented_p (current_function_decl))
8885 chkp_expand_bounds_reset_for_mem (sav, t);
8887 t = build2 (MODIFY_EXPR, type, sav, t);
8888 TREE_SIDE_EFFECTS (t) = 1;
8889 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
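/* The net effect of the SysV va_start expansion above, in pseudo C (a
   sketch; n_gpr/n_fpr are the named registers already consumed and
   "words" the named stack words):

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = 48 + n_fpr * 16;
     ap->overflow_arg_area = incoming_args + words * 8;
     ap->reg_save_area = the register save area set up by the prologue;
*/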
8893 /* Implement va_arg. */
8895 static tree
8896 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8897 gimple_seq *post_p)
8899 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8900 tree f_gpr, f_fpr, f_ovf, f_sav;
8901 tree gpr, fpr, ovf, sav, t;
8902 int size, rsize;
8903 tree lab_false, lab_over = NULL_TREE;
8904 tree addr, t2;
8905 rtx container;
8906 int indirect_p = 0;
8907 tree ptrtype;
8908 machine_mode nat_mode;
8909 unsigned int arg_boundary;
8911 /* Only the 64-bit target needs something special. */
8912 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8913 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8915 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8916 f_fpr = DECL_CHAIN (f_gpr);
8917 f_ovf = DECL_CHAIN (f_fpr);
8918 f_sav = DECL_CHAIN (f_ovf);
8920 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8921 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8922 valist = build_va_arg_indirect_ref (valist);
8923 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8924 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8925 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8927 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8928 if (indirect_p)
8929 type = build_pointer_type (type);
8930 size = int_size_in_bytes (type);
8931 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8933 nat_mode = type_natural_mode (type, NULL, false);
8934 switch (nat_mode)
8936 case V8SFmode:
8937 case V8SImode:
8938 case V32QImode:
8939 case V16HImode:
8940 case V4DFmode:
8941 case V4DImode:
8942 case V16SFmode:
8943 case V16SImode:
8944 case V64QImode:
8945 case V32HImode:
8946 case V8DFmode:
8947 case V8DImode:
8948 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8949 if (!TARGET_64BIT_MS_ABI)
8951 container = NULL;
8952 break;
8955 default:
8956 container = construct_container (nat_mode, TYPE_MODE (type),
8957 type, 0, X86_64_REGPARM_MAX,
8958 X86_64_SSE_REGPARM_MAX, intreg,
8960 break;
8963 /* Pull the value out of the saved registers. */
8965 addr = create_tmp_var (ptr_type_node, "addr");
8967 if (container)
8969 int needed_intregs, needed_sseregs;
8970 bool need_temp;
8971 tree int_addr, sse_addr;
8973 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8974 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8976 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8978 need_temp = (!REG_P (container)
8979 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8980 || TYPE_ALIGN (type) > 128));
8982 /* When passing a structure, verify that it forms a consecutive block
8983 in the register save area. If not, we need to do moves. */
8984 if (!need_temp && !REG_P (container))
8986 /* Verify that all registers are strictly consecutive. */
8987 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8989 int i;
8991 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8993 rtx slot = XVECEXP (container, 0, i);
8994 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8995 || INTVAL (XEXP (slot, 1)) != i * 16)
8996 need_temp = 1;
8999 else
9001 int i;
9003 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9005 rtx slot = XVECEXP (container, 0, i);
9006 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9007 || INTVAL (XEXP (slot, 1)) != i * 8)
9008 need_temp = 1;
9012 if (!need_temp)
9014 int_addr = addr;
9015 sse_addr = addr;
9017 else
9019 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9020 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9023 /* First ensure that we fit completely in registers. */
9024 if (needed_intregs)
9026 t = build_int_cst (TREE_TYPE (gpr),
9027 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9028 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9029 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9030 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9031 gimplify_and_add (t, pre_p);
9033 if (needed_sseregs)
9035 t = build_int_cst (TREE_TYPE (fpr),
9036 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9037 + X86_64_REGPARM_MAX * 8);
9038 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9039 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9040 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9041 gimplify_and_add (t, pre_p);
9044 /* Compute index to start of area used for integer regs. */
9045 if (needed_intregs)
9047 /* int_addr = gpr + sav; */
9048 t = fold_build_pointer_plus (sav, gpr);
9049 gimplify_assign (int_addr, t, pre_p);
9051 if (needed_sseregs)
9053 /* sse_addr = fpr + sav; */
9054 t = fold_build_pointer_plus (sav, fpr);
9055 gimplify_assign (sse_addr, t, pre_p);
9057 if (need_temp)
9059 int i, prev_size = 0;
9060 tree temp = create_tmp_var (type, "va_arg_tmp");
9062 /* addr = &temp; */
9063 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9064 gimplify_assign (addr, t, pre_p);
9066 for (i = 0; i < XVECLEN (container, 0); i++)
9068 rtx slot = XVECEXP (container, 0, i);
9069 rtx reg = XEXP (slot, 0);
9070 machine_mode mode = GET_MODE (reg);
9071 tree piece_type;
9072 tree addr_type;
9073 tree daddr_type;
9074 tree src_addr, src;
9075 int src_offset;
9076 tree dest_addr, dest;
9077 int cur_size = GET_MODE_SIZE (mode);
9079 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9080 prev_size = INTVAL (XEXP (slot, 1));
9081 if (prev_size + cur_size > size)
9083 cur_size = size - prev_size;
9084 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9085 if (mode == BLKmode)
9086 mode = QImode;
9088 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9089 if (mode == GET_MODE (reg))
9090 addr_type = build_pointer_type (piece_type);
9091 else
9092 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9093 true);
9094 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9095 true);
9097 if (SSE_REGNO_P (REGNO (reg)))
9099 src_addr = sse_addr;
9100 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9102 else
9104 src_addr = int_addr;
9105 src_offset = REGNO (reg) * 8;
9107 src_addr = fold_convert (addr_type, src_addr);
9108 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9110 dest_addr = fold_convert (daddr_type, addr);
9111 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9112 if (cur_size == GET_MODE_SIZE (mode))
9114 src = build_va_arg_indirect_ref (src_addr);
9115 dest = build_va_arg_indirect_ref (dest_addr);
9117 gimplify_assign (dest, src, pre_p);
9119 else
9121 tree copy
9122 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9123 3, dest_addr, src_addr,
9124 size_int (cur_size));
9125 gimplify_and_add (copy, pre_p);
9127 prev_size += cur_size;
9131 if (needed_intregs)
9133 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9134 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9135 gimplify_assign (gpr, t, pre_p);
9138 if (needed_sseregs)
9140 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9141 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9142 gimplify_assign (fpr, t, pre_p);
9145 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9147 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9150 /* ... otherwise out of the overflow area. */
9152 /* When the caller aligns a parameter on the stack, any alignment
9153 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9154 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
9155 caller. */
9156 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9157 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9158 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9160 /* Care for on-stack alignment if needed. */
9161 if (arg_boundary <= 64 || size == 0)
9162 t = ovf;
9163 else
9165 HOST_WIDE_INT align = arg_boundary / 8;
9166 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9167 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9168 build_int_cst (TREE_TYPE (t), -align));
9171 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9172 gimplify_assign (addr, t, pre_p);
9174 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9175 gimplify_assign (unshare_expr (ovf), t, pre_p);
9177 if (container)
9178 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9180 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9181 addr = fold_convert (ptrtype, addr);
9183 if (indirect_p)
9184 addr = build_va_arg_indirect_ref (addr);
9185 return build_va_arg_indirect_ref (addr);
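/* For a single integer-class argument the gimple sequence built above
   boils down to roughly (a sketch that ignores temporaries, alignment,
   and the memcpy path used for multi-register aggregates):

     if (ap->gp_offset >= 48) goto overflow;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(TYPE *) addr;
*/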
9188 /* Return true if OPNUM's MEM should be matched
9189 in movabs* patterns. */
9191 bool
9192 ix86_check_movabs (rtx insn, int opnum)
9194 rtx set, mem;
9196 set = PATTERN (insn);
9197 if (GET_CODE (set) == PARALLEL)
9198 set = XVECEXP (set, 0, 0);
9199 gcc_assert (GET_CODE (set) == SET);
9200 mem = XEXP (set, opnum);
9201 while (GET_CODE (mem) == SUBREG)
9202 mem = SUBREG_REG (mem);
9203 gcc_assert (MEM_P (mem));
9204 return volatile_ok || !MEM_VOLATILE_P (mem);
9207 /* Initialize the table of extra 80387 mathematical constants. */
9209 static void
9210 init_ext_80387_constants (void)
9212 static const char * cst[5] =
9214 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9215 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9216 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9217 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9218 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9220 int i;
9222 for (i = 0; i < 5; i++)
9224 real_from_string (&ext_80387_constants_table[i], cst[i]);
9225 /* Ensure each constant is rounded to XFmode precision. */
9226 real_convert (&ext_80387_constants_table[i],
9227 XFmode, &ext_80387_constants_table[i]);
9230 ext_80387_constants_init = 1;
9233 /* Return non-zero if the constant is something that
9234 can be loaded with a special instruction. */
9237 standard_80387_constant_p (rtx x)
9239 machine_mode mode = GET_MODE (x);
9241 REAL_VALUE_TYPE r;
9243 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9244 return -1;
9246 if (x == CONST0_RTX (mode))
9247 return 1;
9248 if (x == CONST1_RTX (mode))
9249 return 2;
9251 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9253 /* For XFmode constants, try to find a special 80387 instruction when
9254 optimizing for size or on those CPUs that benefit from them. */
9255 if (mode == XFmode
9256 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9258 int i;
9260 if (! ext_80387_constants_init)
9261 init_ext_80387_constants ();
9263 for (i = 0; i < 5; i++)
9264 if (real_identical (&r, &ext_80387_constants_table[i]))
9265 return i + 3;
9268 /* A load of the constant -0.0 or -1.0 will be split into an
9269 fldz;fchs or fld1;fchs sequence. */
9270 if (real_isnegzero (&r))
9271 return 8;
9272 if (real_identical (&r, &dconstm1))
9273 return 9;
9275 return 0;
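/* To summarize the return values above: -1 means X is not an x87-mode
   CONST_DOUBLE at all, 0 means no special instruction, 1 is fldz, 2 is
   fld1, 3..7 select fldlg2, fldln2, fldl2e, fldl2t and fldpi, and 8/9
   stand for -0.0 and -1.0, loaded as fldz;fchs and fld1;fchs (see
   standard_80387_constant_opcode below).  */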
9278 /* Return the opcode of the special instruction to be used to load
9279 the constant X. */
9281 const char *
9282 standard_80387_constant_opcode (rtx x)
9284 switch (standard_80387_constant_p (x))
9286 case 1:
9287 return "fldz";
9288 case 2:
9289 return "fld1";
9290 case 3:
9291 return "fldlg2";
9292 case 4:
9293 return "fldln2";
9294 case 5:
9295 return "fldl2e";
9296 case 6:
9297 return "fldl2t";
9298 case 7:
9299 return "fldpi";
9300 case 8:
9301 case 9:
9302 return "#";
9303 default:
9304 gcc_unreachable ();
9308 /* Return the CONST_DOUBLE representing the 80387 constant that is
9309 loaded by the specified special instruction. The argument IDX
9310 matches the return value from standard_80387_constant_p. */
9313 standard_80387_constant_rtx (int idx)
9315 int i;
9317 if (! ext_80387_constants_init)
9318 init_ext_80387_constants ();
9320 switch (idx)
9322 case 3:
9323 case 4:
9324 case 5:
9325 case 6:
9326 case 7:
9327 i = idx - 3;
9328 break;
9330 default:
9331 gcc_unreachable ();
9334 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9335 XFmode);
9338 /* Return 1 if X is all 0s and 2 if X is all 1s
9339 in a supported SSE/AVX vector mode. */
9342 standard_sse_constant_p (rtx x)
9344 machine_mode mode = GET_MODE (x);
9346 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9347 return 1;
9348 if (vector_all_ones_operand (x, mode))
9349 switch (mode)
9351 case V16QImode:
9352 case V8HImode:
9353 case V4SImode:
9354 case V2DImode:
9355 if (TARGET_SSE2)
9356 return 2;
9357 case V32QImode:
9358 case V16HImode:
9359 case V8SImode:
9360 case V4DImode:
9361 if (TARGET_AVX2)
9362 return 2;
9363 case V64QImode:
9364 case V32HImode:
9365 case V16SImode:
9366 case V8DImode:
9367 if (TARGET_AVX512F)
9368 return 2;
9369 default:
9370 break;
9373 return 0;
9376 /* Return the opcode of the special instruction to be used to load
9377 the constant X. */
9379 const char *
9380 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9382 switch (standard_sse_constant_p (x))
9384 case 1:
9385 switch (get_attr_mode (insn))
9387 case MODE_XI:
9388 return "vpxord\t%g0, %g0, %g0";
9389 case MODE_V16SF:
9390 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9391 : "vpxord\t%g0, %g0, %g0";
9392 case MODE_V8DF:
9393 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9394 : "vpxorq\t%g0, %g0, %g0";
9395 case MODE_TI:
9396 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9397 : "%vpxor\t%0, %d0";
9398 case MODE_V2DF:
9399 return "%vxorpd\t%0, %d0";
9400 case MODE_V4SF:
9401 return "%vxorps\t%0, %d0";
9403 case MODE_OI:
9404 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9405 : "vpxor\t%x0, %x0, %x0";
9406 case MODE_V4DF:
9407 return "vxorpd\t%x0, %x0, %x0";
9408 case MODE_V8SF:
9409 return "vxorps\t%x0, %x0, %x0";
9411 default:
9412 break;
9415 case 2:
9416 if (TARGET_AVX512VL
9417 || get_attr_mode (insn) == MODE_XI
9418 || get_attr_mode (insn) == MODE_V8DF
9419 || get_attr_mode (insn) == MODE_V16SF)
9420 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9421 if (TARGET_AVX)
9422 return "vpcmpeqd\t%0, %0, %0";
9423 else
9424 return "pcmpeqd\t%0, %0";
9426 default:
9427 break;
9429 gcc_unreachable ();
9432 /* Return true if OP contains a symbol reference. */
9434 bool
9435 symbolic_reference_mentioned_p (rtx op)
9437 const char *fmt;
9438 int i;
9440 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9441 return true;
9443 fmt = GET_RTX_FORMAT (GET_CODE (op));
9444 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9446 if (fmt[i] == 'E')
9448 int j;
9450 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9451 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9452 return true;
9455 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9456 return true;
9459 return false;
9462 /* Return true if it is appropriate to emit `ret' instructions in the
9463 body of a function. Do this only if the epilogue is simple, needing a
9464 couple of insns. Prior to reloading, we can't tell how many registers
9465 must be saved, so return false then. Return false if there is no frame
9466 marker to de-allocate. */
9468 bool
9469 ix86_can_use_return_insn_p (void)
9471 struct ix86_frame frame;
9473 if (! reload_completed || frame_pointer_needed)
9474 return 0;
9476 /* Don't allow more than 32k pop, since that's all we can do
9477 with one instruction. */
9478 if (crtl->args.pops_args && crtl->args.size >= 32768)
9479 return 0;
9481 ix86_compute_frame_layout (&frame);
9482 return (frame.stack_pointer_offset == UNITS_PER_WORD
9483 && (frame.nregs + frame.nsseregs) == 0);
9486 /* Value should be nonzero if functions must have frame pointers.
9487 Zero means the frame pointer need not be set up (and parms may
9488 be accessed via the stack pointer) in functions that seem suitable. */
9490 static bool
9491 ix86_frame_pointer_required (void)
9493 /* If we accessed previous frames, then the generated code expects
9494 to be able to access the saved ebp value in our frame. */
9495 if (cfun->machine->accesses_prev_frame)
9496 return true;
9498 /* Several x86 OSes need a frame pointer for other reasons,
9499 usually pertaining to setjmp. */
9500 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9501 return true;
9503 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9504 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9505 return true;
9507 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9508 stack allocation is 4GB. */
9509 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9510 return true;
9512 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9513 turns off the frame pointer by default. Turn it back on now if
9514 we've not got a leaf function. */
9515 if (TARGET_OMIT_LEAF_FRAME_POINTER
9516 && (!crtl->is_leaf
9517 || ix86_current_function_calls_tls_descriptor))
9518 return true;
9520 if (crtl->profile && !flag_fentry)
9521 return true;
9523 return false;
9526 /* Record that the current function accesses previous call frames. */
9528 void
9529 ix86_setup_frame_addresses (void)
9531 cfun->machine->accesses_prev_frame = 1;
9534 #ifndef USE_HIDDEN_LINKONCE
9535 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9536 # define USE_HIDDEN_LINKONCE 1
9537 # else
9538 # define USE_HIDDEN_LINKONCE 0
9539 # endif
9540 #endif
9542 static int pic_labels_used;
9544 /* Fills in the label name that should be used for a pc thunk for
9545 the given register. */
9547 static void
9548 get_pc_thunk_name (char name[32], unsigned int regno)
9550 gcc_assert (!TARGET_64BIT);
9552 if (USE_HIDDEN_LINKONCE)
9553 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9554 else
9555 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9559 /* This function emits the pc thunks used for -fpic: each thunk loads its
9560 register with the return address of the caller and then returns. */
9562 static void
9563 ix86_code_end (void)
9565 rtx xops[2];
9566 int regno;
9568 for (regno = AX_REG; regno <= SP_REG; regno++)
9570 char name[32];
9571 tree decl;
9573 if (!(pic_labels_used & (1 << regno)))
9574 continue;
9576 get_pc_thunk_name (name, regno);
9578 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9579 get_identifier (name),
9580 build_function_type_list (void_type_node, NULL_TREE));
9581 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9582 NULL_TREE, void_type_node);
9583 TREE_PUBLIC (decl) = 1;
9584 TREE_STATIC (decl) = 1;
9585 DECL_IGNORED_P (decl) = 1;
9587 #if TARGET_MACHO
9588 if (TARGET_MACHO)
9590 switch_to_section (darwin_sections[text_coal_section]);
9591 fputs ("\t.weak_definition\t", asm_out_file);
9592 assemble_name (asm_out_file, name);
9593 fputs ("\n\t.private_extern\t", asm_out_file);
9594 assemble_name (asm_out_file, name);
9595 putc ('\n', asm_out_file);
9596 ASM_OUTPUT_LABEL (asm_out_file, name);
9597 DECL_WEAK (decl) = 1;
9599 else
9600 #endif
9601 if (USE_HIDDEN_LINKONCE)
9603 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9605 targetm.asm_out.unique_section (decl, 0);
9606 switch_to_section (get_named_section (decl, NULL, 0));
9608 targetm.asm_out.globalize_label (asm_out_file, name);
9609 fputs ("\t.hidden\t", asm_out_file);
9610 assemble_name (asm_out_file, name);
9611 putc ('\n', asm_out_file);
9612 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9614 else
9616 switch_to_section (text_section);
9617 ASM_OUTPUT_LABEL (asm_out_file, name);
9620 DECL_INITIAL (decl) = make_node (BLOCK);
9621 current_function_decl = decl;
9622 init_function_start (decl);
9623 first_function_block_is_cold = false;
9624 /* Make sure unwind info is emitted for the thunk if needed. */
9625 final_start_function (emit_barrier (), asm_out_file, 1);
9627 /* Pad stack IP move with 4 instructions (two NOPs count
9628 as one instruction). */
9629 if (TARGET_PAD_SHORT_FUNCTION)
9631 int i = 8;
9633 while (i--)
9634 fputs ("\tnop\n", asm_out_file);
9637 xops[0] = gen_rtx_REG (Pmode, regno);
9638 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9639 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9640 output_asm_insn ("%!ret", NULL);
9641 final_end_function ();
9642 init_insn_lengths ();
9643 free_after_compilation (cfun);
9644 set_cfun (NULL);
9645 current_function_decl = NULL;
9648 if (flag_split_stack)
9649 file_end_indicate_split_stack ();
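/* The thunk body emitted above is tiny; for %ebx, for example, it is
   essentially

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies the return address (the address of the instruction
   following the call) into the requested register.  */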
9652 /* Emit code for the SET_GOT patterns. */
9654 const char *
9655 output_set_got (rtx dest, rtx label)
9657 rtx xops[3];
9659 xops[0] = dest;
9661 if (TARGET_VXWORKS_RTP && flag_pic)
9663 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9664 xops[2] = gen_rtx_MEM (Pmode,
9665 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9666 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9668 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9669 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9670 an unadorned address. */
9671 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9672 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9673 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9674 return "";
9677 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9679 if (!flag_pic)
9681 if (TARGET_MACHO)
9682 /* We don't need a pic base, we're not producing pic. */
9683 gcc_unreachable ();
9685 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9686 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9687 targetm.asm_out.internal_label (asm_out_file, "L",
9688 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9690 else
9692 char name[32];
9693 get_pc_thunk_name (name, REGNO (dest));
9694 pic_labels_used |= 1 << REGNO (dest);
9696 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9697 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9698 output_asm_insn ("%!call\t%X2", xops);
9700 #if TARGET_MACHO
9701 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9702 This is what will be referenced by the Mach-O PIC subsystem. */
9703 if (machopic_should_output_picbase_label () || !label)
9704 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9706 /* When we are restoring the pic base at the site of a nonlocal label,
9707 and we decided to emit the pic base above, we will still output a
9708 local label used for calculating the correction offset (even though
9709 the offset will be 0 in that case). */
9710 if (label)
9711 targetm.asm_out.internal_label (asm_out_file, "L",
9712 CODE_LABEL_NUMBER (label));
9713 #endif
9716 if (!TARGET_MACHO)
9717 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9719 return "";
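/* In the common ELF -fpic case the sequence printed above for %ebx is
   roughly

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   which leaves the GOT address in the PIC register.  */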
9722 /* Generate a "push" pattern for input ARG. */
9724 static rtx
9725 gen_push (rtx arg)
9727 struct machine_function *m = cfun->machine;
9729 if (m->fs.cfa_reg == stack_pointer_rtx)
9730 m->fs.cfa_offset += UNITS_PER_WORD;
9731 m->fs.sp_offset += UNITS_PER_WORD;
9733 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9734 arg = gen_rtx_REG (word_mode, REGNO (arg));
9736 return gen_rtx_SET (VOIDmode,
9737 gen_rtx_MEM (word_mode,
9738 gen_rtx_PRE_DEC (Pmode,
9739 stack_pointer_rtx)),
9740 arg);
9743 /* Generate a "pop" pattern for input ARG. */
9745 static rtx
9746 gen_pop (rtx arg)
9748 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9749 arg = gen_rtx_REG (word_mode, REGNO (arg));
9751 return gen_rtx_SET (VOIDmode,
9752 arg,
9753 gen_rtx_MEM (word_mode,
9754 gen_rtx_POST_INC (Pmode,
9755 stack_pointer_rtx)));
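/* Schematically, on a 64-bit target gen_push and gen_pop above build

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))
     (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))

   with SImode replacing DImode on 32-bit targets.  */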
9758 /* Return >= 0 if there is an unused call-clobbered register available
9759 for the entire function. */
9761 static unsigned int
9762 ix86_select_alt_pic_regnum (void)
9764 if (ix86_use_pseudo_pic_reg ())
9765 return INVALID_REGNUM;
9767 if (crtl->is_leaf
9768 && !crtl->profile
9769 && !ix86_current_function_calls_tls_descriptor)
9771 int i, drap;
9772 /* Can't use the same register for both PIC and DRAP. */
9773 if (crtl->drap_reg)
9774 drap = REGNO (crtl->drap_reg);
9775 else
9776 drap = -1;
9777 for (i = 2; i >= 0; --i)
9778 if (i != drap && !df_regs_ever_live_p (i))
9779 return i;
9782 return INVALID_REGNUM;
9785 /* Return TRUE if we need to save REGNO. */
9787 static bool
9788 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9790 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9791 && pic_offset_table_rtx)
9793 if (ix86_use_pseudo_pic_reg ())
9795 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
9796 _mcount in the prologue. */
9797 if (!TARGET_64BIT && flag_pic && crtl->profile)
9798 return true;
9800 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9801 || crtl->profile
9802 || crtl->calls_eh_return
9803 || crtl->uses_const_pool
9804 || cfun->has_nonlocal_label)
9805 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9808 if (crtl->calls_eh_return && maybe_eh_return)
9810 unsigned i;
9811 for (i = 0; ; i++)
9813 unsigned test = EH_RETURN_DATA_REGNO (i);
9814 if (test == INVALID_REGNUM)
9815 break;
9816 if (test == regno)
9817 return true;
9821 if (crtl->drap_reg
9822 && regno == REGNO (crtl->drap_reg)
9823 && !cfun->machine->no_drap_save_restore)
9824 return true;
9826 return (df_regs_ever_live_p (regno)
9827 && !call_used_regs[regno]
9828 && !fixed_regs[regno]
9829 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9832 /* Return the number of saved general purpose registers. */
9834 static int
9835 ix86_nsaved_regs (void)
9837 int nregs = 0;
9838 int regno;
9840 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9841 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9842 nregs ++;
9843 return nregs;
9846 /* Return the number of saved SSE registers. */
9848 static int
9849 ix86_nsaved_sseregs (void)
9851 int nregs = 0;
9852 int regno;
9854 if (!TARGET_64BIT_MS_ABI)
9855 return 0;
9856 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9857 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9858 nregs ++;
9859 return nregs;
9862 /* Given FROM and TO register numbers, say whether this elimination is
9863 allowed. If stack alignment is needed, we can only replace argument
9864 pointer with hard frame pointer, or replace frame pointer with stack
9865 pointer. Otherwise, frame pointer elimination is automatically
9866 handled and all other eliminations are valid. */
9868 static bool
9869 ix86_can_eliminate (const int from, const int to)
9871 if (stack_realign_fp)
9872 return ((from == ARG_POINTER_REGNUM
9873 && to == HARD_FRAME_POINTER_REGNUM)
9874 || (from == FRAME_POINTER_REGNUM
9875 && to == STACK_POINTER_REGNUM));
9876 else
9877 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9880 /* Return the offset between two registers, one to be eliminated, and the other
9881 its replacement, at the start of a routine. */
9883 HOST_WIDE_INT
9884 ix86_initial_elimination_offset (int from, int to)
9886 struct ix86_frame frame;
9887 ix86_compute_frame_layout (&frame);
9889 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9890 return frame.hard_frame_pointer_offset;
9891 else if (from == FRAME_POINTER_REGNUM
9892 && to == HARD_FRAME_POINTER_REGNUM)
9893 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9894 else
9896 gcc_assert (to == STACK_POINTER_REGNUM);
9898 if (from == ARG_POINTER_REGNUM)
9899 return frame.stack_pointer_offset;
9901 gcc_assert (from == FRAME_POINTER_REGNUM);
9902 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9906 /* In a dynamically-aligned function, we can't know the offset from
9907 stack pointer to frame pointer, so we must ensure that setjmp
9908 eliminates fp against the hard fp (%ebp) rather than trying to
9909 index from %esp up to the top of the frame across a gap that is
9910 of unknown (at compile-time) size. */
9911 static rtx
9912 ix86_builtin_setjmp_frame_value (void)
9914 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9917 /* When using -fsplit-stack, the allocation routines set a field in
9918 the TCB to the bottom of the stack plus this much space, measured
9919 in bytes. */
9921 #define SPLIT_STACK_AVAILABLE 256
9923 /* Fill the structure ix86_frame describing the frame of the current function. */
9925 static void
9926 ix86_compute_frame_layout (struct ix86_frame *frame)
9928 unsigned HOST_WIDE_INT stack_alignment_needed;
9929 HOST_WIDE_INT offset;
9930 unsigned HOST_WIDE_INT preferred_alignment;
9931 HOST_WIDE_INT size = get_frame_size ();
9932 HOST_WIDE_INT to_allocate;
9934 frame->nregs = ix86_nsaved_regs ();
9935 frame->nsseregs = ix86_nsaved_sseregs ();
9937 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9938 except for function prologues and leaf functions. */
9939 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9940 && (!crtl->is_leaf || cfun->calls_alloca != 0
9941 || ix86_current_function_calls_tls_descriptor))
9943 crtl->preferred_stack_boundary = 128;
9944 crtl->stack_alignment_needed = 128;
9946 /* preferred_stack_boundary is never updated for calls
9947 expanded from a TLS descriptor. Update it here. We don't update it in
9948 the expand stage because, according to the comments before
9949 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9950 away. */
9951 else if (ix86_current_function_calls_tls_descriptor
9952 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9954 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9955 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9956 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9959 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9960 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9962 gcc_assert (!size || stack_alignment_needed);
9963 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9964 gcc_assert (preferred_alignment <= stack_alignment_needed);
9966 /* For SEH we have to limit the amount of code movement into the prologue.
9967 At present we do this via a BLOCKAGE, at which point there's very little
9968 scheduling that can be done, which means that there's very little point
9969 in doing anything except PUSHs. */
9970 if (TARGET_SEH)
9971 cfun->machine->use_fast_prologue_epilogue = false;
9973 /* During a reload iteration the number of registers saved can change.
9974 Recompute the value as needed. Do not recompute when the number of
9975 registers didn't change, as reload does multiple calls to the function
9976 and does not expect the decision to change within a single iteration. */
9977 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9978 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9980 int count = frame->nregs;
9981 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9983 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9985 /* The fast prologue uses move instead of push to save registers. This
9986 is significantly longer, but also executes faster, as modern hardware
9987 can execute the moves in parallel but can't do that for push/pop.
9989 Be careful about choosing which prologue to emit: when the function takes
9990 many instructions to execute we may use the slow version, as well as when
9991 the function is known to be outside a hot spot (this is known with
9992 feedback only). Weight the size of the function by the number of registers
9993 to save, as it is cheap to use one or two push instructions but very
9994 slow to use many of them. */
9995 if (count)
9996 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9997 if (node->frequency < NODE_FREQUENCY_NORMAL
9998 || (flag_branch_probabilities
9999 && node->frequency < NODE_FREQUENCY_HOT))
10000 cfun->machine->use_fast_prologue_epilogue = false;
10001 else
10002 cfun->machine->use_fast_prologue_epilogue
10003 = !expensive_function_p (count);
10006 frame->save_regs_using_mov
10007 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10008 /* If static stack checking is enabled and done with probes,
10009 the registers need to be saved before allocating the frame. */
10010 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10012 /* Skip return address. */
10013 offset = UNITS_PER_WORD;
10015 /* Skip pushed static chain. */
10016 if (ix86_static_chain_on_stack)
10017 offset += UNITS_PER_WORD;
10019 /* Skip saved base pointer. */
10020 if (frame_pointer_needed)
10021 offset += UNITS_PER_WORD;
10022 frame->hfp_save_offset = offset;
10024 /* The traditional frame pointer location is at the top of the frame. */
10025 frame->hard_frame_pointer_offset = offset;
10027 /* Register save area */
10028 offset += frame->nregs * UNITS_PER_WORD;
10029 frame->reg_save_offset = offset;
10031 /* On SEH target, registers are pushed just before the frame pointer
10032 location. */
10033 if (TARGET_SEH)
10034 frame->hard_frame_pointer_offset = offset;
10036 /* Align and set SSE register save area. */
10037 if (frame->nsseregs)
10039 /* The only ABI that has saved SSE registers (Win64) also has a
10040 16-byte aligned default stack, and thus we don't need to be
10041 within the re-aligned local stack frame to save them. */
10042 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10043 offset = (offset + 16 - 1) & -16;
10044 offset += frame->nsseregs * 16;
10046 frame->sse_reg_save_offset = offset;
10048 /* The re-aligned stack starts here. Values before this point are not
10049 directly comparable with values below this point. In order to make
10050 sure that no value happens to be the same before and after, force
10051 the alignment computation below to add a non-zero value. */
10052 if (stack_realign_fp)
10053 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10055 /* Va-arg area */
10056 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10057 offset += frame->va_arg_size;
10059 /* Align start of frame for local function. */
10060 if (stack_realign_fp
10061 || offset != frame->sse_reg_save_offset
10062 || size != 0
10063 || !crtl->is_leaf
10064 || cfun->calls_alloca
10065 || ix86_current_function_calls_tls_descriptor)
10066 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10068 /* Frame pointer points here. */
10069 frame->frame_pointer_offset = offset;
10071 offset += size;
10073 /* Add the outgoing arguments area. It can be skipped if we eliminated
10074 all the function calls as dead code.
10075 Skipping is however impossible when the function calls alloca. The alloca
10076 expander assumes that the last crtl->outgoing_args_size bytes
10077 of the stack frame are unused. */
10078 if (ACCUMULATE_OUTGOING_ARGS
10079 && (!crtl->is_leaf || cfun->calls_alloca
10080 || ix86_current_function_calls_tls_descriptor))
10082 offset += crtl->outgoing_args_size;
10083 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10085 else
10086 frame->outgoing_arguments_size = 0;
10088 /* Align stack boundary. Only needed if we're calling another function
10089 or using alloca. */
10090 if (!crtl->is_leaf || cfun->calls_alloca
10091 || ix86_current_function_calls_tls_descriptor)
10092 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10094 /* We've reached end of stack frame. */
10095 frame->stack_pointer_offset = offset;
10097 /* Size prologue needs to allocate. */
10098 to_allocate = offset - frame->sse_reg_save_offset;
10100 if ((!to_allocate && frame->nregs <= 1)
10101 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10102 frame->save_regs_using_mov = false;
10104 if (ix86_using_red_zone ()
10105 && crtl->sp_is_unchanging
10106 && crtl->is_leaf
10107 && !ix86_current_function_calls_tls_descriptor)
10109 frame->red_zone_size = to_allocate;
10110 if (frame->save_regs_using_mov)
10111 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10112 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10113 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10115 else
10116 frame->red_zone_size = 0;
10117 frame->stack_pointer_offset -= frame->red_zone_size;
10119 /* The SEH frame pointer location is near the bottom of the frame.
10120 This is enforced by the fact that the difference between the
10121 stack pointer and the frame pointer is limited to 240 bytes in
10122 the unwind data structure. */
10123 if (TARGET_SEH)
10125 HOST_WIDE_INT diff;
10127 /* If we can leave the frame pointer where it is, do so. Also, returns
10128 the establisher frame for __builtin_frame_address (0). */
10129 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10130 if (diff <= SEH_MAX_FRAME_SIZE
10131 && (diff > 240 || (diff & 15) != 0)
10132 && !crtl->accesses_prior_frames)
10134 /* Ideally we'd determine what portion of the local stack frame
10135 (within the constraint of the lowest 240) is most heavily used.
10136 But without that complication, simply bias the frame pointer
10137 by 128 bytes so as to maximize the amount of the local stack
10138 frame that is addressable with 8-bit offsets. */
10139 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
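/* A rough picture of the layout computed by this function, from the CFA
   downwards (every area is optional and padding is omitted):

       return address
       pushed static chain
       saved frame pointer                         <- hard_frame_pointer_offset
       GP register save area                       <- reg_save_offset
       SSE register save area (16-byte aligned)    <- sse_reg_save_offset
       va_arg register save area
       local variables                             <- frame_pointer_offset
       outgoing arguments
                                                   <- stack_pointer_offset

   The red zone, if usable, is subtracted from stack_pointer_offset at
   the end, and SEH may move the hard frame pointer near the bottom.  */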
10144 /* This is semi-inlined memory_address_length, but simplified
10145 since we know that we're always dealing with reg+offset, and
10146 to avoid having to create and discard all that rtl. */
10148 static inline int
10149 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10151 int len = 4;
10153 if (offset == 0)
10155 /* EBP and R13 cannot be encoded without an offset. */
10156 len = (regno == BP_REG || regno == R13_REG);
10158 else if (IN_RANGE (offset, -128, 127))
10159 len = 1;
10161 /* ESP and R12 must be encoded with a SIB byte. */
10162 if (regno == SP_REG || regno == R12_REG)
10163 len++;
10165 return len;
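/* For example, counting the extra address-encoding bytes as above:
   (%rax) costs 0, 8(%rbp) costs 1 (disp8), 8(%rsp) costs 2 (disp8 + SIB),
   and 1024(%rax) costs 4 (disp32).  */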
10168 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10169 The valid base registers are taken from CFUN->MACHINE->FS. */
10171 static rtx
10172 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10174 const struct machine_function *m = cfun->machine;
10175 rtx base_reg = NULL;
10176 HOST_WIDE_INT base_offset = 0;
10178 if (m->use_fast_prologue_epilogue)
10180 /* Choose the base register most likely to allow the most scheduling
10181 opportunities. Generally FP is valid throughout the function,
10182 while DRAP must be reloaded within the epilogue. But choose either
10183 over the SP due to increased encoding size. */
10185 if (m->fs.fp_valid)
10187 base_reg = hard_frame_pointer_rtx;
10188 base_offset = m->fs.fp_offset - cfa_offset;
10190 else if (m->fs.drap_valid)
10192 base_reg = crtl->drap_reg;
10193 base_offset = 0 - cfa_offset;
10195 else if (m->fs.sp_valid)
10197 base_reg = stack_pointer_rtx;
10198 base_offset = m->fs.sp_offset - cfa_offset;
10201 else
10203 HOST_WIDE_INT toffset;
10204 int len = 16, tlen;
10206 /* Choose the base register with the smallest address encoding.
10207 With a tie, choose FP > DRAP > SP. */
10208 if (m->fs.sp_valid)
10210 base_reg = stack_pointer_rtx;
10211 base_offset = m->fs.sp_offset - cfa_offset;
10212 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10214 if (m->fs.drap_valid)
10216 toffset = 0 - cfa_offset;
10217 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10218 if (tlen <= len)
10220 base_reg = crtl->drap_reg;
10221 base_offset = toffset;
10222 len = tlen;
10225 if (m->fs.fp_valid)
10227 toffset = m->fs.fp_offset - cfa_offset;
10228 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10229 if (tlen <= len)
10231 base_reg = hard_frame_pointer_rtx;
10232 base_offset = toffset;
10233 len = tlen;
10237 gcc_assert (base_reg != NULL);
10239 return plus_constant (Pmode, base_reg, base_offset);
10242 /* Emit code to save registers in the prologue. */
10244 static void
10245 ix86_emit_save_regs (void)
10247 unsigned int regno;
10248 rtx insn;
10250 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10251 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10253 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10254 RTX_FRAME_RELATED_P (insn) = 1;
10258 /* Emit a single register save at CFA - CFA_OFFSET. */
10260 static void
10261 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10262 HOST_WIDE_INT cfa_offset)
10264 struct machine_function *m = cfun->machine;
10265 rtx reg = gen_rtx_REG (mode, regno);
10266 rtx mem, addr, base, insn;
10268 addr = choose_baseaddr (cfa_offset);
10269 mem = gen_frame_mem (mode, addr);
10271 /* For SSE saves, we need to indicate the 128-bit alignment. */
10272 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10274 insn = emit_move_insn (mem, reg);
10275 RTX_FRAME_RELATED_P (insn) = 1;
10277 base = addr;
10278 if (GET_CODE (base) == PLUS)
10279 base = XEXP (base, 0);
10280 gcc_checking_assert (REG_P (base));
10282 /* When saving registers into a re-aligned local stack frame, avoid
10283 any tricky guessing by dwarf2out. */
10284 if (m->fs.realigned)
10286 gcc_checking_assert (stack_realign_drap);
10288 if (regno == REGNO (crtl->drap_reg))
10290 /* A bit of a hack. We force the DRAP register to be saved in
10291 the re-aligned stack frame, which provides us with a copy
10292 of the CFA that will last past the prologue. Install it. */
10293 gcc_checking_assert (cfun->machine->fs.fp_valid);
10294 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10295 cfun->machine->fs.fp_offset - cfa_offset);
10296 mem = gen_rtx_MEM (mode, addr);
10297 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10299 else
10301 /* The frame pointer is a stable reference within the
10302 aligned frame. Use it. */
10303 gcc_checking_assert (cfun->machine->fs.fp_valid);
10304 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10305 cfun->machine->fs.fp_offset - cfa_offset);
10306 mem = gen_rtx_MEM (mode, addr);
10307 add_reg_note (insn, REG_CFA_EXPRESSION,
10308 gen_rtx_SET (VOIDmode, mem, reg));
10312 /* The memory may not be relative to the current CFA register,
10313 which means that we may need to generate a new pattern for
10314 use by the unwind info. */
10315 else if (base != m->fs.cfa_reg)
10317 addr = plus_constant (Pmode, m->fs.cfa_reg,
10318 m->fs.cfa_offset - cfa_offset);
10319 mem = gen_rtx_MEM (mode, addr);
10320 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10324 /* Emit code to save registers using MOV insns.
10325 First register is stored at CFA - CFA_OFFSET. */
10326 static void
10327 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10329 unsigned int regno;
10331 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10332 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10334 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10335 cfa_offset -= UNITS_PER_WORD;
10339 /* Emit code to save SSE registers using MOV insns.
10340 First register is stored at CFA - CFA_OFFSET. */
10341 static void
10342 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10344 unsigned int regno;
10346 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10347 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10349 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10350 cfa_offset -= 16;
10354 static GTY(()) rtx queued_cfa_restores;
10356 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10357 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10358 Don't add the note if the previously saved value will be left untouched
10359 within the stack red zone until return, as unwinders can find the same value
10360 in the register and on the stack. */
10362 static void
10363 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10365 if (!crtl->shrink_wrapped
10366 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10367 return;
10369 if (insn)
10371 add_reg_note (insn, REG_CFA_RESTORE, reg);
10372 RTX_FRAME_RELATED_P (insn) = 1;
10374 else
10375 queued_cfa_restores
10376 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10379 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10381 static void
10382 ix86_add_queued_cfa_restore_notes (rtx insn)
10384 rtx last;
10385 if (!queued_cfa_restores)
10386 return;
10387 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10389 XEXP (last, 1) = REG_NOTES (insn);
10390 REG_NOTES (insn) = queued_cfa_restores;
10391 queued_cfa_restores = NULL_RTX;
10392 RTX_FRAME_RELATED_P (insn) = 1;
10395 /* Expand a prologue or epilogue stack adjustment.
10396 The pattern exists to put a dependency on all ebp-based memory accesses.
10397 STYLE should be negative if instructions should be marked as frame related,
10398 zero if the %r11 register is live and cannot be freely used, and positive
10399 otherwise. */
10401 static void
10402 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10403 int style, bool set_cfa)
10405 struct machine_function *m = cfun->machine;
10406 rtx insn;
10407 bool add_frame_related_expr = false;
10409 if (Pmode == SImode)
10410 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10411 else if (x86_64_immediate_operand (offset, DImode))
10412 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10413 else
10415 rtx tmp;
10416 /* r11 is used by indirect sibcall return as well, set before the
10417 epilogue and used after the epilogue. */
10418 if (style)
10419 tmp = gen_rtx_REG (DImode, R11_REG);
10420 else
10422 gcc_assert (src != hard_frame_pointer_rtx
10423 && dest != hard_frame_pointer_rtx);
10424 tmp = hard_frame_pointer_rtx;
10426 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10427 if (style < 0)
10428 add_frame_related_expr = true;
10430 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10433 insn = emit_insn (insn);
10434 if (style >= 0)
10435 ix86_add_queued_cfa_restore_notes (insn);
10437 if (set_cfa)
10439 rtx r;
10441 gcc_assert (m->fs.cfa_reg == src);
10442 m->fs.cfa_offset += INTVAL (offset);
10443 m->fs.cfa_reg = dest;
10445 r = gen_rtx_PLUS (Pmode, src, offset);
10446 r = gen_rtx_SET (VOIDmode, dest, r);
10447 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10448 RTX_FRAME_RELATED_P (insn) = 1;
10450 else if (style < 0)
10452 RTX_FRAME_RELATED_P (insn) = 1;
10453 if (add_frame_related_expr)
10455 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10456 r = gen_rtx_SET (VOIDmode, dest, r);
10457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10461 if (dest == stack_pointer_rtx)
10463 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10464 bool valid = m->fs.sp_valid;
10466 if (src == hard_frame_pointer_rtx)
10468 valid = m->fs.fp_valid;
10469 ooffset = m->fs.fp_offset;
10471 else if (src == crtl->drap_reg)
10473 valid = m->fs.drap_valid;
10474 ooffset = 0;
10476 else
10478 /* Else there are two possibilities: SP itself, which we set
10479 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10480 taken care of by hand along the eh_return path. */
10481 gcc_checking_assert (src == stack_pointer_rtx
10482 || offset == const0_rtx);
10485 m->fs.sp_offset = ooffset - INTVAL (offset);
10486 m->fs.sp_valid = valid;
10490 /* Find an available register to be used as dynamic realign argument
10491 pointer register. Such a register will be written in the prologue and
10492 used at the beginning of the body, so it must not be
10493 1. parameter passing register.
10494 2. GOT pointer.
10495 We reuse the static-chain register if it is available. Otherwise, we
10496 use DI for i386 and R13 for x86-64. We choose R13 since it has a
10497 shorter encoding.
10499 Return: the regno of chosen register. */
10501 static unsigned int
10502 find_drap_reg (void)
10504 tree decl = cfun->decl;
10506 if (TARGET_64BIT)
10508 /* Use R13 for a nested function or a function that needs a static
10509 chain. Since a function with a tail call may use any caller-saved
10510 register in the epilogue, DRAP must not use a caller-saved
10511 register in that case. */
10512 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10513 return R13_REG;
10515 return R10_REG;
10517 else
10519 /* Use DI for a nested function or a function that needs a static
10520 chain. Since a function with a tail call may use any caller-saved
10521 register in the epilogue, DRAP must not use a caller-saved
10522 register in that case. */
10523 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10524 return DI_REG;
10526 /* Reuse static chain register if it isn't used for parameter
10527 passing. */
10528 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10530 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10531 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10532 return CX_REG;
10534 return DI_REG;
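/* Summary of the selection above, read directly from the code:

     64-bit, DECL_STATIC_CHAIN or a tail call emitted   -> R13
     64-bit, otherwise                                   -> R10
     32-bit, DECL_STATIC_CHAIN or a tail call emitted   -> EDI
     32-bit, regparm <= 2 and not fastcall/thiscall     -> ECX
     32-bit, otherwise                                   -> EDI             */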
10538 /* Return minimum incoming stack alignment. */
10540 static unsigned int
10541 ix86_minimum_incoming_stack_boundary (bool sibcall)
10543 unsigned int incoming_stack_boundary;
10545 /* Prefer the one specified at command line. */
10546 if (ix86_user_incoming_stack_boundary)
10547 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10548 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10549 when -mstackrealign is used, this isn't a sibcall check, and the
10550 estimated stack alignment is 128 bits. */
10551 else if (!sibcall
10552 && !TARGET_64BIT
10553 && ix86_force_align_arg_pointer
10554 && crtl->stack_alignment_estimated == 128)
10555 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10556 else
10557 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10559 /* Incoming stack alignment can be changed on individual functions
10560 via force_align_arg_pointer attribute. We use the smallest
10561 incoming stack boundary. */
10562 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10563 && lookup_attribute (ix86_force_align_arg_pointer_string,
10564 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10565 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10567 /* The incoming stack frame has to be aligned at least at
10568 parm_stack_boundary. */
10569 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10570 incoming_stack_boundary = crtl->parm_stack_boundary;
10572 /* The stack at the entry of main is aligned by the runtime. We use
10573 the smallest incoming stack boundary. */
10574 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10575 && DECL_NAME (current_function_decl)
10576 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10577 && DECL_FILE_SCOPE_P (current_function_decl))
10578 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10580 return incoming_stack_boundary;
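/* Illustrative walk-through (numbers assumed, not taken from the source):
   suppose the default incoming boundary is 128 bits, no user value was
   given, and the function carries the force_align_arg_pointer attribute.
   The attribute drops the value to MIN_STACK_BOUNDARY (the word size in
   bits on this port), it is then raised back up to
   crtl->parm_stack_boundary if that is larger, and finally lowered to
   MAIN_STACK_BOUNDARY when the function is a file-scope main, whose stack
   the runtime already aligns.  */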
10583 /* Update incoming stack boundary and estimated stack alignment. */
10585 static void
10586 ix86_update_stack_boundary (void)
10588 ix86_incoming_stack_boundary
10589 = ix86_minimum_incoming_stack_boundary (false);
10591 /* x86_64 varargs functions need 16-byte stack alignment for the
10592 register save area. */
10593 if (TARGET_64BIT
10594 && cfun->stdarg
10595 && crtl->stack_alignment_estimated < 128)
10596 crtl->stack_alignment_estimated = 128;
10599 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10600 needed or an rtx for DRAP otherwise. */
10602 static rtx
10603 ix86_get_drap_rtx (void)
10605 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10606 crtl->need_drap = true;
10608 if (stack_realign_drap)
10610 /* Assign DRAP to vDRAP and return vDRAP. */
10611 unsigned int regno = find_drap_reg ();
10612 rtx drap_vreg;
10613 rtx arg_ptr;
10614 rtx_insn *seq, *insn;
10616 arg_ptr = gen_rtx_REG (Pmode, regno);
10617 crtl->drap_reg = arg_ptr;
10619 start_sequence ();
10620 drap_vreg = copy_to_reg (arg_ptr);
10621 seq = get_insns ();
10622 end_sequence ();
10624 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10625 if (!optimize)
10627 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10628 RTX_FRAME_RELATED_P (insn) = 1;
10630 return drap_vreg;
10632 else
10633 return NULL;
10636 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10638 static rtx
10639 ix86_internal_arg_pointer (void)
10641 return virtual_incoming_args_rtx;
10644 struct scratch_reg {
10645 rtx reg;
10646 bool saved;
10649 /* Return a short-lived scratch register for use on function entry.
10650 In 32-bit mode, it is valid only after the registers are saved
10651 in the prologue. This register must be released by means of
10652 release_scratch_register_on_entry once it is dead. */
10654 static void
10655 get_scratch_register_on_entry (struct scratch_reg *sr)
10657 int regno;
10659 sr->saved = false;
10661 if (TARGET_64BIT)
10663 /* We always use R11 in 64-bit mode. */
10664 regno = R11_REG;
10666 else
10668 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10669 bool fastcall_p
10670 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10671 bool thiscall_p
10672 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10673 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10674 int regparm = ix86_function_regparm (fntype, decl);
10675 int drap_regno
10676 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10678 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10679 for the static chain register. */
10680 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10681 && drap_regno != AX_REG)
10682 regno = AX_REG;
10683 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10684 for the static chain register. */
10685 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10686 regno = AX_REG;
10687 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10688 regno = DX_REG;
10689 /* ecx is the static chain register. */
10690 else if (regparm < 3 && !fastcall_p && !thiscall_p
10691 && !static_chain_p
10692 && drap_regno != CX_REG)
10693 regno = CX_REG;
10694 else if (ix86_save_reg (BX_REG, true))
10695 regno = BX_REG;
10696 /* esi is the static chain register. */
10697 else if (!(regparm == 3 && static_chain_p)
10698 && ix86_save_reg (SI_REG, true))
10699 regno = SI_REG;
10700 else if (ix86_save_reg (DI_REG, true))
10701 regno = DI_REG;
10702 else
10704 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10705 sr->saved = true;
10709 sr->reg = gen_rtx_REG (Pmode, regno);
10710 if (sr->saved)
10712 rtx insn = emit_insn (gen_push (sr->reg));
10713 RTX_FRAME_RELATED_P (insn) = 1;
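/* Example of the 32-bit selection above, following the code: for a plain
   cdecl function with regparm 0, no static chain and no DRAP in %eax,
   %eax is free and is returned without being saved.  For a regparm(3)
   function none of whose callee-saved candidates (%ebx, %esi, %edi) are
   saved by the prologue, the fallback pushes and later pops the chosen
   register (%eax, or %edx if DRAP already uses %eax), which is what
   sr->saved records.  */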
10717 /* Release a scratch register obtained from the preceding function. */
10719 static void
10720 release_scratch_register_on_entry (struct scratch_reg *sr)
10722 if (sr->saved)
10724 struct machine_function *m = cfun->machine;
10725 rtx x, insn = emit_insn (gen_pop (sr->reg));
10727 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10728 RTX_FRAME_RELATED_P (insn) = 1;
10729 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10730 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10731 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10732 m->fs.sp_offset -= UNITS_PER_WORD;
10736 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
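/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 this is
   4096 bytes, i.e. one probe per page; the worked examples in the comments
   below assume that value.  */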
10738 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10740 static void
10741 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10743 /* We skip the probe for the first interval + a small dope of 4 words and
10744 probe that many bytes past the specified size to maintain a protection
10745 area at the bottom of the stack. */
10746 const int dope = 4 * UNITS_PER_WORD;
10747 rtx size_rtx = GEN_INT (size), last;
10749 /* See if we have a constant small number of probes to generate. If so,
10750 that's the easy case. The run-time loop is made up of 11 insns in the
10751 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10752 for n # of intervals. */
10753 if (size <= 5 * PROBE_INTERVAL)
10755 HOST_WIDE_INT i, adjust;
10756 bool first_probe = true;
10758 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10759 values of N from 1 until it exceeds SIZE. If only one probe is
10760 needed, this will not generate any code. Then adjust and probe
10761 to PROBE_INTERVAL + SIZE. */
10762 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10764 if (first_probe)
10766 adjust = 2 * PROBE_INTERVAL + dope;
10767 first_probe = false;
10769 else
10770 adjust = PROBE_INTERVAL;
10772 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10773 plus_constant (Pmode, stack_pointer_rtx,
10774 -adjust)));
10775 emit_stack_probe (stack_pointer_rtx);
10778 if (first_probe)
10779 adjust = size + PROBE_INTERVAL + dope;
10780 else
10781 adjust = size + PROBE_INTERVAL - i;
10783 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10784 plus_constant (Pmode, stack_pointer_rtx,
10785 -adjust)));
10786 emit_stack_probe (stack_pointer_rtx);
10788 /* Adjust back to account for the additional first interval. */
10789 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10790 plus_constant (Pmode, stack_pointer_rtx,
10791 PROBE_INTERVAL + dope)));
10794 /* Otherwise, do the same as above, but in a loop. Note that we must be
10795 extra careful with variables wrapping around because we might be at
10796 the very top (or the very bottom) of the address space and we have
10797 to be able to handle this case properly; in particular, we use an
10798 equality test for the loop condition. */
10799 else
10801 HOST_WIDE_INT rounded_size;
10802 struct scratch_reg sr;
10804 get_scratch_register_on_entry (&sr);
10807 /* Step 1: round SIZE to the previous multiple of the interval. */
10809 rounded_size = size & -PROBE_INTERVAL;
10812 /* Step 2: compute initial and final value of the loop counter. */
10814 /* SP = SP_0 + PROBE_INTERVAL. */
10815 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10816 plus_constant (Pmode, stack_pointer_rtx,
10817 - (PROBE_INTERVAL + dope))));
10819 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10820 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10821 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10822 gen_rtx_PLUS (Pmode, sr.reg,
10823 stack_pointer_rtx)));
10826 /* Step 3: the loop
10828 while (SP != LAST_ADDR)
10830 SP = SP + PROBE_INTERVAL
10831 probe at SP
10834 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10835 values of N from 1 until it is equal to ROUNDED_SIZE. */
10837 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10840 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10841 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10843 if (size != rounded_size)
10845 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10846 plus_constant (Pmode, stack_pointer_rtx,
10847 rounded_size - size)));
10848 emit_stack_probe (stack_pointer_rtx);
10851 /* Adjust back to account for the additional first interval. */
10852 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10853 plus_constant (Pmode, stack_pointer_rtx,
10854 PROBE_INTERVAL + dope)));
10856 release_scratch_register_on_entry (&sr);
10859 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10861 /* Even if the stack pointer isn't the CFA register, we need to correctly
10862 describe the adjustments made to it, in particular differentiate the
10863 frame-related ones from the frame-unrelated ones. */
10864 if (size > 0)
10866 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10867 XVECEXP (expr, 0, 0)
10868 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10869 plus_constant (Pmode, stack_pointer_rtx, -size));
10870 XVECEXP (expr, 0, 1)
10871 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10872 plus_constant (Pmode, stack_pointer_rtx,
10873 PROBE_INTERVAL + dope + size));
10874 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10875 RTX_FRAME_RELATED_P (last) = 1;
10877 cfun->machine->fs.sp_offset += size;
10880 /* Make sure nothing is scheduled before we are done. */
10881 emit_insn (gen_blockage ());
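/* Worked example of the unrolled case, assuming PROBE_INTERVAL is 4096 and
   UNITS_PER_WORD is 8 (so dope = 32): for size = 10000,

     adjust by 2*4096 + 32 = 8224 and probe     (first interval + dope)
     adjust by 4096 and probe
     adjust by 10000 + 4096 - 12288 = 1808 and probe
     adjust back by 4096 + 32 = 4128

   for a net decrement of 8224 + 4096 + 1808 - 4128 = 10000 bytes, matching
   the sp_offset bookkeeping at the end of the function.  */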
10884 /* Adjust the stack pointer up to REG while probing it. */
10886 const char *
10887 output_adjust_stack_and_probe (rtx reg)
10889 static int labelno = 0;
10890 char loop_lab[32], end_lab[32];
10891 rtx xops[2];
10893 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10894 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10896 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10898 /* Jump to END_LAB if SP == LAST_ADDR. */
10899 xops[0] = stack_pointer_rtx;
10900 xops[1] = reg;
10901 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10902 fputs ("\tje\t", asm_out_file);
10903 assemble_name_raw (asm_out_file, end_lab);
10904 fputc ('\n', asm_out_file);
10906 /* SP = SP + PROBE_INTERVAL. */
10907 xops[1] = GEN_INT (PROBE_INTERVAL);
10908 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10910 /* Probe at SP. */
10911 xops[1] = const0_rtx;
10912 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10914 fprintf (asm_out_file, "\tjmp\t");
10915 assemble_name_raw (asm_out_file, loop_lab);
10916 fputc ('\n', asm_out_file);
10918 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10920 return "";
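/* Roughly the AT&T sequence this emits for a 64-bit function, with %r11 as
   the scratch register and a 4096-byte interval (label names are
   illustrative):

   .LPSRL0:
       cmpq   %r11, %rsp
       je     .LPSRE0
       subq   $4096, %rsp
       orq    $0, (%rsp)
       jmp    .LPSRL0
   .LPSRE0:                                                                 */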
10923 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10924 inclusive. These are offsets from the current stack pointer. */
10926 static void
10927 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10929 /* See if we have a constant small number of probes to generate. If so,
10930 that's the easy case. The run-time loop is made up of 7 insns in the
10931 generic case while the compile-time loop is made up of n insns for n #
10932 of intervals. */
10933 if (size <= 7 * PROBE_INTERVAL)
10935 HOST_WIDE_INT i;
10937 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10938 it exceeds SIZE. If only one probe is needed, this will not
10939 generate any code. Then probe at FIRST + SIZE. */
10940 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10941 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10942 -(first + i)));
10944 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10945 -(first + size)));
10948 /* Otherwise, do the same as above, but in a loop. Note that we must be
10949 extra careful with variables wrapping around because we might be at
10950 the very top (or the very bottom) of the address space and we have
10951 to be able to handle this case properly; in particular, we use an
10952 equality test for the loop condition. */
10953 else
10955 HOST_WIDE_INT rounded_size, last;
10956 struct scratch_reg sr;
10958 get_scratch_register_on_entry (&sr);
10961 /* Step 1: round SIZE to the previous multiple of the interval. */
10963 rounded_size = size & -PROBE_INTERVAL;
10966 /* Step 2: compute initial and final value of the loop counter. */
10968 /* TEST_OFFSET = FIRST. */
10969 emit_move_insn (sr.reg, GEN_INT (-first));
10971 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10972 last = first + rounded_size;
10975 /* Step 3: the loop
10977 while (TEST_ADDR != LAST_ADDR)
10979 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10980 probe at TEST_ADDR
10983 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10984 until it is equal to ROUNDED_SIZE. */
10986 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10989 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10990 that SIZE is equal to ROUNDED_SIZE. */
10992 if (size != rounded_size)
10993 emit_stack_probe (plus_constant (Pmode,
10994 gen_rtx_PLUS (Pmode,
10995 stack_pointer_rtx,
10996 sr.reg),
10997 rounded_size - size));
10999 release_scratch_register_on_entry (&sr);
11002 /* Make sure nothing is scheduled before we are done. */
11003 emit_insn (gen_blockage ());
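/* Worked example of the unrolled case, assuming a 4096-byte interval: with
   first = 4096 and size = 9000 (<= 7 * PROBE_INTERVAL) the loop probes at
   sp - 8192 and sp - 12288, then the trailing probe hits
   sp - (4096 + 9000) = sp - 13096.  Unlike ix86_adjust_stack_and_probe
   above, the stack pointer itself is never moved.  */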
11006 /* Probe a range of stack addresses from REG to END, inclusive. These are
11007 offsets from the current stack pointer. */
11009 const char *
11010 output_probe_stack_range (rtx reg, rtx end)
11012 static int labelno = 0;
11013 char loop_lab[32], end_lab[32];
11014 rtx xops[3];
11016 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11017 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11019 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11021 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11022 xops[0] = reg;
11023 xops[1] = end;
11024 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11025 fputs ("\tje\t", asm_out_file);
11026 assemble_name_raw (asm_out_file, end_lab);
11027 fputc ('\n', asm_out_file);
11029 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11030 xops[1] = GEN_INT (PROBE_INTERVAL);
11031 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11033 /* Probe at TEST_ADDR. */
11034 xops[0] = stack_pointer_rtx;
11035 xops[1] = reg;
11036 xops[2] = const0_rtx;
11037 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11039 fprintf (asm_out_file, "\tjmp\t");
11040 assemble_name_raw (asm_out_file, loop_lab);
11041 fputc ('\n', asm_out_file);
11043 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11045 return "";
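/* The emitted loop mirrors the one above, but walks the scratch register
   (the test offset) instead of the stack pointer and probes through a
   base-plus-index address; sketched in AT&T syntax with placeholder
   operands:

   .LPSRLn:
       cmp    <LAST>, <TEST>
       je     .LPSREn
       sub    $4096, <TEST>
       or     $0, (%rsp,<TEST>)
       jmp    .LPSRLn
   .LPSREn:                                                                 */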
11048 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11049 to be generated in correct form. */
11050 static void
11051 ix86_finalize_stack_realign_flags (void)
11053 /* Check if stack realignment is really needed after reload, and
11054 store the result in cfun. */
11055 unsigned int incoming_stack_boundary
11056 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11057 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11058 unsigned int stack_realign = (incoming_stack_boundary
11059 < (crtl->is_leaf
11060 ? crtl->max_used_stack_slot_alignment
11061 : crtl->stack_alignment_needed));
11063 if (crtl->stack_realign_finalized)
11065 /* After stack_realign_needed is finalized, we can no longer
11066 change it. */
11067 gcc_assert (crtl->stack_realign_needed == stack_realign);
11068 return;
11071 /* If the only reason for frame_pointer_needed is that we conservatively
11072 assumed stack realignment might be needed, but in the end nothing that
11073 needed the stack alignment had been spilled, clear frame_pointer_needed
11074 and say we don't need stack realignment. */
11075 if (stack_realign
11076 && frame_pointer_needed
11077 && crtl->is_leaf
11078 && flag_omit_frame_pointer
11079 && crtl->sp_is_unchanging
11080 && !ix86_current_function_calls_tls_descriptor
11081 && !crtl->accesses_prior_frames
11082 && !cfun->calls_alloca
11083 && !crtl->calls_eh_return
11084 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11085 && !ix86_frame_pointer_required ()
11086 && get_frame_size () == 0
11087 && ix86_nsaved_sseregs () == 0
11088 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11090 HARD_REG_SET set_up_by_prologue, prologue_used;
11091 basic_block bb;
11093 CLEAR_HARD_REG_SET (prologue_used);
11094 CLEAR_HARD_REG_SET (set_up_by_prologue);
11095 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11096 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11097 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11098 HARD_FRAME_POINTER_REGNUM);
11099 FOR_EACH_BB_FN (bb, cfun)
11101 rtx_insn *insn;
11102 FOR_BB_INSNS (bb, insn)
11103 if (NONDEBUG_INSN_P (insn)
11104 && requires_stack_frame_p (insn, prologue_used,
11105 set_up_by_prologue))
11107 crtl->stack_realign_needed = stack_realign;
11108 crtl->stack_realign_finalized = true;
11109 return;
11113 /* If drap has been set, but it actually isn't live at the start
11114 of the function, there is no reason to set it up. */
11115 if (crtl->drap_reg)
11117 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11118 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11120 crtl->drap_reg = NULL_RTX;
11121 crtl->need_drap = false;
11124 else
11125 cfun->machine->no_drap_save_restore = true;
11127 frame_pointer_needed = false;
11128 stack_realign = false;
11129 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11130 crtl->stack_alignment_needed = incoming_stack_boundary;
11131 crtl->stack_alignment_estimated = incoming_stack_boundary;
11132 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11133 crtl->preferred_stack_boundary = incoming_stack_boundary;
11134 df_finish_pass (true);
11135 df_scan_alloc (NULL);
11136 df_scan_blocks ();
11137 df_compute_regs_ever_live (true);
11138 df_analyze ();
11141 crtl->stack_realign_needed = stack_realign;
11142 crtl->stack_realign_finalized = true;
11145 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11147 static void
11148 ix86_elim_entry_set_got (rtx reg)
11150 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11151 rtx_insn *c_insn = BB_HEAD (bb);
11152 if (!NONDEBUG_INSN_P (c_insn))
11153 c_insn = next_nonnote_nondebug_insn (c_insn);
11154 if (c_insn && NONJUMP_INSN_P (c_insn))
11156 rtx pat = PATTERN (c_insn);
11157 if (GET_CODE (pat) == PARALLEL)
11159 rtx vec = XVECEXP (pat, 0, 0);
11160 if (GET_CODE (vec) == SET
11161 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11162 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11163 delete_insn (c_insn);
11168 /* Expand the prologue into a bunch of separate insns. */
11170 void
11171 ix86_expand_prologue (void)
11173 struct machine_function *m = cfun->machine;
11174 rtx insn, t;
11175 struct ix86_frame frame;
11176 HOST_WIDE_INT allocate;
11177 bool int_registers_saved;
11178 bool sse_registers_saved;
11180 ix86_finalize_stack_realign_flags ();
11182 /* DRAP should not coexist with stack_realign_fp */
11183 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11185 memset (&m->fs, 0, sizeof (m->fs));
11187 /* Initialize CFA state for before the prologue. */
11188 m->fs.cfa_reg = stack_pointer_rtx;
11189 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11191 /* Track SP offset to the CFA. We continue tracking this after we've
11192 swapped the CFA register away from SP. In the case of re-alignment
11193 this is fudged; we're interested in offsets within the local frame. */
11194 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11195 m->fs.sp_valid = true;
11197 ix86_compute_frame_layout (&frame);
11199 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11201 /* We should have already generated an error for any use of
11202 ms_hook on a nested function. */
11203 gcc_checking_assert (!ix86_static_chain_on_stack);
11205 /* Check if profiling is active and we should use the profiling-before-
11206 prologue variant. If so, sorry. */
11207 if (crtl->profile && flag_fentry != 0)
11208 sorry ("ms_hook_prologue attribute isn%'t compatible "
11209 "with -mfentry for 32-bit");
11211 /* In ix86_asm_output_function_label we emitted:
11212 8b ff movl.s %edi,%edi
11213 55 push %ebp
11214 8b ec movl.s %esp,%ebp
11216 This matches the hookable function prologue in Win32 API
11217 functions in Microsoft Windows XP Service Pack 2 and newer.
11218 Wine uses this to enable Windows apps to hook the Win32 API
11219 functions provided by Wine.
11221 What that means is that we've already set up the frame pointer. */
11223 if (frame_pointer_needed
11224 && !(crtl->drap_reg && crtl->stack_realign_needed))
11226 rtx push, mov;
11228 /* We've decided to use the frame pointer already set up.
11229 Describe this to the unwinder by pretending that both
11230 push and mov insns happen right here.
11232 Putting the unwind info here at the end of the ms_hook
11233 is done so that we can make absolutely certain we get
11234 the required byte sequence at the start of the function,
11235 rather than relying on an assembler that can produce
11236 the exact encoding required.
11238 However it does mean (in the unpatched case) that we have
11239 a 1 insn window where the asynchronous unwind info is
11240 incorrect. However, if we placed the unwind info at
11241 its correct location we would have incorrect unwind info
11242 in the patched case. Which is probably all moot since
11243 I don't expect Wine generates dwarf2 unwind info for the
11244 system libraries that use this feature. */
11246 insn = emit_insn (gen_blockage ());
11248 push = gen_push (hard_frame_pointer_rtx);
11249 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11250 stack_pointer_rtx);
11251 RTX_FRAME_RELATED_P (push) = 1;
11252 RTX_FRAME_RELATED_P (mov) = 1;
11254 RTX_FRAME_RELATED_P (insn) = 1;
11255 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11256 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11258 /* Note that gen_push incremented m->fs.cfa_offset, even
11259 though we didn't emit the push insn here. */
11260 m->fs.cfa_reg = hard_frame_pointer_rtx;
11261 m->fs.fp_offset = m->fs.cfa_offset;
11262 m->fs.fp_valid = true;
11264 else
11266 /* The frame pointer is not needed so pop %ebp again.
11267 This leaves us with a pristine state. */
11268 emit_insn (gen_pop (hard_frame_pointer_rtx));
11272 /* The first insn of a function that accepts its static chain on the
11273 stack is to push the register that would be filled in by a direct
11274 call. This insn will be skipped by the trampoline. */
11275 else if (ix86_static_chain_on_stack)
11277 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11278 emit_insn (gen_blockage ());
11280 /* We don't want to interpret this push insn as a register save,
11281 only as a stack adjustment. The real copy of the register as
11282 a save will be done later, if needed. */
11283 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11284 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11285 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11286 RTX_FRAME_RELATED_P (insn) = 1;
11289 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11290 DRAP is needed and stack realignment is really needed after reload. */
11291 if (stack_realign_drap)
11293 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11295 /* Only need to push parameter pointer reg if it is caller saved. */
11296 if (!call_used_regs[REGNO (crtl->drap_reg)])
11298 /* Push arg pointer reg */
11299 insn = emit_insn (gen_push (crtl->drap_reg));
11300 RTX_FRAME_RELATED_P (insn) = 1;
11303 /* Grab the argument pointer. */
11304 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11305 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11306 RTX_FRAME_RELATED_P (insn) = 1;
11307 m->fs.cfa_reg = crtl->drap_reg;
11308 m->fs.cfa_offset = 0;
11310 /* Align the stack. */
11311 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11312 stack_pointer_rtx,
11313 GEN_INT (-align_bytes)));
11314 RTX_FRAME_RELATED_P (insn) = 1;
11316 /* Replicate the return address on the stack so that the return
11317 address can be reached via the (argp - 1) slot. This is needed
11318 to implement the macro RETURN_ADDR_RTX and the intrinsic function
11319 expand_builtin_return_addr, etc. */
11320 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11321 t = gen_frame_mem (word_mode, t);
11322 insn = emit_insn (gen_push (t));
11323 RTX_FRAME_RELATED_P (insn) = 1;
11325 /* For the purposes of frame and register save area addressing,
11326 we've started over with a new frame. */
11327 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11328 m->fs.realigned = true;
11331 int_registers_saved = (frame.nregs == 0);
11332 sse_registers_saved = (frame.nsseregs == 0);
11334 if (frame_pointer_needed && !m->fs.fp_valid)
11336 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11337 slower on all targets. Also sdb doesn't like it. */
11338 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11339 RTX_FRAME_RELATED_P (insn) = 1;
11341 /* Push registers now, before setting the frame pointer
11342 on SEH target. */
11343 if (!int_registers_saved
11344 && TARGET_SEH
11345 && !frame.save_regs_using_mov)
11347 ix86_emit_save_regs ();
11348 int_registers_saved = true;
11349 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11352 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11354 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11355 RTX_FRAME_RELATED_P (insn) = 1;
11357 if (m->fs.cfa_reg == stack_pointer_rtx)
11358 m->fs.cfa_reg = hard_frame_pointer_rtx;
11359 m->fs.fp_offset = m->fs.sp_offset;
11360 m->fs.fp_valid = true;
11364 if (!int_registers_saved)
11366 /* If saving registers via PUSH, do so now. */
11367 if (!frame.save_regs_using_mov)
11369 ix86_emit_save_regs ();
11370 int_registers_saved = true;
11371 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11374 /* When using the red zone we may start register saving before allocating
11375 the stack frame, saving one cycle of the prologue. However, avoid
11376 doing this if we have to probe the stack; at least on x86_64 the
11377 stack probe can turn into a call that clobbers a red zone location. */
11378 else if (ix86_using_red_zone ()
11379 && (! TARGET_STACK_PROBE
11380 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11382 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11383 int_registers_saved = true;
11387 if (stack_realign_fp)
11389 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11390 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11392 /* The computation of the size of the re-aligned stack frame means
11393 that we must allocate the size of the register save area before
11394 performing the actual alignment. Otherwise we cannot guarantee
11395 that there's enough storage above the realignment point. */
11396 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11397 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11398 GEN_INT (m->fs.sp_offset
11399 - frame.sse_reg_save_offset),
11400 -1, false);
11402 /* Align the stack. */
11403 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11404 stack_pointer_rtx,
11405 GEN_INT (-align_bytes)));
11407 /* For the purposes of register save area addressing, the stack
11408 pointer is no longer valid. As for the value of sp_offset,
11409 see ix86_compute_frame_layout, which we need to match in order
11410 to pass verification of stack_pointer_offset at the end. */
11411 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11412 m->fs.sp_valid = false;
11415 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11417 if (flag_stack_usage_info)
11419 /* We start to count from ARG_POINTER. */
11420 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11422 /* If it was realigned, take into account the fake frame. */
11423 if (stack_realign_drap)
11425 if (ix86_static_chain_on_stack)
11426 stack_size += UNITS_PER_WORD;
11428 if (!call_used_regs[REGNO (crtl->drap_reg)])
11429 stack_size += UNITS_PER_WORD;
11431 /* This over-estimates by 1 minimal-stack-alignment-unit but
11432 mitigates that by counting in the new return address slot. */
11433 current_function_dynamic_stack_size
11434 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11437 current_function_static_stack_size = stack_size;
11440 /* On SEH target with very large frame size, allocate an area to save
11441 SSE registers (as the very large allocation won't be described). */
11442 if (TARGET_SEH
11443 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11444 && !sse_registers_saved)
11446 HOST_WIDE_INT sse_size =
11447 frame.sse_reg_save_offset - frame.reg_save_offset;
11449 gcc_assert (int_registers_saved);
11451 /* No need to do stack checking as the area will be immediately
11452 written. */
11453 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11454 GEN_INT (-sse_size), -1,
11455 m->fs.cfa_reg == stack_pointer_rtx);
11456 allocate -= sse_size;
11457 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11458 sse_registers_saved = true;
11461 /* The stack has already been decremented by the instruction calling us
11462 so probe if the size is non-negative to preserve the protection area. */
11463 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11465 /* We expect the registers to be saved when probes are used. */
11466 gcc_assert (int_registers_saved);
11468 if (STACK_CHECK_MOVING_SP)
11470 if (!(crtl->is_leaf && !cfun->calls_alloca
11471 && allocate <= PROBE_INTERVAL))
11473 ix86_adjust_stack_and_probe (allocate);
11474 allocate = 0;
11477 else
11479 HOST_WIDE_INT size = allocate;
11481 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11482 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11484 if (TARGET_STACK_PROBE)
11486 if (crtl->is_leaf && !cfun->calls_alloca)
11488 if (size > PROBE_INTERVAL)
11489 ix86_emit_probe_stack_range (0, size);
11491 else
11492 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11494 else
11496 if (crtl->is_leaf && !cfun->calls_alloca)
11498 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11499 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11500 size - STACK_CHECK_PROTECT);
11502 else
11503 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11508 if (allocate == 0)
11510 else if (!ix86_target_stack_probe ()
11511 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11513 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11514 GEN_INT (-allocate), -1,
11515 m->fs.cfa_reg == stack_pointer_rtx);
11517 else
11519 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11520 rtx r10 = NULL;
11521 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11522 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11523 bool eax_live = ix86_eax_live_at_start_p ();
11524 bool r10_live = false;
11526 if (TARGET_64BIT)
11527 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11529 if (eax_live)
11531 insn = emit_insn (gen_push (eax));
11532 allocate -= UNITS_PER_WORD;
11533 /* Note that SEH directives need to continue tracking the stack
11534 pointer even after the frame pointer has been set up. */
11535 if (sp_is_cfa_reg || TARGET_SEH)
11537 if (sp_is_cfa_reg)
11538 m->fs.cfa_offset += UNITS_PER_WORD;
11539 RTX_FRAME_RELATED_P (insn) = 1;
11543 if (r10_live)
11545 r10 = gen_rtx_REG (Pmode, R10_REG);
11546 insn = emit_insn (gen_push (r10));
11547 allocate -= UNITS_PER_WORD;
11548 if (sp_is_cfa_reg || TARGET_SEH)
11550 if (sp_is_cfa_reg)
11551 m->fs.cfa_offset += UNITS_PER_WORD;
11552 RTX_FRAME_RELATED_P (insn) = 1;
11556 emit_move_insn (eax, GEN_INT (allocate));
11557 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11559 /* Use the fact that AX still contains ALLOCATE. */
11560 adjust_stack_insn = (Pmode == DImode
11561 ? gen_pro_epilogue_adjust_stack_di_sub
11562 : gen_pro_epilogue_adjust_stack_si_sub);
11564 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11565 stack_pointer_rtx, eax));
11567 if (sp_is_cfa_reg || TARGET_SEH)
11569 if (sp_is_cfa_reg)
11570 m->fs.cfa_offset += allocate;
11571 RTX_FRAME_RELATED_P (insn) = 1;
11572 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11573 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11574 plus_constant (Pmode, stack_pointer_rtx,
11575 -allocate)));
11577 m->fs.sp_offset += allocate;
11579 /* Use stack_pointer_rtx for relative addressing so that code
11580 works for realigned stack, too. */
11581 if (r10_live && eax_live)
11583 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11584 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11585 gen_frame_mem (word_mode, t));
11586 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11587 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11588 gen_frame_mem (word_mode, t));
11590 else if (eax_live || r10_live)
11592 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11593 emit_move_insn (gen_rtx_REG (word_mode,
11594 (eax_live ? AX_REG : R10_REG)),
11595 gen_frame_mem (word_mode, t));
11598 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11600 /* If we haven't already set up the frame pointer, do so now. */
11601 if (frame_pointer_needed && !m->fs.fp_valid)
11603 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11604 GEN_INT (frame.stack_pointer_offset
11605 - frame.hard_frame_pointer_offset));
11606 insn = emit_insn (insn);
11607 RTX_FRAME_RELATED_P (insn) = 1;
11608 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11610 if (m->fs.cfa_reg == stack_pointer_rtx)
11611 m->fs.cfa_reg = hard_frame_pointer_rtx;
11612 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11613 m->fs.fp_valid = true;
11616 if (!int_registers_saved)
11617 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11618 if (!sse_registers_saved)
11619 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11621 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11622 in the prologue. */
11623 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11625 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11626 insn = emit_insn (gen_set_got (pic));
11627 RTX_FRAME_RELATED_P (insn) = 1;
11628 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11629 emit_insn (gen_prologue_use (pic));
11630 /* Delete an already emitted SET_GOT if it exists and is allocated to
11631 REAL_PIC_OFFSET_TABLE_REGNUM. */
11632 ix86_elim_entry_set_got (pic);
11635 if (crtl->drap_reg && !crtl->stack_realign_needed)
11637 /* vDRAP is set up, but after reload it turns out stack realignment
11638 isn't necessary; here we emit prologue code to set up DRAP
11639 without the stack realignment adjustment. */
11640 t = choose_baseaddr (0);
11641 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11644 /* Prevent instructions from being scheduled into register save push
11645 sequence when access to the redzone area is done through frame pointer.
11646 The offset between the frame pointer and the stack pointer is calculated
11647 relative to the value of the stack pointer at the end of the function
11648 prologue, and moving instructions that access redzone area via frame
11649 pointer inside push sequence violates this assumption. */
11650 if (frame_pointer_needed && frame.red_zone_size)
11651 emit_insn (gen_memory_blockage ());
11653 /* Emit cld instruction if stringops are used in the function. */
11654 if (TARGET_CLD && ix86_current_function_needs_cld)
11655 emit_insn (gen_cld ());
11657 /* SEH requires that the prologue end within 256 bytes of the start of
11658 the function. Prevent instruction schedules that would extend that.
11659 Further, prevent alloca modifications to the stack pointer from being
11660 combined with prologue modifications. */
11661 if (TARGET_SEH)
11662 emit_insn (gen_prologue_use (stack_pointer_rtx));
11665 /* Emit code to restore REG using a POP insn. */
11667 static void
11668 ix86_emit_restore_reg_using_pop (rtx reg)
11670 struct machine_function *m = cfun->machine;
11671 rtx insn = emit_insn (gen_pop (reg));
11673 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11674 m->fs.sp_offset -= UNITS_PER_WORD;
11676 if (m->fs.cfa_reg == crtl->drap_reg
11677 && REGNO (reg) == REGNO (crtl->drap_reg))
11679 /* Previously we'd represented the CFA as an expression
11680 like *(%ebp - 8). We've just popped that value from
11681 the stack, which means we need to reset the CFA to
11682 the drap register. This will remain until we restore
11683 the stack pointer. */
11684 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11685 RTX_FRAME_RELATED_P (insn) = 1;
11687 /* This means that the DRAP register is valid for addressing too. */
11688 m->fs.drap_valid = true;
11689 return;
11692 if (m->fs.cfa_reg == stack_pointer_rtx)
11694 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11695 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11696 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11697 RTX_FRAME_RELATED_P (insn) = 1;
11699 m->fs.cfa_offset -= UNITS_PER_WORD;
11702 /* When the frame pointer is the CFA, and we pop it, we are
11703 swapping back to the stack pointer as the CFA. This happens
11704 for stack frames that don't allocate other data, so we assume
11705 the stack pointer is now pointing at the return address, i.e.
11706 the function entry state, which makes the offset one word. */
11707 if (reg == hard_frame_pointer_rtx)
11709 m->fs.fp_valid = false;
11710 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11712 m->fs.cfa_reg = stack_pointer_rtx;
11713 m->fs.cfa_offset -= UNITS_PER_WORD;
11715 add_reg_note (insn, REG_CFA_DEF_CFA,
11716 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11717 GEN_INT (m->fs.cfa_offset)));
11718 RTX_FRAME_RELATED_P (insn) = 1;
11723 /* Emit code to restore saved registers using POP insns. */
11725 static void
11726 ix86_emit_restore_regs_using_pop (void)
11728 unsigned int regno;
11730 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11731 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11732 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11735 /* Emit code and notes for the LEAVE instruction. */
11737 static void
11738 ix86_emit_leave (void)
11740 struct machine_function *m = cfun->machine;
11741 rtx insn = emit_insn (ix86_gen_leave ());
11743 ix86_add_queued_cfa_restore_notes (insn);
11745 gcc_assert (m->fs.fp_valid);
11746 m->fs.sp_valid = true;
11747 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11748 m->fs.fp_valid = false;
11750 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11752 m->fs.cfa_reg = stack_pointer_rtx;
11753 m->fs.cfa_offset = m->fs.sp_offset;
11755 add_reg_note (insn, REG_CFA_DEF_CFA,
11756 plus_constant (Pmode, stack_pointer_rtx,
11757 m->fs.sp_offset));
11758 RTX_FRAME_RELATED_P (insn) = 1;
11760 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11761 m->fs.fp_offset);
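/* A reminder of why the bookkeeping above looks the way it does: "leave"
   is equivalent to "movl %ebp, %esp" followed by "popl %ebp" (or the
   64-bit equivalents), so afterwards the stack pointer sits one word
   closer to the CFA than the frame pointer did -- hence
   sp_offset = fp_offset - UNITS_PER_WORD -- and the frame pointer no
   longer addresses the frame.  */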
11764 /* Emit code to restore saved general registers using MOV insns.
11765 The first register is restored from CFA - CFA_OFFSET. */
11766 static void
11767 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11768 bool maybe_eh_return)
11770 struct machine_function *m = cfun->machine;
11771 unsigned int regno;
11773 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11774 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11776 rtx reg = gen_rtx_REG (word_mode, regno);
11777 rtx insn, mem;
11779 mem = choose_baseaddr (cfa_offset);
11780 mem = gen_frame_mem (word_mode, mem);
11781 insn = emit_move_insn (reg, mem);
11783 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11785 /* Previously we'd represented the CFA as an expression
11786 like *(%ebp - 8). We've just popped that value from
11787 the stack, which means we need to reset the CFA to
11788 the drap register. This will remain until we restore
11789 the stack pointer. */
11790 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11791 RTX_FRAME_RELATED_P (insn) = 1;
11793 /* This means that the DRAP register is valid for addressing. */
11794 m->fs.drap_valid = true;
11796 else
11797 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11799 cfa_offset -= UNITS_PER_WORD;
11803 /* Emit code to restore saved SSE registers using MOV insns.
11804 The first register is restored from CFA - CFA_OFFSET. */
11805 static void
11806 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11807 bool maybe_eh_return)
11809 unsigned int regno;
11811 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11812 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11814 rtx reg = gen_rtx_REG (V4SFmode, regno);
11815 rtx mem;
11817 mem = choose_baseaddr (cfa_offset);
11818 mem = gen_rtx_MEM (V4SFmode, mem);
11819 set_mem_align (mem, 128);
11820 emit_move_insn (reg, mem);
11822 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11824 cfa_offset -= 16;
11828 /* Restore function stack, frame, and registers. */
11830 void
11831 ix86_expand_epilogue (int style)
11833 struct machine_function *m = cfun->machine;
11834 struct machine_frame_state frame_state_save = m->fs;
11835 struct ix86_frame frame;
11836 bool restore_regs_via_mov;
11837 bool using_drap;
11839 ix86_finalize_stack_realign_flags ();
11840 ix86_compute_frame_layout (&frame);
11842 m->fs.sp_valid = (!frame_pointer_needed
11843 || (crtl->sp_is_unchanging
11844 && !stack_realign_fp));
11845 gcc_assert (!m->fs.sp_valid
11846 || m->fs.sp_offset == frame.stack_pointer_offset);
11848 /* The FP must be valid if the frame pointer is present. */
11849 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11850 gcc_assert (!m->fs.fp_valid
11851 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11853 /* We must have *some* valid pointer to the stack frame. */
11854 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11856 /* The DRAP is never valid at this point. */
11857 gcc_assert (!m->fs.drap_valid);
11859 /* See the comment about red zone and frame
11860 pointer usage in ix86_expand_prologue. */
11861 if (frame_pointer_needed && frame.red_zone_size)
11862 emit_insn (gen_memory_blockage ());
11864 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11865 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11867 /* Determine the CFA offset of the end of the red-zone. */
11868 m->fs.red_zone_offset = 0;
11869 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11871 /* The red-zone begins below the return address. */
11872 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11874 /* When the register save area is in the aligned portion of
11875 the stack, determine the maximum runtime displacement that
11876 matches up with the aligned frame. */
11877 if (stack_realign_drap)
11878 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11879 + UNITS_PER_WORD);
11882 /* Special care must be taken for the normal return case of a function
11883 using eh_return: the eax and edx registers are marked as saved, but
11884 not restored along this path. Adjust the save location to match. */
11885 if (crtl->calls_eh_return && style != 2)
11886 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11888 /* EH_RETURN requires the use of moves to function properly. */
11889 if (crtl->calls_eh_return)
11890 restore_regs_via_mov = true;
11891 /* SEH requires the use of pops to identify the epilogue. */
11892 else if (TARGET_SEH)
11893 restore_regs_via_mov = false;
11894 /* If we're only restoring one register and sp is not valid, then
11895 use a move instruction to restore the register, since it's
11896 less work than reloading sp and popping the register. */
11897 else if (!m->fs.sp_valid && frame.nregs <= 1)
11898 restore_regs_via_mov = true;
11899 else if (TARGET_EPILOGUE_USING_MOVE
11900 && cfun->machine->use_fast_prologue_epilogue
11901 && (frame.nregs > 1
11902 || m->fs.sp_offset != frame.reg_save_offset))
11903 restore_regs_via_mov = true;
11904 else if (frame_pointer_needed
11905 && !frame.nregs
11906 && m->fs.sp_offset != frame.reg_save_offset)
11907 restore_regs_via_mov = true;
11908 else if (frame_pointer_needed
11909 && TARGET_USE_LEAVE
11910 && cfun->machine->use_fast_prologue_epilogue
11911 && frame.nregs == 1)
11912 restore_regs_via_mov = true;
11913 else
11914 restore_regs_via_mov = false;
11916 if (restore_regs_via_mov || frame.nsseregs)
11918 /* Ensure that the entire register save area is addressable via
11919 the stack pointer, if we will restore via sp. */
11920 if (TARGET_64BIT
11921 && m->fs.sp_offset > 0x7fffffff
11922 && !(m->fs.fp_valid || m->fs.drap_valid)
11923 && (frame.nsseregs + frame.nregs) != 0)
11925 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11926 GEN_INT (m->fs.sp_offset
11927 - frame.sse_reg_save_offset),
11928 style,
11929 m->fs.cfa_reg == stack_pointer_rtx);
11933 /* If there are any SSE registers to restore, then we have to do it
11934 via moves, since there's obviously no pop for SSE regs. */
11935 if (frame.nsseregs)
11936 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11937 style == 2);
11939 if (restore_regs_via_mov)
11941 rtx t;
11943 if (frame.nregs)
11944 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11946 /* eh_return epilogues need %ecx added to the stack pointer. */
11947 if (style == 2)
11949 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11951 /* Stack align doesn't work with eh_return. */
11952 gcc_assert (!stack_realign_drap);
11953 /* Neither do regparm nested functions. */
11954 gcc_assert (!ix86_static_chain_on_stack);
11956 if (frame_pointer_needed)
11958 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11959 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11960 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11962 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11963 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11965 /* Note that we use SA as a temporary CFA, as the return
11966 address is at the proper place relative to it. We
11967 pretend this happens at the FP restore insn because
11968 prior to this insn the FP would be stored at the wrong
11969 offset relative to SA, and after this insn we have no
11970 other reasonable register to use for the CFA. We don't
11971 bother resetting the CFA to the SP for the duration of
11972 the return insn. */
11973 add_reg_note (insn, REG_CFA_DEF_CFA,
11974 plus_constant (Pmode, sa, UNITS_PER_WORD));
11975 ix86_add_queued_cfa_restore_notes (insn);
11976 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11977 RTX_FRAME_RELATED_P (insn) = 1;
11979 m->fs.cfa_reg = sa;
11980 m->fs.cfa_offset = UNITS_PER_WORD;
11981 m->fs.fp_valid = false;
11983 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11984 const0_rtx, style, false);
11986 else
11988 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11989 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11990 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11991 ix86_add_queued_cfa_restore_notes (insn);
11993 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11994 if (m->fs.cfa_offset != UNITS_PER_WORD)
11996 m->fs.cfa_offset = UNITS_PER_WORD;
11997 add_reg_note (insn, REG_CFA_DEF_CFA,
11998 plus_constant (Pmode, stack_pointer_rtx,
11999 UNITS_PER_WORD));
12000 RTX_FRAME_RELATED_P (insn) = 1;
12003 m->fs.sp_offset = UNITS_PER_WORD;
12004 m->fs.sp_valid = true;
12007 else
12009 /* SEH requires that the function end with (1) a stack adjustment
12010 if necessary, (2) a sequence of pops, and (3) a return or
12011 jump instruction. Prevent insns from the function body from
12012 being scheduled into this sequence. */
12013 if (TARGET_SEH)
12015 /* Prevent a catch region from being adjacent to the standard
12016 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12017 several other flags that would be interesting to test are
12018 set up yet. */
12019 if (flag_non_call_exceptions)
12020 emit_insn (gen_nops (const1_rtx));
12021 else
12022 emit_insn (gen_blockage ());
12025 /* First step is to deallocate the stack frame so that we can
12026 pop the registers. Also do it on SEH target for very large
12027 frame as the emitted instructions aren't allowed by the ABI in
12028 epilogues. */
12029 if (!m->fs.sp_valid
12030 || (TARGET_SEH
12031 && (m->fs.sp_offset - frame.reg_save_offset
12032 >= SEH_MAX_FRAME_SIZE)))
12034 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12035 GEN_INT (m->fs.fp_offset
12036 - frame.reg_save_offset),
12037 style, false);
12039 else if (m->fs.sp_offset != frame.reg_save_offset)
12041 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12042 GEN_INT (m->fs.sp_offset
12043 - frame.reg_save_offset),
12044 style,
12045 m->fs.cfa_reg == stack_pointer_rtx);
12048 ix86_emit_restore_regs_using_pop ();
12051 /* If we used a frame pointer and haven't already got rid of it,
12052 then do so now. */
12053 if (m->fs.fp_valid)
12055 /* If the stack pointer is valid and pointing at the frame
12056 pointer store address, then we only need a pop. */
12057 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12058 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12059 /* Leave results in shorter dependency chains on CPUs that are
12060 able to grok it fast. */
12061 else if (TARGET_USE_LEAVE
12062 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12063 || !cfun->machine->use_fast_prologue_epilogue)
12064 ix86_emit_leave ();
12065 else
12067 pro_epilogue_adjust_stack (stack_pointer_rtx,
12068 hard_frame_pointer_rtx,
12069 const0_rtx, style, !using_drap);
12070 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12074 if (using_drap)
12076 int param_ptr_offset = UNITS_PER_WORD;
12077 rtx insn;
12079 gcc_assert (stack_realign_drap);
12081 if (ix86_static_chain_on_stack)
12082 param_ptr_offset += UNITS_PER_WORD;
12083 if (!call_used_regs[REGNO (crtl->drap_reg)])
12084 param_ptr_offset += UNITS_PER_WORD;
12086 insn = emit_insn (gen_rtx_SET
12087 (VOIDmode, stack_pointer_rtx,
12088 gen_rtx_PLUS (Pmode,
12089 crtl->drap_reg,
12090 GEN_INT (-param_ptr_offset))));
12091 m->fs.cfa_reg = stack_pointer_rtx;
12092 m->fs.cfa_offset = param_ptr_offset;
12093 m->fs.sp_offset = param_ptr_offset;
12094 m->fs.realigned = false;
12096 add_reg_note (insn, REG_CFA_DEF_CFA,
12097 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12098 GEN_INT (param_ptr_offset)));
12099 RTX_FRAME_RELATED_P (insn) = 1;
12101 if (!call_used_regs[REGNO (crtl->drap_reg)])
12102 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12105 /* At this point the stack pointer must be valid, and we must have
12106 restored all of the registers. We may not have deallocated the
12107 entire stack frame. We've delayed this until now because it may
12108 be possible to merge the local stack deallocation with the
12109 deallocation forced by ix86_static_chain_on_stack. */
12110 gcc_assert (m->fs.sp_valid);
12111 gcc_assert (!m->fs.fp_valid);
12112 gcc_assert (!m->fs.realigned);
12113 if (m->fs.sp_offset != UNITS_PER_WORD)
12115 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12116 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12117 style, true);
12119 else
12120 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12122 /* Sibcall epilogues don't want a return instruction. */
12123 if (style == 0)
12125 m->fs = frame_state_save;
12126 return;
12129 if (crtl->args.pops_args && crtl->args.size)
12131 rtx popc = GEN_INT (crtl->args.pops_args);
12133 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12134 address, do an explicit add, and jump indirectly to the caller. */
12136 if (crtl->args.pops_args >= 65536)
12138 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12139 rtx insn;
12141 /* There is no "pascal" calling convention in any 64bit ABI. */
12142 gcc_assert (!TARGET_64BIT);
12144 insn = emit_insn (gen_pop (ecx));
12145 m->fs.cfa_offset -= UNITS_PER_WORD;
12146 m->fs.sp_offset -= UNITS_PER_WORD;
12148 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12149 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12150 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12151 add_reg_note (insn, REG_CFA_REGISTER,
12152 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12153 RTX_FRAME_RELATED_P (insn) = 1;
12155 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12156 popc, -1, true);
12157 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12159 else
12160 emit_jump_insn (gen_simple_return_pop_internal (popc));
12162 else
12163 emit_jump_insn (gen_simple_return_internal ());
12165 /* Restore the state back to the state from the prologue,
12166 so that it's correct for the next epilogue. */
12167 m->fs = frame_state_save;
12170 /* Reset from the function's potential modifications. */
12172 static void
12173 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12175 if (pic_offset_table_rtx
12176 && !ix86_use_pseudo_pic_reg ())
12177 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12178 #if TARGET_MACHO
12179 /* Mach-O doesn't support labels at the end of objects, so if
12180 it looks like we might want one, insert a NOP. */
12182 rtx_insn *insn = get_last_insn ();
12183 rtx_insn *deleted_debug_label = NULL;
12184 while (insn
12185 && NOTE_P (insn)
12186 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12188 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12189 notes only, instead set their CODE_LABEL_NUMBER to -1,
12190 otherwise there would be code generation differences
12191 between -g and -g0. */
12192 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12193 deleted_debug_label = insn;
12194 insn = PREV_INSN (insn);
12196 if (insn
12197 && (LABEL_P (insn)
12198 || (NOTE_P (insn)
12199 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12200 fputs ("\tnop\n", file);
12201 else if (deleted_debug_label)
12202 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12203 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12204 CODE_LABEL_NUMBER (insn) = -1;
12206 #endif
12210 /* Return a scratch register to use in the split stack prologue. The
12211 split stack prologue is used for -fsplit-stack. It consists of the first
12212 instructions in the function, emitted even before the regular prologue.
12213 The scratch register can be any caller-saved register which is not
12214 used for parameters or for the static chain. */
12216 static unsigned int
12217 split_stack_prologue_scratch_regno (void)
12219 if (TARGET_64BIT)
12220 return R11_REG;
12221 else
12223 bool is_fastcall, is_thiscall;
12224 int regparm;
12226 is_fastcall = (lookup_attribute ("fastcall",
12227 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12228 != NULL);
12229 is_thiscall = (lookup_attribute ("thiscall",
12230 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12231 != NULL);
12232 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12234 if (is_fastcall)
12236 if (DECL_STATIC_CHAIN (cfun->decl))
12238 sorry ("-fsplit-stack does not support fastcall with "
12239 "nested function");
12240 return INVALID_REGNUM;
12242 return AX_REG;
12244 else if (is_thiscall)
12246 if (!DECL_STATIC_CHAIN (cfun->decl))
12247 return DX_REG;
12248 return AX_REG;
12250 else if (regparm < 3)
12252 if (!DECL_STATIC_CHAIN (cfun->decl))
12253 return CX_REG;
12254 else
12256 if (regparm >= 2)
12258 sorry ("-fsplit-stack does not support 2 register "
12259 "parameters for a nested function");
12260 return INVALID_REGNUM;
12262 return DX_REG;
12265 else
12267 /* FIXME: We could make this work by pushing a register
12268 around the addition and comparison. */
12269 sorry ("-fsplit-stack does not support 3 register parameters");
12270 return INVALID_REGNUM;
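/* An illustrative summary of the 32-bit selection above (64-bit simply
   uses %r11):

	regparm 0-2, no static chain	-> %ecx
	regparm 0-1, static chain	-> %edx
	regparm 2, static chain		-> unsupported (sorry ())
	fastcall			-> %eax, static chain unsupported
	thiscall			-> %edx, or %eax with a static chain
	regparm 3			-> unsupported (sorry ())  */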
12275 /* A SYMBOL_REF for the function which allocates new stack space for
12276 -fsplit-stack. */
12278 static GTY(()) rtx split_stack_fn;
12280 /* A SYMBOL_REF for the __morestack_large_model function, used when
12281 compiling for the large code model. */
12283 static GTY(()) rtx split_stack_fn_large;
12285 /* Handle -fsplit-stack. These are the first instructions in the
12286 function, even before the regular prologue. */
12288 void
12289 ix86_expand_split_stack_prologue (void)
12291 struct ix86_frame frame;
12292 HOST_WIDE_INT allocate;
12293 unsigned HOST_WIDE_INT args_size;
12294 rtx_code_label *label;
12295 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12296 rtx scratch_reg = NULL_RTX;
12297 rtx_code_label *varargs_label = NULL;
12298 rtx fn;
12300 gcc_assert (flag_split_stack && reload_completed);
12302 ix86_finalize_stack_realign_flags ();
12303 ix86_compute_frame_layout (&frame);
12304 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12306 /* This is the label we will branch to if we have enough stack
12307 space. We expect the basic block reordering pass to reverse this
12308 branch if optimizing, so that we branch in the unlikely case. */
12309 label = gen_label_rtx ();
12311 /* We need to compare the stack pointer minus the frame size with
12312 the stack boundary in the TCB. The stack boundary always gives
12313 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12314 can compare directly. Otherwise we need to do an addition. */
12316 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12317 UNSPEC_STACK_CHECK);
12318 limit = gen_rtx_CONST (Pmode, limit);
12319 limit = gen_rtx_MEM (Pmode, limit);
12320 if (allocate < SPLIT_STACK_AVAILABLE)
12321 current = stack_pointer_rtx;
12322 else
12324 unsigned int scratch_regno;
12325 rtx offset;
12327 /* We need a scratch register to hold the stack pointer minus
12328 the required frame size. Since this is the very start of the
12329 function, the scratch register can be any caller-saved
12330 register which is not used for parameters. */
12331 offset = GEN_INT (- allocate);
12332 scratch_regno = split_stack_prologue_scratch_regno ();
12333 if (scratch_regno == INVALID_REGNUM)
12334 return;
12335 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12336 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12338 /* We don't use ix86_gen_add3 in this case because it will
12339 want to split to lea, but when not optimizing the insn
12340 will not be split after this point. */
12341 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12342 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12343 offset)));
12345 else
12347 emit_move_insn (scratch_reg, offset);
12348 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12349 stack_pointer_rtx));
12351 current = scratch_reg;
12354 ix86_expand_branch (GEU, current, limit, label);
12355 jump_insn = get_last_insn ();
12356 JUMP_LABEL (jump_insn) = label;
12358 /* Mark the jump as very likely to be taken. */
12359 add_int_reg_note (jump_insn, REG_BR_PROB,
12360 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
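/* A rough sketch of what the comparison above turns into on a 64-bit
   target (OFFSET stands for the target-defined split-stack field in the
   TCB reached through the UNSPEC_STACK_CHECK segment reference; the
   exact value is chosen by libgcc, not here):

	leaq	-FRAME(%rsp), %r11	# only if FRAME >= SPLIT_STACK_AVAILABLE
	cmpq	%fs:OFFSET, %r11
	jae	.Lhave_enough_stack	# the likely branch

   When FRAME is small, %rsp itself is compared and the lea is omitted.  */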
12362 if (split_stack_fn == NULL_RTX)
12364 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12365 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12367 fn = split_stack_fn;
12369 /* Get more stack space. We pass in the desired stack space and the
12370 size of the arguments to copy to the new stack. In 32-bit mode
12371 we push the parameters; __morestack will return on a new stack
12372 anyhow. In 64-bit mode we pass the parameters in r10 and
12373 r11. */
12374 allocate_rtx = GEN_INT (allocate);
12375 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12376 call_fusage = NULL_RTX;
12377 if (TARGET_64BIT)
12379 rtx reg10, reg11;
12381 reg10 = gen_rtx_REG (Pmode, R10_REG);
12382 reg11 = gen_rtx_REG (Pmode, R11_REG);
12384 /* If this function uses a static chain, it will be in %r10.
12385 Preserve it across the call to __morestack. */
12386 if (DECL_STATIC_CHAIN (cfun->decl))
12388 rtx rax;
12390 rax = gen_rtx_REG (word_mode, AX_REG);
12391 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12392 use_reg (&call_fusage, rax);
12395 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12396 && !TARGET_PECOFF)
12398 HOST_WIDE_INT argval;
12400 gcc_assert (Pmode == DImode);
12401 /* When using the large model we need to load the address
12402 into a register, and we've run out of registers. So we
12403 switch to a different calling convention, and we call a
12404 different function: __morestack_large_model. We pass the
12405 argument size in the upper 32 bits of r10 and pass the
12406 frame size in the lower 32 bits. */
12407 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12408 gcc_assert ((args_size & 0xffffffff) == args_size);
12410 if (split_stack_fn_large == NULL_RTX)
12412 split_stack_fn_large =
12413 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12414 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12416 if (ix86_cmodel == CM_LARGE_PIC)
12418 rtx_code_label *label;
12419 rtx x;
12421 label = gen_label_rtx ();
12422 emit_label (label);
12423 LABEL_PRESERVE_P (label) = 1;
12424 emit_insn (gen_set_rip_rex64 (reg10, label));
12425 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12426 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12427 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12428 UNSPEC_GOT);
12429 x = gen_rtx_CONST (Pmode, x);
12430 emit_move_insn (reg11, x);
12431 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12432 x = gen_const_mem (Pmode, x);
12433 emit_move_insn (reg11, x);
12435 else
12436 emit_move_insn (reg11, split_stack_fn_large);
12438 fn = reg11;
12440 argval = ((args_size << 16) << 16) + allocate;
12441 emit_move_insn (reg10, GEN_INT (argval));
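/* As a minimal sketch of the encoding just computed (the authoritative
   layout is defined by __morestack_large_model in libgcc):

	argval = ((uint64_t) args_size << 32) | allocate;

   i.e. the argument-copy size goes in the upper 32 bits of %r10 and the
   frame size in the lower 32 bits, which is what the asserts above
   guarantee will fit.  For example, args_size == 16 and allocate == 0x120
   give 0x0000001000000120.  */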
12443 else
12445 emit_move_insn (reg10, allocate_rtx);
12446 emit_move_insn (reg11, GEN_INT (args_size));
12447 use_reg (&call_fusage, reg11);
12450 use_reg (&call_fusage, reg10);
12452 else
12454 emit_insn (gen_push (GEN_INT (args_size)));
12455 emit_insn (gen_push (allocate_rtx));
12457 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12458 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12459 NULL_RTX, false);
12460 add_function_usage_to (call_insn, call_fusage);
12462 /* In order to make call/return prediction work right, we now need
12463 to execute a return instruction. See
12464 libgcc/config/i386/morestack.S for the details on how this works.
12466 For flow purposes gcc must not see this as a return
12467 instruction--we need control flow to continue at the subsequent
12468 label. Therefore, we use an unspec. */
12469 gcc_assert (crtl->args.pops_args < 65536);
12470 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12472 /* If we are in 64-bit mode and this function uses a static chain,
12473 we saved %r10 in %rax before calling __morestack. */
12474 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12475 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12476 gen_rtx_REG (word_mode, AX_REG));
12478 /* If this function calls va_start, we need to store a pointer to
12479 the arguments on the old stack, because they may not all have
12480 been copied to the new stack. At this point the old stack can be
12481 found at the frame pointer value used by __morestack, because
12482 __morestack has set that up before calling back to us. Here we
12483 store that pointer in a scratch register, and in
12484 ix86_expand_prologue we store the scratch register in a stack
12485 slot. */
12486 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12488 unsigned int scratch_regno;
12489 rtx frame_reg;
12490 int words;
12492 scratch_regno = split_stack_prologue_scratch_regno ();
12493 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12494 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12496 /* 64-bit:
12497 fp -> old fp value
12498 return address within this function
12499 return address of caller of this function
12500 stack arguments
12501 So we add three words to get to the stack arguments.
12503 32-bit:
12504 fp -> old fp value
12505 return address within this function
12506 first argument to __morestack
12507 second argument to __morestack
12508 return address of caller of this function
12509 stack arguments
12510 So we add five words to get to the stack arguments.
12512 words = TARGET_64BIT ? 3 : 5;
12513 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12514 gen_rtx_PLUS (Pmode, frame_reg,
12515 GEN_INT (words * UNITS_PER_WORD))));
12517 varargs_label = gen_label_rtx ();
12518 emit_jump_insn (gen_jump (varargs_label));
12519 JUMP_LABEL (get_last_insn ()) = varargs_label;
12521 emit_barrier ();
12524 emit_label (label);
12525 LABEL_NUSES (label) = 1;
12527 /* If this function calls va_start, we now have to set the scratch
12528 register for the case where we do not call __morestack. In this
12529 case we need to set it based on the stack pointer. */
12530 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12532 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12533 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12534 GEN_INT (UNITS_PER_WORD))));
12536 emit_label (varargs_label);
12537 LABEL_NUSES (varargs_label) = 1;
12541 /* We may have to tell the dataflow pass that the split stack prologue
12542 is initializing a scratch register. */
12544 static void
12545 ix86_live_on_entry (bitmap regs)
12547 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12549 gcc_assert (flag_split_stack);
12550 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12554 /* Extract the parts of an RTL expression that is a valid memory address
12555 for an instruction. Return 0 if the structure of the address is
12556 grossly off. Return -1 if the address contains ASHIFT, so it is not
12557 strictly valid, but is still used for computing the length of the lea instruction. */
12560 ix86_decompose_address (rtx addr, struct ix86_address *out)
12562 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12563 rtx base_reg, index_reg;
12564 HOST_WIDE_INT scale = 1;
12565 rtx scale_rtx = NULL_RTX;
12566 rtx tmp;
12567 int retval = 1;
12568 enum ix86_address_seg seg = SEG_DEFAULT;
12570 /* Allow zero-extended SImode addresses,
12571 they will be emitted with addr32 prefix. */
12572 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12574 if (GET_CODE (addr) == ZERO_EXTEND
12575 && GET_MODE (XEXP (addr, 0)) == SImode)
12577 addr = XEXP (addr, 0);
12578 if (CONST_INT_P (addr))
12579 return 0;
12581 else if (GET_CODE (addr) == AND
12582 && const_32bit_mask (XEXP (addr, 1), DImode))
12584 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12585 if (addr == NULL_RTX)
12586 return 0;
12588 if (CONST_INT_P (addr))
12589 return 0;
12593 /* Allow SImode subregs of DImode addresses,
12594 they will be emitted with addr32 prefix. */
12595 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12597 if (GET_CODE (addr) == SUBREG
12598 && GET_MODE (SUBREG_REG (addr)) == DImode)
12600 addr = SUBREG_REG (addr);
12601 if (CONST_INT_P (addr))
12602 return 0;
12606 if (REG_P (addr))
12607 base = addr;
12608 else if (GET_CODE (addr) == SUBREG)
12610 if (REG_P (SUBREG_REG (addr)))
12611 base = addr;
12612 else
12613 return 0;
12615 else if (GET_CODE (addr) == PLUS)
12617 rtx addends[4], op;
12618 int n = 0, i;
12620 op = addr;
12623 if (n >= 4)
12624 return 0;
12625 addends[n++] = XEXP (op, 1);
12626 op = XEXP (op, 0);
12628 while (GET_CODE (op) == PLUS);
12629 if (n >= 4)
12630 return 0;
12631 addends[n] = op;
12633 for (i = n; i >= 0; --i)
12635 op = addends[i];
12636 switch (GET_CODE (op))
12638 case MULT:
12639 if (index)
12640 return 0;
12641 index = XEXP (op, 0);
12642 scale_rtx = XEXP (op, 1);
12643 break;
12645 case ASHIFT:
12646 if (index)
12647 return 0;
12648 index = XEXP (op, 0);
12649 tmp = XEXP (op, 1);
12650 if (!CONST_INT_P (tmp))
12651 return 0;
12652 scale = INTVAL (tmp);
12653 if ((unsigned HOST_WIDE_INT) scale > 3)
12654 return 0;
12655 scale = 1 << scale;
12656 break;
12658 case ZERO_EXTEND:
12659 op = XEXP (op, 0);
12660 if (GET_CODE (op) != UNSPEC)
12661 return 0;
12662 /* FALLTHRU */
12664 case UNSPEC:
12665 if (XINT (op, 1) == UNSPEC_TP
12666 && TARGET_TLS_DIRECT_SEG_REFS
12667 && seg == SEG_DEFAULT)
12668 seg = DEFAULT_TLS_SEG_REG;
12669 else
12670 return 0;
12671 break;
12673 case SUBREG:
12674 if (!REG_P (SUBREG_REG (op)))
12675 return 0;
12676 /* FALLTHRU */
12678 case REG:
12679 if (!base)
12680 base = op;
12681 else if (!index)
12682 index = op;
12683 else
12684 return 0;
12685 break;
12687 case CONST:
12688 case CONST_INT:
12689 case SYMBOL_REF:
12690 case LABEL_REF:
12691 if (disp)
12692 return 0;
12693 disp = op;
12694 break;
12696 default:
12697 return 0;
12701 else if (GET_CODE (addr) == MULT)
12703 index = XEXP (addr, 0); /* index*scale */
12704 scale_rtx = XEXP (addr, 1);
12706 else if (GET_CODE (addr) == ASHIFT)
12708 /* We're called for lea too, which implements ashift on occasion. */
12709 index = XEXP (addr, 0);
12710 tmp = XEXP (addr, 1);
12711 if (!CONST_INT_P (tmp))
12712 return 0;
12713 scale = INTVAL (tmp);
12714 if ((unsigned HOST_WIDE_INT) scale > 3)
12715 return 0;
12716 scale = 1 << scale;
12717 retval = -1;
12719 else
12720 disp = addr; /* displacement */
12722 if (index)
12724 if (REG_P (index))
12726 else if (GET_CODE (index) == SUBREG
12727 && REG_P (SUBREG_REG (index)))
12729 else
12730 return 0;
12733 /* Extract the integral value of scale. */
12734 if (scale_rtx)
12736 if (!CONST_INT_P (scale_rtx))
12737 return 0;
12738 scale = INTVAL (scale_rtx);
12741 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12742 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12744 /* Avoid useless 0 displacement. */
12745 if (disp == const0_rtx && (base || index))
12746 disp = NULL_RTX;
12748 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12749 if (base_reg && index_reg && scale == 1
12750 && (index_reg == arg_pointer_rtx
12751 || index_reg == frame_pointer_rtx
12752 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12754 std::swap (base, index);
12755 std::swap (base_reg, index_reg);
12758 /* Special case: %ebp cannot be encoded as a base without a displacement.
12759 Similarly %r13. */
12760 if (!disp
12761 && base_reg
12762 && (base_reg == hard_frame_pointer_rtx
12763 || base_reg == frame_pointer_rtx
12764 || base_reg == arg_pointer_rtx
12765 || (REG_P (base_reg)
12766 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12767 || REGNO (base_reg) == R13_REG))))
12768 disp = const0_rtx;
12770 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12771 Avoid this by transforming to [%esi+0].
12772 Reload calls address legitimization without cfun defined, so we need
12773 to test cfun for being non-NULL. */
12774 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12775 && base_reg && !index_reg && !disp
12776 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12777 disp = const0_rtx;
12779 /* Special case: encode reg+reg instead of reg*2. */
12780 if (!base && index && scale == 2)
12781 base = index, base_reg = index_reg, scale = 1;
12783 /* Special case: scaling cannot be encoded without base or displacement. */
12784 if (!base && !disp && index && scale != 1)
12785 disp = const0_rtx;
12787 out->base = base;
12788 out->index = index;
12789 out->disp = disp;
12790 out->scale = scale;
12791 out->seg = seg;
12793 return retval;
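/* A worked example of the decomposition above (illustrative): for the
   32-bit address

	(plus:SI (plus:SI (mult:SI (reg:SI 0 ax) (const_int 4))
			  (reg:SI 3 bx))
		 (const_int 8))

   the function fills OUT with base = %ebx, index = %eax, scale = 4,
   disp = (const_int 8), seg = SEG_DEFAULT and returns 1, matching the
   operand 8(%ebx,%eax,4).  */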
12796 /* Return cost of the memory address x.
12797 For i386, it is better to use a complex address than let gcc copy
12798 the address into a reg and make a new pseudo. But not if the address
12799 requires two regs - that would mean more pseudos with longer
12800 lifetimes. */
12801 static int
12802 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12804 struct ix86_address parts;
12805 int cost = 1;
12806 int ok = ix86_decompose_address (x, &parts);
12808 gcc_assert (ok);
12810 if (parts.base && GET_CODE (parts.base) == SUBREG)
12811 parts.base = SUBREG_REG (parts.base);
12812 if (parts.index && GET_CODE (parts.index) == SUBREG)
12813 parts.index = SUBREG_REG (parts.index);
12815 /* Attempt to minimize number of registers in the address. */
12816 if ((parts.base
12817 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12818 || (parts.index
12819 && (!REG_P (parts.index)
12820 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12821 cost++;
12823 /* When the address base or index is "pic_offset_table_rtx" we don't
12824 increase the address cost. When a memory operand using
12825 "pic_offset_table_rtx" is not invariant itself, it most likely means
12826 that the base or index is not invariant, so only "pic_offset_table_rtx"
12827 could be hoisted out, which is not profitable on x86. */
12828 if (parts.base
12829 && (!pic_offset_table_rtx
12830 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12831 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12832 && parts.index
12833 && (!pic_offset_table_rtx
12834 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12835 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12836 && parts.base != parts.index)
12837 cost++;
12839 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12840 since its predecode logic can't detect the length of such instructions
12841 and decoding degenerates to vector decoding. Increase the cost of such
12842 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12843 to split such addresses or even refuse them altogether.
12845 The following addressing modes are affected:
12846 [base+scale*index]
12847 [scale*index+disp]
12848 [base+index]
12850 The first and last cases may be avoidable by explicitly coding a zero
12851 displacement, but I don't have an AMD-K6 machine handy to check this
12852 theory. */
12854 if (TARGET_K6
12855 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12856 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12857 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12858 cost += 10;
12860 return cost;
12863 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12864 this is used to form addresses of local data when -fPIC is in
12865 use. */
12867 static bool
12868 darwin_local_data_pic (rtx disp)
12870 return (GET_CODE (disp) == UNSPEC
12871 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12874 /* Determine if a given RTX is a valid constant. We already know this
12875 satisfies CONSTANT_P. */
12877 static bool
12878 ix86_legitimate_constant_p (machine_mode, rtx x)
12880 /* Pointer bounds constants are not valid. */
12881 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12882 return false;
12884 switch (GET_CODE (x))
12886 case CONST:
12887 x = XEXP (x, 0);
12889 if (GET_CODE (x) == PLUS)
12891 if (!CONST_INT_P (XEXP (x, 1)))
12892 return false;
12893 x = XEXP (x, 0);
12896 if (TARGET_MACHO && darwin_local_data_pic (x))
12897 return true;
12899 /* Only some unspecs are valid as "constants". */
12900 if (GET_CODE (x) == UNSPEC)
12901 switch (XINT (x, 1))
12903 case UNSPEC_GOT:
12904 case UNSPEC_GOTOFF:
12905 case UNSPEC_PLTOFF:
12906 return TARGET_64BIT;
12907 case UNSPEC_TPOFF:
12908 case UNSPEC_NTPOFF:
12909 x = XVECEXP (x, 0, 0);
12910 return (GET_CODE (x) == SYMBOL_REF
12911 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12912 case UNSPEC_DTPOFF:
12913 x = XVECEXP (x, 0, 0);
12914 return (GET_CODE (x) == SYMBOL_REF
12915 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12916 default:
12917 return false;
12920 /* We must have drilled down to a symbol. */
12921 if (GET_CODE (x) == LABEL_REF)
12922 return true;
12923 if (GET_CODE (x) != SYMBOL_REF)
12924 return false;
12925 /* FALLTHRU */
12927 case SYMBOL_REF:
12928 /* TLS symbols are never valid. */
12929 if (SYMBOL_REF_TLS_MODEL (x))
12930 return false;
12932 /* DLLIMPORT symbols are never valid. */
12933 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12934 && SYMBOL_REF_DLLIMPORT_P (x))
12935 return false;
12937 #if TARGET_MACHO
12938 /* mdynamic-no-pic */
12939 if (MACHO_DYNAMIC_NO_PIC_P)
12940 return machopic_symbol_defined_p (x);
12941 #endif
12942 break;
12944 case CONST_DOUBLE:
12945 if (GET_MODE (x) == TImode
12946 && x != CONST0_RTX (TImode)
12947 && !TARGET_64BIT)
12948 return false;
12949 break;
12951 case CONST_VECTOR:
12952 if (!standard_sse_constant_p (x))
12953 return false;
12955 default:
12956 break;
12959 /* Otherwise we handle everything else in the move patterns. */
12960 return true;
12963 /* Determine if it's legal to put X into the constant pool. This
12964 is not possible for the address of thread-local symbols, which
12965 is checked above. */
12967 static bool
12968 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
12970 /* We can always put integral constants and vectors in memory. */
12971 switch (GET_CODE (x))
12973 case CONST_INT:
12974 case CONST_DOUBLE:
12975 case CONST_VECTOR:
12976 return false;
12978 default:
12979 break;
12981 return !ix86_legitimate_constant_p (mode, x);
12984 /* Return true if the symbol is marked as dllimport or as a stub
12985 variable, false otherwise. */
12987 static bool
12988 is_imported_p (rtx x)
12990 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12991 || GET_CODE (x) != SYMBOL_REF)
12992 return false;
12994 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12998 /* Nonzero if the constant value X is a legitimate general operand
12999 when generating PIC code. It is given that flag_pic is on and
13000 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13002 bool
13003 legitimate_pic_operand_p (rtx x)
13005 rtx inner;
13007 switch (GET_CODE (x))
13009 case CONST:
13010 inner = XEXP (x, 0);
13011 if (GET_CODE (inner) == PLUS
13012 && CONST_INT_P (XEXP (inner, 1)))
13013 inner = XEXP (inner, 0);
13015 /* Only some unspecs are valid as "constants". */
13016 if (GET_CODE (inner) == UNSPEC)
13017 switch (XINT (inner, 1))
13019 case UNSPEC_GOT:
13020 case UNSPEC_GOTOFF:
13021 case UNSPEC_PLTOFF:
13022 return TARGET_64BIT;
13023 case UNSPEC_TPOFF:
13024 x = XVECEXP (inner, 0, 0);
13025 return (GET_CODE (x) == SYMBOL_REF
13026 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13027 case UNSPEC_MACHOPIC_OFFSET:
13028 return legitimate_pic_address_disp_p (x);
13029 default:
13030 return false;
13032 /* FALLTHRU */
13034 case SYMBOL_REF:
13035 case LABEL_REF:
13036 return legitimate_pic_address_disp_p (x);
13038 default:
13039 return true;
13043 /* Determine if a given CONST RTX is a valid memory displacement
13044 in PIC mode. */
13046 bool
13047 legitimate_pic_address_disp_p (rtx disp)
13049 bool saw_plus;
13051 /* In 64bit mode we can allow direct addresses of symbols and labels
13052 when they are not dynamic symbols. */
13053 if (TARGET_64BIT)
13055 rtx op0 = disp, op1;
13057 switch (GET_CODE (disp))
13059 case LABEL_REF:
13060 return true;
13062 case CONST:
13063 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13064 break;
13065 op0 = XEXP (XEXP (disp, 0), 0);
13066 op1 = XEXP (XEXP (disp, 0), 1);
13067 if (!CONST_INT_P (op1)
13068 || INTVAL (op1) >= 16*1024*1024
13069 || INTVAL (op1) < -16*1024*1024)
13070 break;
13071 if (GET_CODE (op0) == LABEL_REF)
13072 return true;
13073 if (GET_CODE (op0) == CONST
13074 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13075 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13076 return true;
13077 if (GET_CODE (op0) == UNSPEC
13078 && XINT (op0, 1) == UNSPEC_PCREL)
13079 return true;
13080 if (GET_CODE (op0) != SYMBOL_REF)
13081 break;
13082 /* FALLTHRU */
13084 case SYMBOL_REF:
13085 /* TLS references should always be enclosed in UNSPEC.
13086 A dllimported symbol must always be resolved. */
13087 if (SYMBOL_REF_TLS_MODEL (op0)
13088 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13089 return false;
13091 if (TARGET_PECOFF)
13093 if (is_imported_p (op0))
13094 return true;
13096 if (SYMBOL_REF_FAR_ADDR_P (op0)
13097 || !SYMBOL_REF_LOCAL_P (op0))
13098 break;
13100 /* Function symbols need to be resolved only for
13101 the large model.
13102 For the small model we don't need to resolve anything
13103 here. */
13104 if ((ix86_cmodel != CM_LARGE_PIC
13105 && SYMBOL_REF_FUNCTION_P (op0))
13106 || ix86_cmodel == CM_SMALL_PIC)
13107 return true;
13108 /* Non-external symbols don't need to be resolved for
13109 the large and medium models. */
13110 if ((ix86_cmodel == CM_LARGE_PIC
13111 || ix86_cmodel == CM_MEDIUM_PIC)
13112 && !SYMBOL_REF_EXTERNAL_P (op0))
13113 return true;
13115 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13116 && SYMBOL_REF_LOCAL_P (op0)
13117 && ix86_cmodel != CM_LARGE_PIC)
13118 return true;
13119 break;
13121 default:
13122 break;
13125 if (GET_CODE (disp) != CONST)
13126 return false;
13127 disp = XEXP (disp, 0);
13129 if (TARGET_64BIT)
13131 /* It is not safe to allow PLUS expressions here; that would limit the
13132 allowed distance of GOT tables. We should not need these anyway. */
13133 if (GET_CODE (disp) != UNSPEC
13134 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13135 && XINT (disp, 1) != UNSPEC_GOTOFF
13136 && XINT (disp, 1) != UNSPEC_PCREL
13137 && XINT (disp, 1) != UNSPEC_PLTOFF))
13138 return false;
13140 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13141 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13142 return false;
13143 return true;
13146 saw_plus = false;
13147 if (GET_CODE (disp) == PLUS)
13149 if (!CONST_INT_P (XEXP (disp, 1)))
13150 return false;
13151 disp = XEXP (disp, 0);
13152 saw_plus = true;
13155 if (TARGET_MACHO && darwin_local_data_pic (disp))
13156 return true;
13158 if (GET_CODE (disp) != UNSPEC)
13159 return false;
13161 switch (XINT (disp, 1))
13163 case UNSPEC_GOT:
13164 if (saw_plus)
13165 return false;
13166 /* We need to check for both symbols and labels because VxWorks loads
13167 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13168 details. */
13169 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13170 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13171 case UNSPEC_GOTOFF:
13172 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13173 While the ABI also specifies a 32bit relocation, we don't produce it
13174 in the small PIC model at all. */
13175 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13176 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13177 && !TARGET_64BIT)
13178 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13179 return false;
13180 case UNSPEC_GOTTPOFF:
13181 case UNSPEC_GOTNTPOFF:
13182 case UNSPEC_INDNTPOFF:
13183 if (saw_plus)
13184 return false;
13185 disp = XVECEXP (disp, 0, 0);
13186 return (GET_CODE (disp) == SYMBOL_REF
13187 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13188 case UNSPEC_NTPOFF:
13189 disp = XVECEXP (disp, 0, 0);
13190 return (GET_CODE (disp) == SYMBOL_REF
13191 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13192 case UNSPEC_DTPOFF:
13193 disp = XVECEXP (disp, 0, 0);
13194 return (GET_CODE (disp) == SYMBOL_REF
13195 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13198 return false;
13201 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns true if some
13202 part of X was pushed for reload, in which case the calling macro
13203 should goto WIN, and false otherwise. */
13206 bool
13207 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13208 int)
13210 /* Reload can generate:
13212 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13213 (reg:DI 97))
13214 (reg:DI 2 cx))
13216 This RTX is rejected by ix86_legitimate_address_p due to
13217 non-strictness of base register 97. Following this rejection,
13218 reload pushes all three components into separate registers,
13219 creating an invalid memory address RTX.
13221 The following code reloads only the invalid part of the
13222 memory address RTX. */
13224 if (GET_CODE (x) == PLUS
13225 && REG_P (XEXP (x, 1))
13226 && GET_CODE (XEXP (x, 0)) == PLUS
13227 && REG_P (XEXP (XEXP (x, 0), 1)))
13229 rtx base, index;
13230 bool something_reloaded = false;
13232 base = XEXP (XEXP (x, 0), 1);
13233 if (!REG_OK_FOR_BASE_STRICT_P (base))
13235 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13236 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13237 opnum, (enum reload_type) type);
13238 something_reloaded = true;
13241 index = XEXP (x, 1);
13242 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13244 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13245 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13246 opnum, (enum reload_type) type);
13247 something_reloaded = true;
13250 gcc_assert (something_reloaded);
13251 return true;
13254 return false;
13257 /* Determine if OP is a suitable RTX for an address register.
13258 Return the naked register if a register or a register subreg is
13259 found, otherwise return NULL_RTX. */
13261 static rtx
13262 ix86_validate_address_register (rtx op)
13264 machine_mode mode = GET_MODE (op);
13266 /* Only SImode or DImode registers can form the address. */
13267 if (mode != SImode && mode != DImode)
13268 return NULL_RTX;
13270 if (REG_P (op))
13271 return op;
13272 else if (GET_CODE (op) == SUBREG)
13274 rtx reg = SUBREG_REG (op);
13276 if (!REG_P (reg))
13277 return NULL_RTX;
13279 mode = GET_MODE (reg);
13281 /* Don't allow SUBREGs that span more than a word. It can
13282 lead to spill failures when the register is one word out
13283 of a two word structure. */
13284 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13285 return NULL_RTX;
13287 /* Allow only SUBREGs of non-eliminable hard registers. */
13288 if (register_no_elim_operand (reg, mode))
13289 return reg;
13292 /* Op is not a register. */
13293 return NULL_RTX;
13296 /* Recognizes RTL expressions that are valid memory addresses for an
13297 instruction. The MODE argument is the machine mode for the MEM
13298 expression that wants to use this address.
13300 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13301 convert common non-canonical forms to canonical form so that they will
13302 be recognized. */
13304 static bool
13305 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13307 struct ix86_address parts;
13308 rtx base, index, disp;
13309 HOST_WIDE_INT scale;
13310 enum ix86_address_seg seg;
13312 if (ix86_decompose_address (addr, &parts) <= 0)
13313 /* Decomposition failed. */
13314 return false;
13316 base = parts.base;
13317 index = parts.index;
13318 disp = parts.disp;
13319 scale = parts.scale;
13320 seg = parts.seg;
13322 /* Validate base register. */
13323 if (base)
13325 rtx reg = ix86_validate_address_register (base);
13327 if (reg == NULL_RTX)
13328 return false;
13330 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13331 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13332 /* Base is not valid. */
13333 return false;
13336 /* Validate index register. */
13337 if (index)
13339 rtx reg = ix86_validate_address_register (index);
13341 if (reg == NULL_RTX)
13342 return false;
13344 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13345 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13346 /* Index is not valid. */
13347 return false;
13350 /* Index and base should have the same mode. */
13351 if (base && index
13352 && GET_MODE (base) != GET_MODE (index))
13353 return false;
13355 /* Address override works only on the (%reg) part of %fs:(%reg). */
13356 if (seg != SEG_DEFAULT
13357 && ((base && GET_MODE (base) != word_mode)
13358 || (index && GET_MODE (index) != word_mode)))
13359 return false;
13361 /* Validate scale factor. */
13362 if (scale != 1)
13364 if (!index)
13365 /* Scale without index. */
13366 return false;
13368 if (scale != 2 && scale != 4 && scale != 8)
13369 /* Scale is not a valid multiplier. */
13370 return false;
13373 /* Validate displacement. */
13374 if (disp)
13376 if (GET_CODE (disp) == CONST
13377 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13378 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13379 switch (XINT (XEXP (disp, 0), 1))
13381 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13382 used. While the ABI also specifies 32bit relocations, we don't produce
13383 them at all and use IP-relative addressing instead. */
13384 case UNSPEC_GOT:
13385 case UNSPEC_GOTOFF:
13386 gcc_assert (flag_pic);
13387 if (!TARGET_64BIT)
13388 goto is_legitimate_pic;
13390 /* 64bit address unspec. */
13391 return false;
13393 case UNSPEC_GOTPCREL:
13394 case UNSPEC_PCREL:
13395 gcc_assert (flag_pic);
13396 goto is_legitimate_pic;
13398 case UNSPEC_GOTTPOFF:
13399 case UNSPEC_GOTNTPOFF:
13400 case UNSPEC_INDNTPOFF:
13401 case UNSPEC_NTPOFF:
13402 case UNSPEC_DTPOFF:
13403 break;
13405 case UNSPEC_STACK_CHECK:
13406 gcc_assert (flag_split_stack);
13407 break;
13409 default:
13410 /* Invalid address unspec. */
13411 return false;
13414 else if (SYMBOLIC_CONST (disp)
13415 && (flag_pic
13416 || (TARGET_MACHO
13417 #if TARGET_MACHO
13418 && MACHOPIC_INDIRECT
13419 && !machopic_operand_p (disp)
13420 #endif
13424 is_legitimate_pic:
13425 if (TARGET_64BIT && (index || base))
13427 /* foo@dtpoff(%rX) is ok. */
13428 if (GET_CODE (disp) != CONST
13429 || GET_CODE (XEXP (disp, 0)) != PLUS
13430 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13431 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13432 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13433 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13434 /* Non-constant pic memory reference. */
13435 return false;
13437 else if ((!TARGET_MACHO || flag_pic)
13438 && ! legitimate_pic_address_disp_p (disp))
13439 /* Displacement is an invalid pic construct. */
13440 return false;
13441 #if TARGET_MACHO
13442 else if (MACHO_DYNAMIC_NO_PIC_P
13443 && !ix86_legitimate_constant_p (Pmode, disp))
13444 /* displacement must be referenced via non_lazy_pointer */
13445 return false;
13446 #endif
13448 /* This code used to verify that a symbolic pic displacement
13449 includes the pic_offset_table_rtx register.
13451 While this is a good idea, unfortunately these constructs may
13452 be created by the "adds using lea" optimization for incorrect
13453 code like:
13455 int a;
13456 int foo(int i)
13458 return *(&a+i);
13461 This code is nonsensical, but results in addressing the
13462 GOT table with a pic_offset_table_rtx base. We can't
13463 just refuse it easily, since it gets matched by the
13464 "addsi3" pattern, which later gets split to lea when the
13465 output register differs from the input. While this
13466 could be handled by a separate addsi pattern for this case
13467 that never results in lea, disabling this test seems to be
13468 the easier and correct fix for the crash. */
13470 else if (GET_CODE (disp) != LABEL_REF
13471 && !CONST_INT_P (disp)
13472 && (GET_CODE (disp) != CONST
13473 || !ix86_legitimate_constant_p (Pmode, disp))
13474 && (GET_CODE (disp) != SYMBOL_REF
13475 || !ix86_legitimate_constant_p (Pmode, disp)))
13476 /* Displacement is not constant. */
13477 return false;
13478 else if (TARGET_64BIT
13479 && !x86_64_immediate_operand (disp, VOIDmode))
13480 /* Displacement is out of range. */
13481 return false;
13482 /* In x32 mode, constant addresses are sign extended to 64bit, so
13483 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13484 else if (TARGET_X32 && !(index || base)
13485 && CONST_INT_P (disp)
13486 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13487 return false;
13490 /* Everything looks valid. */
13491 return true;
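/* For illustration (not exhaustive): an operand such as 4(%esp,%eax,2)
   passes the checks above (valid base and index registers, scale 2),
   whereas a scale of 3 fails the scale test and a HImode register fails
   ix86_validate_address_register.  */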
13494 /* Determine if a given RTX is a valid constant address. */
13496 bool
13497 constant_address_p (rtx x)
13499 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13502 /* Return a unique alias set for the GOT. */
13504 static alias_set_type
13505 ix86_GOT_alias_set (void)
13507 static alias_set_type set = -1;
13508 if (set == -1)
13509 set = new_alias_set ();
13510 return set;
13513 /* Set regs_ever_live for PIC base address register
13514 to true if required. */
13515 static void
13516 set_pic_reg_ever_live ()
13518 if (reload_in_progress)
13519 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13522 /* Return a legitimate reference for ORIG (an address) using the
13523 register REG. If REG is 0, a new pseudo is generated.
13525 There are two types of references that must be handled:
13527 1. Global data references must load the address from the GOT, via
13528 the PIC reg. An insn is emitted to do this load, and the reg is
13529 returned.
13531 2. Static data references, constant pool addresses, and code labels
13532 compute the address as an offset from the GOT, whose base is in
13533 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13534 differentiate them from global data objects. The returned
13535 address is the PIC reg + an unspec constant.
13537 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13538 reg also appears in the address. */
13540 static rtx
13541 legitimize_pic_address (rtx orig, rtx reg)
13543 rtx addr = orig;
13544 rtx new_rtx = orig;
13546 #if TARGET_MACHO
13547 if (TARGET_MACHO && !TARGET_64BIT)
13549 if (reg == 0)
13550 reg = gen_reg_rtx (Pmode);
13551 /* Use the generic Mach-O PIC machinery. */
13552 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13554 #endif
13556 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13558 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13559 if (tmp)
13560 return tmp;
13563 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13564 new_rtx = addr;
13565 else if (TARGET_64BIT && !TARGET_PECOFF
13566 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13568 rtx tmpreg;
13569 /* This symbol may be referenced via a displacement from the PIC
13570 base address (@GOTOFF). */
13572 set_pic_reg_ever_live ();
13573 if (GET_CODE (addr) == CONST)
13574 addr = XEXP (addr, 0);
13575 if (GET_CODE (addr) == PLUS)
13577 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13578 UNSPEC_GOTOFF);
13579 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13581 else
13582 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13583 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13584 if (!reg)
13585 tmpreg = gen_reg_rtx (Pmode);
13586 else
13587 tmpreg = reg;
13588 emit_move_insn (tmpreg, new_rtx);
13590 if (reg != 0)
13592 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13593 tmpreg, 1, OPTAB_DIRECT);
13594 new_rtx = reg;
13596 else
13597 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13599 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13601 /* This symbol may be referenced via a displacement from the PIC
13602 base address (@GOTOFF). */
13604 set_pic_reg_ever_live ();
13605 if (GET_CODE (addr) == CONST)
13606 addr = XEXP (addr, 0);
13607 if (GET_CODE (addr) == PLUS)
13609 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13610 UNSPEC_GOTOFF);
13611 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13613 else
13614 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13615 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13616 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13618 if (reg != 0)
13620 emit_move_insn (reg, new_rtx);
13621 new_rtx = reg;
13624 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13625 /* We can't use @GOTOFF for text labels on VxWorks;
13626 see gotoff_operand. */
13627 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13629 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13630 if (tmp)
13631 return tmp;
13633 /* For x64 PE-COFF there is no GOT table, so we use the address
13634 directly. */
13635 if (TARGET_64BIT && TARGET_PECOFF)
13637 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13638 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13640 if (reg == 0)
13641 reg = gen_reg_rtx (Pmode);
13642 emit_move_insn (reg, new_rtx);
13643 new_rtx = reg;
13645 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13648 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13649 new_rtx = gen_const_mem (Pmode, new_rtx);
13650 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13652 if (reg == 0)
13653 reg = gen_reg_rtx (Pmode);
13654 /* Use gen_movsi directly, otherwise the address is loaded
13655 into a register for CSE. We don't want to CSE these addresses;
13656 instead we CSE addresses from the GOT table, so skip this. */
13657 emit_insn (gen_movsi (reg, new_rtx));
13658 new_rtx = reg;
13660 else
13662 /* This symbol must be referenced via a load from the
13663 Global Offset Table (@GOT). */
13665 set_pic_reg_ever_live ();
13666 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13667 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13668 if (TARGET_64BIT)
13669 new_rtx = force_reg (Pmode, new_rtx);
13670 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13671 new_rtx = gen_const_mem (Pmode, new_rtx);
13672 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13674 if (reg == 0)
13675 reg = gen_reg_rtx (Pmode);
13676 emit_move_insn (reg, new_rtx);
13677 new_rtx = reg;
13680 else
13682 if (CONST_INT_P (addr)
13683 && !x86_64_immediate_operand (addr, VOIDmode))
13685 if (reg)
13687 emit_move_insn (reg, addr);
13688 new_rtx = reg;
13690 else
13691 new_rtx = force_reg (Pmode, addr);
13693 else if (GET_CODE (addr) == CONST)
13695 addr = XEXP (addr, 0);
13697 /* We must match stuff we generate before. Assume the only
13698 unspecs that can get here are ours. Not that we could do
13699 anything with them anyway.... */
13700 if (GET_CODE (addr) == UNSPEC
13701 || (GET_CODE (addr) == PLUS
13702 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13703 return orig;
13704 gcc_assert (GET_CODE (addr) == PLUS);
13706 if (GET_CODE (addr) == PLUS)
13708 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13710 /* Check first to see if this is a constant offset from a @GOTOFF
13711 symbol reference. */
13712 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13713 && CONST_INT_P (op1))
13715 if (!TARGET_64BIT)
13717 set_pic_reg_ever_live ();
13718 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13719 UNSPEC_GOTOFF);
13720 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13721 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13722 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13724 if (reg != 0)
13726 emit_move_insn (reg, new_rtx);
13727 new_rtx = reg;
13730 else
13732 if (INTVAL (op1) < -16*1024*1024
13733 || INTVAL (op1) >= 16*1024*1024)
13735 if (!x86_64_immediate_operand (op1, Pmode))
13736 op1 = force_reg (Pmode, op1);
13737 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13741 else
13743 rtx base = legitimize_pic_address (op0, reg);
13744 machine_mode mode = GET_MODE (base);
13745 new_rtx
13746 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13748 if (CONST_INT_P (new_rtx))
13750 if (INTVAL (new_rtx) < -16*1024*1024
13751 || INTVAL (new_rtx) >= 16*1024*1024)
13753 if (!x86_64_immediate_operand (new_rtx, mode))
13754 new_rtx = force_reg (mode, new_rtx);
13755 new_rtx
13756 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13758 else
13759 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13761 else
13763 if (GET_CODE (new_rtx) == PLUS
13764 && CONSTANT_P (XEXP (new_rtx, 1)))
13766 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13767 new_rtx = XEXP (new_rtx, 1);
13769 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13774 return new_rtx;
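/* A brief sketch of the 32-bit code the cases above typically lead to
   (illustrative; the exact sequences depend on target flags), with %ebx
   holding pic_offset_table_rtx:

	global data, through the GOT:	movl	sym@GOT(%ebx), %eax
	local/static data, @GOTOFF:	leal	sym@GOTOFF(%ebx), %eax

   both yielding the address of "sym" in %eax.  */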
13777 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13779 static rtx
13780 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13782 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13784 if (GET_MODE (tp) != tp_mode)
13786 gcc_assert (GET_MODE (tp) == SImode);
13787 gcc_assert (tp_mode == DImode);
13789 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13792 if (to_reg)
13793 tp = copy_to_mode_reg (tp_mode, tp);
13795 return tp;
13798 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13800 static GTY(()) rtx ix86_tls_symbol;
13802 static rtx
13803 ix86_tls_get_addr (void)
13805 if (!ix86_tls_symbol)
13807 const char *sym
13808 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13809 ? "___tls_get_addr" : "__tls_get_addr");
13811 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13814 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13816 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13817 UNSPEC_PLTOFF);
13818 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13819 gen_rtx_CONST (Pmode, unspec));
13822 return ix86_tls_symbol;
13825 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13827 static GTY(()) rtx ix86_tls_module_base_symbol;
13830 ix86_tls_module_base (void)
13832 if (!ix86_tls_module_base_symbol)
13834 ix86_tls_module_base_symbol
13835 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13837 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13838 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13841 return ix86_tls_module_base_symbol;
13844 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13845 false if we expect this to be used for a memory address and true if
13846 we expect to load the address into a register. */
13848 static rtx
13849 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13851 rtx dest, base, off;
13852 rtx pic = NULL_RTX, tp = NULL_RTX;
13853 machine_mode tp_mode = Pmode;
13854 int type;
13856 /* Fall back to global dynamic model if tool chain cannot support local
13857 dynamic. */
13858 if (TARGET_SUN_TLS && !TARGET_64BIT
13859 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13860 && model == TLS_MODEL_LOCAL_DYNAMIC)
13861 model = TLS_MODEL_GLOBAL_DYNAMIC;
13863 switch (model)
13865 case TLS_MODEL_GLOBAL_DYNAMIC:
13866 dest = gen_reg_rtx (Pmode);
13868 if (!TARGET_64BIT)
13870 if (flag_pic && !TARGET_PECOFF)
13871 pic = pic_offset_table_rtx;
13872 else
13874 pic = gen_reg_rtx (Pmode);
13875 emit_insn (gen_set_got (pic));
13879 if (TARGET_GNU2_TLS)
13881 if (TARGET_64BIT)
13882 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13883 else
13884 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13886 tp = get_thread_pointer (Pmode, true);
13887 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13889 if (GET_MODE (x) != Pmode)
13890 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13892 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13894 else
13896 rtx caddr = ix86_tls_get_addr ();
13898 if (TARGET_64BIT)
13900 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13901 rtx_insn *insns;
13903 start_sequence ();
13904 emit_call_insn
13905 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13906 insns = get_insns ();
13907 end_sequence ();
13909 if (GET_MODE (x) != Pmode)
13910 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13912 RTL_CONST_CALL_P (insns) = 1;
13913 emit_libcall_block (insns, dest, rax, x);
13915 else
13916 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13918 break;
13920 case TLS_MODEL_LOCAL_DYNAMIC:
13921 base = gen_reg_rtx (Pmode);
13923 if (!TARGET_64BIT)
13925 if (flag_pic)
13926 pic = pic_offset_table_rtx;
13927 else
13929 pic = gen_reg_rtx (Pmode);
13930 emit_insn (gen_set_got (pic));
13934 if (TARGET_GNU2_TLS)
13936 rtx tmp = ix86_tls_module_base ();
13938 if (TARGET_64BIT)
13939 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13940 else
13941 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13943 tp = get_thread_pointer (Pmode, true);
13944 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13945 gen_rtx_MINUS (Pmode, tmp, tp));
13947 else
13949 rtx caddr = ix86_tls_get_addr ();
13951 if (TARGET_64BIT)
13953 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13954 rtx_insn *insns;
13955 rtx eqv;
13957 start_sequence ();
13958 emit_call_insn
13959 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13960 insns = get_insns ();
13961 end_sequence ();
13963 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13964 share the LD_BASE result with other LD model accesses. */
13965 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13966 UNSPEC_TLS_LD_BASE);
13968 RTL_CONST_CALL_P (insns) = 1;
13969 emit_libcall_block (insns, base, rax, eqv);
13971 else
13972 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13975 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13976 off = gen_rtx_CONST (Pmode, off);
13978 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13980 if (TARGET_GNU2_TLS)
13982 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13984 if (GET_MODE (x) != Pmode)
13985 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13987 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13989 break;
13991 case TLS_MODEL_INITIAL_EXEC:
13992 if (TARGET_64BIT)
13994 if (TARGET_SUN_TLS && !TARGET_X32)
13996 /* The Sun linker took the AMD64 TLS spec literally
13997 and can only handle %rax as destination of the
13998 initial executable code sequence. */
14000 dest = gen_reg_rtx (DImode);
14001 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14002 return dest;
14005 /* Generate DImode references to avoid %fs:(%reg32)
14006 problems and linker IE->LE relaxation bug. */
14007 tp_mode = DImode;
14008 pic = NULL;
14009 type = UNSPEC_GOTNTPOFF;
14011 else if (flag_pic)
14013 set_pic_reg_ever_live ();
14014 pic = pic_offset_table_rtx;
14015 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14017 else if (!TARGET_ANY_GNU_TLS)
14019 pic = gen_reg_rtx (Pmode);
14020 emit_insn (gen_set_got (pic));
14021 type = UNSPEC_GOTTPOFF;
14023 else
14025 pic = NULL;
14026 type = UNSPEC_INDNTPOFF;
14029 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14030 off = gen_rtx_CONST (tp_mode, off);
14031 if (pic)
14032 off = gen_rtx_PLUS (tp_mode, pic, off);
14033 off = gen_const_mem (tp_mode, off);
14034 set_mem_alias_set (off, ix86_GOT_alias_set ());
14036 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14038 base = get_thread_pointer (tp_mode,
14039 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14040 off = force_reg (tp_mode, off);
14041 return gen_rtx_PLUS (tp_mode, base, off);
14043 else
14045 base = get_thread_pointer (Pmode, true);
14046 dest = gen_reg_rtx (Pmode);
14047 emit_insn (ix86_gen_sub3 (dest, base, off));
14049 break;
14051 case TLS_MODEL_LOCAL_EXEC:
14052 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14053 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14054 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14055 off = gen_rtx_CONST (Pmode, off);
14057 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14059 base = get_thread_pointer (Pmode,
14060 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14061 return gen_rtx_PLUS (Pmode, base, off);
14063 else
14065 base = get_thread_pointer (Pmode, true);
14066 dest = gen_reg_rtx (Pmode);
14067 emit_insn (ix86_gen_sub3 (dest, base, off));
14069 break;
14071 default:
14072 gcc_unreachable ();
14075 return dest;
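/* A hedged sketch of the 64-bit access sequences the exec models above
   roughly correspond to (the real templates live in the machine
   description):

	local exec:	movq	%fs:0, %rax
			leaq	x@tpoff(%rax), %rax

	initial exec:	movq	x@gottpoff(%rip), %rax
			addq	%fs:0, %rax

   The dynamic models (without GNU2 TLS descriptors) instead call
   __tls_get_addr as emitted above.  */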
14078 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14079 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14080 unique refptr-DECL symbol corresponding to symbol DECL. */
14082 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14084 static inline hashval_t hash (tree_map *m) { return m->hash; }
14085 static inline bool
14086 equal (tree_map *a, tree_map *b)
14088 return a->base.from == b->base.from;
14091 static void
14092 handle_cache_entry (tree_map *&m)
14094 extern void gt_ggc_mx (tree_map *&);
14095 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14096 return;
14097 else if (ggc_marked_p (m->base.from))
14098 gt_ggc_mx (m);
14099 else
14100 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14104 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14106 static tree
14107 get_dllimport_decl (tree decl, bool beimport)
14109 struct tree_map *h, in;
14110 const char *name;
14111 const char *prefix;
14112 size_t namelen, prefixlen;
14113 char *imp_name;
14114 tree to;
14115 rtx rtl;
14117 if (!dllimport_map)
14118 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14120 in.hash = htab_hash_pointer (decl);
14121 in.base.from = decl;
14122 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14123 h = *loc;
14124 if (h)
14125 return h->to;
14127 *loc = h = ggc_alloc<tree_map> ();
14128 h->hash = in.hash;
14129 h->base.from = decl;
14130 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14131 VAR_DECL, NULL, ptr_type_node);
14132 DECL_ARTIFICIAL (to) = 1;
14133 DECL_IGNORED_P (to) = 1;
14134 DECL_EXTERNAL (to) = 1;
14135 TREE_READONLY (to) = 1;
14137 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14138 name = targetm.strip_name_encoding (name);
14139 if (beimport)
14140 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14141 ? "*__imp_" : "*__imp__";
14142 else
14143 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14144 namelen = strlen (name);
14145 prefixlen = strlen (prefix);
14146 imp_name = (char *) alloca (namelen + prefixlen + 1);
14147 memcpy (imp_name, prefix, prefixlen);
14148 memcpy (imp_name + prefixlen, name, namelen + 1);
14150 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14151 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14152 SET_SYMBOL_REF_DECL (rtl, to);
14153 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14154 if (!beimport)
14156 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14157 #ifdef SUB_TARGET_RECORD_STUB
14158 SUB_TARGET_RECORD_STUB (name);
14159 #endif
14162 rtl = gen_const_mem (Pmode, rtl);
14163 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14165 SET_DECL_RTL (to, rtl);
14166 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14168 return to;
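/* A short example of the naming performed above (illustrative):

	user_label_prefix "_" (32-bit), BEIMPORT true:
		foo	-> *__imp__foo
		@foo@8	-> *__imp_@foo@8	(fastcall, no extra underscore)

	user_label_prefix "" (64-bit), BEIMPORT false:
		bar	-> *.refptr.bar

   The leading '*' tells the assembler-name machinery not to prepend
   user_label_prefix again.  */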
14171 /* Expand SYMBOL into its corresponding far-address symbol.
14172 WANT_REG is true if we require the result be a register. */
14174 static rtx
14175 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14177 tree imp_decl;
14178 rtx x;
14180 gcc_assert (SYMBOL_REF_DECL (symbol));
14181 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14183 x = DECL_RTL (imp_decl);
14184 if (want_reg)
14185 x = force_reg (Pmode, x);
14186 return x;
14189 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14190 true if we require the result be a register. */
14192 static rtx
14193 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14195 tree imp_decl;
14196 rtx x;
14198 gcc_assert (SYMBOL_REF_DECL (symbol));
14199 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14201 x = DECL_RTL (imp_decl);
14202 if (want_reg)
14203 x = force_reg (Pmode, x);
14204 return x;
14207 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14208 is true if we require the result be a register. */
14210 static rtx
14211 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14213 if (!TARGET_PECOFF)
14214 return NULL_RTX;
14216 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14218 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14219 return legitimize_dllimport_symbol (addr, inreg);
14220 if (GET_CODE (addr) == CONST
14221 && GET_CODE (XEXP (addr, 0)) == PLUS
14222 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14223 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14225 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14226 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14230 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14231 return NULL_RTX;
14232 if (GET_CODE (addr) == SYMBOL_REF
14233 && !is_imported_p (addr)
14234 && SYMBOL_REF_EXTERNAL_P (addr)
14235 && SYMBOL_REF_DECL (addr))
14236 return legitimize_pe_coff_extern_decl (addr, inreg);
14238 if (GET_CODE (addr) == CONST
14239 && GET_CODE (XEXP (addr, 0)) == PLUS
14240 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14241 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14242 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14243 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14245 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14246 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14248 return NULL_RTX;
14251 /* Try machine-dependent ways of modifying an illegitimate address
14252 to be legitimate. If we find one, return the new, valid address.
14253 This macro is used in only one place: `memory_address' in explow.c.
14255 OLDX is the address as it was before break_out_memory_refs was called.
14256 In some cases it is useful to look at this to decide what needs to be done.
14258 It is always safe for this macro to do nothing. It exists to recognize
14259 opportunities to optimize the output.
14261 For the 80386, we handle X+REG by loading X into a register R and
14262 using R+REG. R will go in a general reg and indexing will be used.
14263 However, if REG is a broken-out memory address or multiplication,
14264 nothing needs to be done because REG can certainly go in a general reg.
14266 When -fpic is used, special handling is needed for symbolic references.
14267 See comments by legitimize_pic_address in i386.c for details. */
14269 static rtx
14270 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14272 int changed = 0;
14273 unsigned log;
14275 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14276 if (log)
14277 return legitimize_tls_address (x, (enum tls_model) log, false);
14278 if (GET_CODE (x) == CONST
14279 && GET_CODE (XEXP (x, 0)) == PLUS
14280 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14281 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14283 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14284 (enum tls_model) log, false);
14285 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14288 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14290 rtx tmp = legitimize_pe_coff_symbol (x, true);
14291 if (tmp)
14292 return tmp;
14295 if (flag_pic && SYMBOLIC_CONST (x))
14296 return legitimize_pic_address (x, 0);
14298 #if TARGET_MACHO
14299 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14300 return machopic_indirect_data_reference (x, 0);
14301 #endif
14303 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14304 if (GET_CODE (x) == ASHIFT
14305 && CONST_INT_P (XEXP (x, 1))
14306 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14308 changed = 1;
14309 log = INTVAL (XEXP (x, 1));
14310 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14311 GEN_INT (1 << log));
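/* Editorial note: e.g. (ashift (reg) (const_int 3)) is rewritten above as
   (mult (reg) (const_int 8)), since x86 addressing modes express the index
   scale as a multiplier (1, 2, 4 or 8) rather than as a shift count.  */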
14314 if (GET_CODE (x) == PLUS)
14316 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14318 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14319 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14320 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14322 changed = 1;
14323 log = INTVAL (XEXP (XEXP (x, 0), 1));
14324 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14325 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14326 GEN_INT (1 << log));
14329 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14330 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14331 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14333 changed = 1;
14334 log = INTVAL (XEXP (XEXP (x, 1), 1));
14335 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14336 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14337 GEN_INT (1 << log));
14340 /* Put multiply first if it isn't already. */
14341 if (GET_CODE (XEXP (x, 1)) == MULT)
14343 rtx tmp = XEXP (x, 0);
14344 XEXP (x, 0) = XEXP (x, 1);
14345 XEXP (x, 1) = tmp;
14346 changed = 1;
14349 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14350 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14351 created by virtual register instantiation, register elimination, and
14352 similar optimizations. */
14353 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14355 changed = 1;
14356 x = gen_rtx_PLUS (Pmode,
14357 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14358 XEXP (XEXP (x, 1), 0)),
14359 XEXP (XEXP (x, 1), 1));
14362 /* Canonicalize
14363 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14364 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14365 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14366 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14367 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14368 && CONSTANT_P (XEXP (x, 1)))
14370 rtx constant;
14371 rtx other = NULL_RTX;
14373 if (CONST_INT_P (XEXP (x, 1)))
14375 constant = XEXP (x, 1);
14376 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14378 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14380 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14381 other = XEXP (x, 1);
14383 else
14384 constant = 0;
14386 if (constant)
14388 changed = 1;
14389 x = gen_rtx_PLUS (Pmode,
14390 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14391 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14392 plus_constant (Pmode, other,
14393 INTVAL (constant)));
14397 if (changed && ix86_legitimate_address_p (mode, x, false))
14398 return x;
14400 if (GET_CODE (XEXP (x, 0)) == MULT)
14402 changed = 1;
14403 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14406 if (GET_CODE (XEXP (x, 1)) == MULT)
14408 changed = 1;
14409 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14412 if (changed
14413 && REG_P (XEXP (x, 1))
14414 && REG_P (XEXP (x, 0)))
14415 return x;
14417 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14419 changed = 1;
14420 x = legitimize_pic_address (x, 0);
14423 if (changed && ix86_legitimate_address_p (mode, x, false))
14424 return x;
14426 if (REG_P (XEXP (x, 0)))
14428 rtx temp = gen_reg_rtx (Pmode);
14429 rtx val = force_operand (XEXP (x, 1), temp);
14430 if (val != temp)
14432 val = convert_to_mode (Pmode, val, 1);
14433 emit_move_insn (temp, val);
14436 XEXP (x, 1) = temp;
14437 return x;
14440 else if (REG_P (XEXP (x, 1)))
14442 rtx temp = gen_reg_rtx (Pmode);
14443 rtx val = force_operand (XEXP (x, 0), temp);
14444 if (val != temp)
14446 val = convert_to_mode (Pmode, val, 1);
14447 emit_move_insn (temp, val);
14450 XEXP (x, 0) = temp;
14451 return x;
14455 return x;
14458 /* Print an integer constant expression in assembler syntax. Addition
14459 and subtraction are the only arithmetic that may appear in these
14460 expressions. FILE is the stdio stream to write to, X is the rtx, and
14461 CODE is the operand print code from the output string. */
14463 static void
14464 output_pic_addr_const (FILE *file, rtx x, int code)
14466 char buf[256];
14468 switch (GET_CODE (x))
14470 case PC:
14471 gcc_assert (flag_pic);
14472 putc ('.', file);
14473 break;
14475 case SYMBOL_REF:
14476 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14477 output_addr_const (file, x);
14478 else
14480 const char *name = XSTR (x, 0);
14482 /* Mark the decl as referenced so that cgraph will
14483 output the function. */
14484 if (SYMBOL_REF_DECL (x))
14485 mark_decl_referenced (SYMBOL_REF_DECL (x));
14487 #if TARGET_MACHO
14488 if (MACHOPIC_INDIRECT
14489 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14490 name = machopic_indirection_name (x, /*stub_p=*/true);
14491 #endif
14492 assemble_name (file, name);
14494 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14495 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14496 fputs ("@PLT", file);
14497 break;
14499 case LABEL_REF:
14500 x = XEXP (x, 0);
14501 /* FALLTHRU */
14502 case CODE_LABEL:
14503 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14504 assemble_name (asm_out_file, buf);
14505 break;
14507 case CONST_INT:
14508 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14509 break;
14511 case CONST:
14512 /* This used to output parentheses around the expression,
14513 but that does not work on the 386 (either ATT or BSD assembler). */
14514 output_pic_addr_const (file, XEXP (x, 0), code);
14515 break;
14517 case CONST_DOUBLE:
14518 if (GET_MODE (x) == VOIDmode)
14520 /* We can use %d if the number is <32 bits and positive. */
14521 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14522 fprintf (file, "0x%lx%08lx",
14523 (unsigned long) CONST_DOUBLE_HIGH (x),
14524 (unsigned long) CONST_DOUBLE_LOW (x));
14525 else
14526 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14528 else
14529 /* We can't handle floating point constants;
14530 TARGET_PRINT_OPERAND must handle them. */
14531 output_operand_lossage ("floating constant misused");
14532 break;
14534 case PLUS:
14535 /* Some assemblers need integer constants to appear first. */
14536 if (CONST_INT_P (XEXP (x, 0)))
14538 output_pic_addr_const (file, XEXP (x, 0), code);
14539 putc ('+', file);
14540 output_pic_addr_const (file, XEXP (x, 1), code);
14542 else
14544 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14545 output_pic_addr_const (file, XEXP (x, 1), code);
14546 putc ('+', file);
14547 output_pic_addr_const (file, XEXP (x, 0), code);
14549 break;
14551 case MINUS:
14552 if (!TARGET_MACHO)
14553 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14554 output_pic_addr_const (file, XEXP (x, 0), code);
14555 putc ('-', file);
14556 output_pic_addr_const (file, XEXP (x, 1), code);
14557 if (!TARGET_MACHO)
14558 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14559 break;
14561 case UNSPEC:
14562 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14564 bool f = i386_asm_output_addr_const_extra (file, x);
14565 gcc_assert (f);
14566 break;
14569 gcc_assert (XVECLEN (x, 0) == 1);
14570 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14571 switch (XINT (x, 1))
14573 case UNSPEC_GOT:
14574 fputs ("@GOT", file);
14575 break;
14576 case UNSPEC_GOTOFF:
14577 fputs ("@GOTOFF", file);
14578 break;
14579 case UNSPEC_PLTOFF:
14580 fputs ("@PLTOFF", file);
14581 break;
14582 case UNSPEC_PCREL:
14583 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14584 "(%rip)" : "[rip]", file);
14585 break;
14586 case UNSPEC_GOTPCREL:
14587 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14588 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14589 break;
14590 case UNSPEC_GOTTPOFF:
14591 /* FIXME: This might be @TPOFF in Sun ld too. */
14592 fputs ("@gottpoff", file);
14593 break;
14594 case UNSPEC_TPOFF:
14595 fputs ("@tpoff", file);
14596 break;
14597 case UNSPEC_NTPOFF:
14598 if (TARGET_64BIT)
14599 fputs ("@tpoff", file);
14600 else
14601 fputs ("@ntpoff", file);
14602 break;
14603 case UNSPEC_DTPOFF:
14604 fputs ("@dtpoff", file);
14605 break;
14606 case UNSPEC_GOTNTPOFF:
14607 if (TARGET_64BIT)
14608 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14609 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14610 else
14611 fputs ("@gotntpoff", file);
14612 break;
14613 case UNSPEC_INDNTPOFF:
14614 fputs ("@indntpoff", file);
14615 break;
14616 #if TARGET_MACHO
14617 case UNSPEC_MACHOPIC_OFFSET:
14618 putc ('-', file);
14619 machopic_output_function_base_name (file);
14620 break;
14621 #endif
14622 default:
14623 output_operand_lossage ("invalid UNSPEC as operand");
14624 break;
14626 break;
14628 default:
14629 output_operand_lossage ("invalid expression as operand");
14633 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14634 We need to emit DTP-relative relocations. */
14636 static void ATTRIBUTE_UNUSED
14637 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14639 fputs (ASM_LONG, file);
14640 output_addr_const (file, x);
14641 fputs ("@dtpoff", file);
14642 switch (size)
14644 case 4:
14645 break;
14646 case 8:
14647 fputs (", 0", file);
14648 break;
14649 default:
14650 gcc_unreachable ();
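/* Editorial sketch, assuming ASM_LONG expands to a ".long" directive: a
   4-byte request for symbol `foo' prints roughly ".long foo@dtpoff", and an
   8-byte request prints ".long foo@dtpoff, 0", padding the upper half with
   a literal zero word.  */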
14654 /* Return true if X is a representation of the PIC register. This copes
14655 with calls from ix86_find_base_term, where the register might have
14656 been replaced by a cselib value. */
14658 static bool
14659 ix86_pic_register_p (rtx x)
14661 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14662 return (pic_offset_table_rtx
14663 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14664 else if (!REG_P (x))
14665 return false;
14666 else if (pic_offset_table_rtx)
14668 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14669 return true;
14670 if (HARD_REGISTER_P (x)
14671 && !HARD_REGISTER_P (pic_offset_table_rtx)
14672 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14673 return true;
14674 return false;
14676 else
14677 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14680 /* Helper function for ix86_delegitimize_address.
14681 Attempt to delegitimize TLS local-exec accesses. */
14683 static rtx
14684 ix86_delegitimize_tls_address (rtx orig_x)
14686 rtx x = orig_x, unspec;
14687 struct ix86_address addr;
14689 if (!TARGET_TLS_DIRECT_SEG_REFS)
14690 return orig_x;
14691 if (MEM_P (x))
14692 x = XEXP (x, 0);
14693 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14694 return orig_x;
14695 if (ix86_decompose_address (x, &addr) == 0
14696 || addr.seg != DEFAULT_TLS_SEG_REG
14697 || addr.disp == NULL_RTX
14698 || GET_CODE (addr.disp) != CONST)
14699 return orig_x;
14700 unspec = XEXP (addr.disp, 0);
14701 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14702 unspec = XEXP (unspec, 0);
14703 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14704 return orig_x;
14705 x = XVECEXP (unspec, 0, 0);
14706 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14707 if (unspec != XEXP (addr.disp, 0))
14708 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14709 if (addr.index)
14711 rtx idx = addr.index;
14712 if (addr.scale != 1)
14713 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14714 x = gen_rtx_PLUS (Pmode, idx, x);
14716 if (addr.base)
14717 x = gen_rtx_PLUS (Pmode, addr.base, x);
14718 if (MEM_P (orig_x))
14719 x = replace_equiv_address_nv (orig_x, x);
14720 return x;
14723 /* In the name of slightly smaller debug output, and to cater to
14724 general assembler lossage, recognize PIC+GOTOFF and turn it back
14725 into a direct symbol reference.
14727 On Darwin, this is necessary to avoid a crash, because Darwin
14728 has a different PIC label for each routine but the DWARF debugging
14729 information is not associated with any particular routine, so it's
14730 necessary to remove references to the PIC label from RTL stored by
14731 the DWARF output code. */
14733 static rtx
14734 ix86_delegitimize_address (rtx x)
14736 rtx orig_x = delegitimize_mem_from_attrs (x);
14737 /* addend is NULL or some rtx if x is something+GOTOFF where
14738 something doesn't include the PIC register. */
14739 rtx addend = NULL_RTX;
14740 /* reg_addend is NULL or a multiple of some register. */
14741 rtx reg_addend = NULL_RTX;
14742 /* const_addend is NULL or a const_int. */
14743 rtx const_addend = NULL_RTX;
14744 /* This is the result, or NULL. */
14745 rtx result = NULL_RTX;
14747 x = orig_x;
14749 if (MEM_P (x))
14750 x = XEXP (x, 0);
14752 if (TARGET_64BIT)
14754 if (GET_CODE (x) == CONST
14755 && GET_CODE (XEXP (x, 0)) == PLUS
14756 && GET_MODE (XEXP (x, 0)) == Pmode
14757 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14758 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14759 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14761 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14762 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14763 if (MEM_P (orig_x))
14764 x = replace_equiv_address_nv (orig_x, x);
14765 return x;
14768 if (GET_CODE (x) == CONST
14769 && GET_CODE (XEXP (x, 0)) == UNSPEC
14770 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14771 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14772 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14774 x = XVECEXP (XEXP (x, 0), 0, 0);
14775 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14777 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14778 GET_MODE (x), 0);
14779 if (x == NULL_RTX)
14780 return orig_x;
14782 return x;
14785 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14786 return ix86_delegitimize_tls_address (orig_x);
14788 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14789 and -mcmodel=medium -fpic. */
14792 if (GET_CODE (x) != PLUS
14793 || GET_CODE (XEXP (x, 1)) != CONST)
14794 return ix86_delegitimize_tls_address (orig_x);
14796 if (ix86_pic_register_p (XEXP (x, 0)))
14797 /* %ebx + GOT/GOTOFF */
14799 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14801 /* %ebx + %reg * scale + GOT/GOTOFF */
14802 reg_addend = XEXP (x, 0);
14803 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14804 reg_addend = XEXP (reg_addend, 1);
14805 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14806 reg_addend = XEXP (reg_addend, 0);
14807 else
14809 reg_addend = NULL_RTX;
14810 addend = XEXP (x, 0);
14813 else
14814 addend = XEXP (x, 0);
14816 x = XEXP (XEXP (x, 1), 0);
14817 if (GET_CODE (x) == PLUS
14818 && CONST_INT_P (XEXP (x, 1)))
14820 const_addend = XEXP (x, 1);
14821 x = XEXP (x, 0);
14824 if (GET_CODE (x) == UNSPEC
14825 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14826 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14827 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14828 && !MEM_P (orig_x) && !addend)))
14829 result = XVECEXP (x, 0, 0);
14831 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14832 && !MEM_P (orig_x))
14833 result = XVECEXP (x, 0, 0);
14835 if (! result)
14836 return ix86_delegitimize_tls_address (orig_x);
14838 if (const_addend)
14839 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14840 if (reg_addend)
14841 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14842 if (addend)
14844 /* If the rest of original X doesn't involve the PIC register, add
14845 addend and subtract pic_offset_table_rtx. This can happen e.g.
14846 for code like:
14847 leal (%ebx, %ecx, 4), %ecx
14849 movl foo@GOTOFF(%ecx), %edx
14850 in which case we return (%ecx - %ebx) + foo
14851 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14852 and reload has completed. */
14853 if (pic_offset_table_rtx
14854 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14855 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14856 pic_offset_table_rtx),
14857 result);
14858 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14860 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14861 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14862 result = gen_rtx_PLUS (Pmode, tmp, result);
14864 else
14865 return orig_x;
14867 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14869 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14870 if (result == NULL_RTX)
14871 return orig_x;
14873 return result;
14876 /* If X is a machine specific address (i.e. a symbol or label being
14877 referenced as a displacement from the GOT implemented using an
14878 UNSPEC), then return the base term. Otherwise return X. */
14881 ix86_find_base_term (rtx x)
14883 rtx term;
14885 if (TARGET_64BIT)
14887 if (GET_CODE (x) != CONST)
14888 return x;
14889 term = XEXP (x, 0);
14890 if (GET_CODE (term) == PLUS
14891 && (CONST_INT_P (XEXP (term, 1))
14892 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14893 term = XEXP (term, 0);
14894 if (GET_CODE (term) != UNSPEC
14895 || (XINT (term, 1) != UNSPEC_GOTPCREL
14896 && XINT (term, 1) != UNSPEC_PCREL))
14897 return x;
14899 return XVECEXP (term, 0, 0);
14902 return ix86_delegitimize_address (x);
14905 static void
14906 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14907 bool fp, FILE *file)
14909 const char *suffix;
14911 if (mode == CCFPmode || mode == CCFPUmode)
14913 code = ix86_fp_compare_code_to_integer (code);
14914 mode = CCmode;
14916 if (reverse)
14917 code = reverse_condition (code);
14919 switch (code)
14921 case EQ:
14922 switch (mode)
14924 case CCAmode:
14925 suffix = "a";
14926 break;
14928 case CCCmode:
14929 suffix = "c";
14930 break;
14932 case CCOmode:
14933 suffix = "o";
14934 break;
14936 case CCSmode:
14937 suffix = "s";
14938 break;
14940 default:
14941 suffix = "e";
14943 break;
14944 case NE:
14945 switch (mode)
14947 case CCAmode:
14948 suffix = "na";
14949 break;
14951 case CCCmode:
14952 suffix = "nc";
14953 break;
14955 case CCOmode:
14956 suffix = "no";
14957 break;
14959 case CCSmode:
14960 suffix = "ns";
14961 break;
14963 default:
14964 suffix = "ne";
14966 break;
14967 case GT:
14968 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14969 suffix = "g";
14970 break;
14971 case GTU:
14972 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14973 Those same assemblers have the same but opposite lossage on cmov. */
14974 if (mode == CCmode)
14975 suffix = fp ? "nbe" : "a";
14976 else
14977 gcc_unreachable ();
14978 break;
14979 case LT:
14980 switch (mode)
14982 case CCNOmode:
14983 case CCGOCmode:
14984 suffix = "s";
14985 break;
14987 case CCmode:
14988 case CCGCmode:
14989 suffix = "l";
14990 break;
14992 default:
14993 gcc_unreachable ();
14995 break;
14996 case LTU:
14997 if (mode == CCmode)
14998 suffix = "b";
14999 else if (mode == CCCmode)
15000 suffix = fp ? "b" : "c";
15001 else
15002 gcc_unreachable ();
15003 break;
15004 case GE:
15005 switch (mode)
15007 case CCNOmode:
15008 case CCGOCmode:
15009 suffix = "ns";
15010 break;
15012 case CCmode:
15013 case CCGCmode:
15014 suffix = "ge";
15015 break;
15017 default:
15018 gcc_unreachable ();
15020 break;
15021 case GEU:
15022 if (mode == CCmode)
15023 suffix = "nb";
15024 else if (mode == CCCmode)
15025 suffix = fp ? "nb" : "nc";
15026 else
15027 gcc_unreachable ();
15028 break;
15029 case LE:
15030 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15031 suffix = "le";
15032 break;
15033 case LEU:
15034 if (mode == CCmode)
15035 suffix = "be";
15036 else
15037 gcc_unreachable ();
15038 break;
15039 case UNORDERED:
15040 suffix = fp ? "u" : "p";
15041 break;
15042 case ORDERED:
15043 suffix = fp ? "nu" : "np";
15044 break;
15045 default:
15046 gcc_unreachable ();
15048 fputs (suffix, file);
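/* Editorial sketch: for example, (gt ...) compared in CCGCmode yields the
   suffix "g", so an illustrative template such as "j%C1\t%l0" would print
   "jg"; the reversed 'c' code maps the same comparison through LE and
   prints "jle" instead.  */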
15051 /* Print the name of register X to FILE based on its machine mode and number.
15052 If CODE is 'w', pretend the mode is HImode.
15053 If CODE is 'b', pretend the mode is QImode.
15054 If CODE is 'k', pretend the mode is SImode.
15055 If CODE is 'q', pretend the mode is DImode.
15056 If CODE is 'x', pretend the mode is V4SFmode.
15057 If CODE is 't', pretend the mode is V8SFmode.
15058 If CODE is 'g', pretend the mode is V16SFmode.
15059 If CODE is 'h', pretend the reg is the 'high' byte register.
15060 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15061 If CODE is 'd', duplicate the operand for AVX instruction.
15064 void
15065 print_reg (rtx x, int code, FILE *file)
15067 const char *reg;
15068 unsigned int regno;
15069 bool duplicated = code == 'd' && TARGET_AVX;
15071 if (ASSEMBLER_DIALECT == ASM_ATT)
15072 putc ('%', file);
15074 if (x == pc_rtx)
15076 gcc_assert (TARGET_64BIT);
15077 fputs ("rip", file);
15078 return;
15081 regno = true_regnum (x);
15082 gcc_assert (regno != ARG_POINTER_REGNUM
15083 && regno != FRAME_POINTER_REGNUM
15084 && regno != FLAGS_REG
15085 && regno != FPSR_REG
15086 && regno != FPCR_REG);
15088 if (code == 'w' || MMX_REG_P (x))
15089 code = 2;
15090 else if (code == 'b')
15091 code = 1;
15092 else if (code == 'k')
15093 code = 4;
15094 else if (code == 'q')
15095 code = 8;
15096 else if (code == 'y')
15097 code = 3;
15098 else if (code == 'h')
15099 code = 0;
15100 else if (code == 'x')
15101 code = 16;
15102 else if (code == 't')
15103 code = 32;
15104 else if (code == 'g')
15105 code = 64;
15106 else
15107 code = GET_MODE_SIZE (GET_MODE (x));
15109 /* Irritatingly, the AMD extended registers use a different naming convention
15110 from the normal registers: "r%d[bwd]". */
15111 if (REX_INT_REGNO_P (regno))
15113 gcc_assert (TARGET_64BIT);
15114 putc ('r', file);
15115 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15116 switch (code)
15118 case 0:
15119 error ("extended registers have no high halves");
15120 break;
15121 case 1:
15122 putc ('b', file);
15123 break;
15124 case 2:
15125 putc ('w', file);
15126 break;
15127 case 4:
15128 putc ('d', file);
15129 break;
15130 case 8:
15131 /* no suffix */
15132 break;
15133 default:
15134 error ("unsupported operand size for extended register");
15135 break;
15137 return;
15140 reg = NULL;
15141 switch (code)
15143 case 3:
15144 if (STACK_TOP_P (x))
15146 reg = "st(0)";
15147 break;
15149 /* FALLTHRU */
15150 case 8:
15151 case 4:
15152 case 12:
15153 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15154 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15155 /* FALLTHRU */
15156 case 16:
15157 case 2:
15158 normal:
15159 reg = hi_reg_name[regno];
15160 break;
15161 case 1:
15162 if (regno >= ARRAY_SIZE (qi_reg_name))
15163 goto normal;
15164 reg = qi_reg_name[regno];
15165 break;
15166 case 0:
15167 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15168 goto normal;
15169 reg = qi_high_reg_name[regno];
15170 break;
15171 case 32:
15172 if (SSE_REG_P (x))
15174 gcc_assert (!duplicated);
15175 putc ('y', file);
15176 fputs (hi_reg_name[regno] + 1, file);
15177 return;
15179 case 64:
15180 if (SSE_REG_P (x))
15182 gcc_assert (!duplicated);
15183 putc ('z', file);
15184 fputs (hi_reg_name[REGNO (x)] + 1, file);
15185 return;
15187 break;
15188 default:
15189 gcc_unreachable ();
15192 fputs (reg, file);
15193 if (duplicated)
15195 if (ASSEMBLER_DIALECT == ASM_ATT)
15196 fprintf (file, ", %%%s", reg);
15197 else
15198 fprintf (file, ", %s", reg);
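/* Editorial sketch: with the 'd' code under AVX, a register operand such as
   xmm1 is printed twice, i.e. "%xmm1, %xmm1" in ATT syntax or
   "xmm1, xmm1" in Intel syntax, giving the duplicated-source operand form
   described in the comment above.  */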
15202 /* Meaning of CODE:
15203 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15204 C -- print opcode suffix for set/cmov insn.
15205 c -- like C, but print reversed condition
15206 F,f -- likewise, but for floating-point.
15207 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15208 otherwise nothing
15209 R -- print embedded rounding and sae.
15210 r -- print only sae.
15211 z -- print the opcode suffix for the size of the current operand.
15212 Z -- likewise, with special suffixes for x87 instructions.
15213 * -- print a star (in certain assembler syntax)
15214 A -- print an absolute memory reference.
15215 E -- print address with DImode register names if TARGET_64BIT.
15216 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15217 s -- print a shift double count, followed by the assembler's argument
15218 delimiter.
15219 b -- print the QImode name of the register for the indicated operand.
15220 %b0 would print %al if operands[0] is reg 0.
15221 w -- likewise, print the HImode name of the register.
15222 k -- likewise, print the SImode name of the register.
15223 q -- likewise, print the DImode name of the register.
15224 x -- likewise, print the V4SFmode name of the register.
15225 t -- likewise, print the V8SFmode name of the register.
15226 g -- likewise, print the V16SFmode name of the register.
15227 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15228 y -- print "st(0)" instead of "st" as a register.
15229 d -- print duplicated register operand for AVX instruction.
15230 D -- print condition for SSE cmp instruction.
15231 P -- if PIC, print an @PLT suffix.
15232 p -- print raw symbol name.
15233 X -- don't print any sort of PIC '@' suffix for a symbol.
15234 & -- print some in-use local-dynamic symbol name.
15235 H -- print a memory address offset by 8; used for sse high-parts
15236 Y -- print condition for XOP pcom* instruction.
15237 + -- print a branch hint as 'cs' or 'ds' prefix
15238 ; -- print a semicolon (after prefixes due to bug in older gas).
15239 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15240 @ -- print a segment register of thread base pointer load
15241 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15242 ! -- print MPX prefix for jxx/call/ret instructions if required.
15245 void
15246 ix86_print_operand (FILE *file, rtx x, int code)
15248 if (code)
15250 switch (code)
15252 case 'A':
15253 switch (ASSEMBLER_DIALECT)
15255 case ASM_ATT:
15256 putc ('*', file);
15257 break;
15259 case ASM_INTEL:
15260 /* Intel syntax. For absolute addresses, registers should not
15261 be surrounded by braces. */
15262 if (!REG_P (x))
15264 putc ('[', file);
15265 ix86_print_operand (file, x, 0);
15266 putc (']', file);
15267 return;
15269 break;
15271 default:
15272 gcc_unreachable ();
15275 ix86_print_operand (file, x, 0);
15276 return;
15278 case 'E':
15279 /* Wrap address in an UNSPEC to declare special handling. */
15280 if (TARGET_64BIT)
15281 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15283 output_address (x);
15284 return;
15286 case 'L':
15287 if (ASSEMBLER_DIALECT == ASM_ATT)
15288 putc ('l', file);
15289 return;
15291 case 'W':
15292 if (ASSEMBLER_DIALECT == ASM_ATT)
15293 putc ('w', file);
15294 return;
15296 case 'B':
15297 if (ASSEMBLER_DIALECT == ASM_ATT)
15298 putc ('b', file);
15299 return;
15301 case 'Q':
15302 if (ASSEMBLER_DIALECT == ASM_ATT)
15303 putc ('l', file);
15304 return;
15306 case 'S':
15307 if (ASSEMBLER_DIALECT == ASM_ATT)
15308 putc ('s', file);
15309 return;
15311 case 'T':
15312 if (ASSEMBLER_DIALECT == ASM_ATT)
15313 putc ('t', file);
15314 return;
15316 case 'O':
15317 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15318 if (ASSEMBLER_DIALECT != ASM_ATT)
15319 return;
15321 switch (GET_MODE_SIZE (GET_MODE (x)))
15323 case 2:
15324 putc ('w', file);
15325 break;
15327 case 4:
15328 putc ('l', file);
15329 break;
15331 case 8:
15332 putc ('q', file);
15333 break;
15335 default:
15336 output_operand_lossage
15337 ("invalid operand size for operand code 'O'");
15338 return;
15341 putc ('.', file);
15342 #endif
15343 return;
15345 case 'z':
15346 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15348 /* Opcodes don't get size suffixes if using Intel opcodes. */
15349 if (ASSEMBLER_DIALECT == ASM_INTEL)
15350 return;
15352 switch (GET_MODE_SIZE (GET_MODE (x)))
15354 case 1:
15355 putc ('b', file);
15356 return;
15358 case 2:
15359 putc ('w', file);
15360 return;
15362 case 4:
15363 putc ('l', file);
15364 return;
15366 case 8:
15367 putc ('q', file);
15368 return;
15370 default:
15371 output_operand_lossage
15372 ("invalid operand size for operand code 'z'");
15373 return;
15377 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15378 warning
15379 (0, "non-integer operand used with operand code 'z'");
15380 /* FALLTHRU */
15382 case 'Z':
15383 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15384 if (ASSEMBLER_DIALECT == ASM_INTEL)
15385 return;
15387 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15389 switch (GET_MODE_SIZE (GET_MODE (x)))
15391 case 2:
15392 #ifdef HAVE_AS_IX86_FILDS
15393 putc ('s', file);
15394 #endif
15395 return;
15397 case 4:
15398 putc ('l', file);
15399 return;
15401 case 8:
15402 #ifdef HAVE_AS_IX86_FILDQ
15403 putc ('q', file);
15404 #else
15405 fputs ("ll", file);
15406 #endif
15407 return;
15409 default:
15410 break;
15413 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15415 /* 387 opcodes don't get size suffixes
15416 if the operands are registers. */
15417 if (STACK_REG_P (x))
15418 return;
15420 switch (GET_MODE_SIZE (GET_MODE (x)))
15422 case 4:
15423 putc ('s', file);
15424 return;
15426 case 8:
15427 putc ('l', file);
15428 return;
15430 case 12:
15431 case 16:
15432 putc ('t', file);
15433 return;
15435 default:
15436 break;
15439 else
15441 output_operand_lossage
15442 ("invalid operand type used with operand code 'Z'");
15443 return;
15446 output_operand_lossage
15447 ("invalid operand size for operand code 'Z'");
15448 return;
15450 case 'd':
15451 case 'b':
15452 case 'w':
15453 case 'k':
15454 case 'q':
15455 case 'h':
15456 case 't':
15457 case 'g':
15458 case 'y':
15459 case 'x':
15460 case 'X':
15461 case 'P':
15462 case 'p':
15463 break;
15465 case 's':
15466 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15468 ix86_print_operand (file, x, 0);
15469 fputs (", ", file);
15471 return;
15473 case 'Y':
15474 switch (GET_CODE (x))
15476 case NE:
15477 fputs ("neq", file);
15478 break;
15479 case EQ:
15480 fputs ("eq", file);
15481 break;
15482 case GE:
15483 case GEU:
15484 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15485 break;
15486 case GT:
15487 case GTU:
15488 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15489 break;
15490 case LE:
15491 case LEU:
15492 fputs ("le", file);
15493 break;
15494 case LT:
15495 case LTU:
15496 fputs ("lt", file);
15497 break;
15498 case UNORDERED:
15499 fputs ("unord", file);
15500 break;
15501 case ORDERED:
15502 fputs ("ord", file);
15503 break;
15504 case UNEQ:
15505 fputs ("ueq", file);
15506 break;
15507 case UNGE:
15508 fputs ("nlt", file);
15509 break;
15510 case UNGT:
15511 fputs ("nle", file);
15512 break;
15513 case UNLE:
15514 fputs ("ule", file);
15515 break;
15516 case UNLT:
15517 fputs ("ult", file);
15518 break;
15519 case LTGT:
15520 fputs ("une", file);
15521 break;
15522 default:
15523 output_operand_lossage ("operand is not a condition code, "
15524 "invalid operand code 'Y'");
15525 return;
15527 return;
15529 case 'D':
15530 /* Little bit of braindamage here. The SSE compare instructions
15531 use completely different names for the comparisons than the
15532 fp conditional moves do. */
15533 switch (GET_CODE (x))
15535 case UNEQ:
15536 if (TARGET_AVX)
15538 fputs ("eq_us", file);
15539 break;
15541 case EQ:
15542 fputs ("eq", file);
15543 break;
15544 case UNLT:
15545 if (TARGET_AVX)
15547 fputs ("nge", file);
15548 break;
15550 case LT:
15551 fputs ("lt", file);
15552 break;
15553 case UNLE:
15554 if (TARGET_AVX)
15556 fputs ("ngt", file);
15557 break;
15559 case LE:
15560 fputs ("le", file);
15561 break;
15562 case UNORDERED:
15563 fputs ("unord", file);
15564 break;
15565 case LTGT:
15566 if (TARGET_AVX)
15568 fputs ("neq_oq", file);
15569 break;
15571 case NE:
15572 fputs ("neq", file);
15573 break;
15574 case GE:
15575 if (TARGET_AVX)
15577 fputs ("ge", file);
15578 break;
15580 case UNGE:
15581 fputs ("nlt", file);
15582 break;
15583 case GT:
15584 if (TARGET_AVX)
15586 fputs ("gt", file);
15587 break;
15589 case UNGT:
15590 fputs ("nle", file);
15591 break;
15592 case ORDERED:
15593 fputs ("ord", file);
15594 break;
15595 default:
15596 output_operand_lossage ("operand is not a condition code, "
15597 "invalid operand code 'D'");
15598 return;
15600 return;
15602 case 'F':
15603 case 'f':
15604 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15605 if (ASSEMBLER_DIALECT == ASM_ATT)
15606 putc ('.', file);
15607 #endif
15609 case 'C':
15610 case 'c':
15611 if (!COMPARISON_P (x))
15613 output_operand_lossage ("operand is not a condition code, "
15614 "invalid operand code '%c'", code);
15615 return;
15617 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15618 code == 'c' || code == 'f',
15619 code == 'F' || code == 'f',
15620 file);
15621 return;
15623 case 'H':
15624 if (!offsettable_memref_p (x))
15626 output_operand_lossage ("operand is not an offsettable memory "
15627 "reference, invalid operand code 'H'");
15628 return;
15630 /* It doesn't actually matter what mode we use here, as we're
15631 only going to use this for printing. */
15632 x = adjust_address_nv (x, DImode, 8);
15633 /* Output 'qword ptr' for intel assembler dialect. */
15634 if (ASSEMBLER_DIALECT == ASM_INTEL)
15635 code = 'q';
15636 break;
15638 case 'K':
15639 gcc_assert (CONST_INT_P (x));
15641 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15642 #ifdef HAVE_AS_IX86_HLE
15643 fputs ("xacquire ", file);
15644 #else
15645 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15646 #endif
15647 else if (INTVAL (x) & IX86_HLE_RELEASE)
15648 #ifdef HAVE_AS_IX86_HLE
15649 fputs ("xrelease ", file);
15650 #else
15651 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15652 #endif
15653 /* We do not want to print value of the operand. */
15654 return;
15656 case 'N':
15657 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15658 fputs ("{z}", file);
15659 return;
15661 case 'r':
15662 gcc_assert (CONST_INT_P (x));
15663 gcc_assert (INTVAL (x) == ROUND_SAE);
15665 if (ASSEMBLER_DIALECT == ASM_INTEL)
15666 fputs (", ", file);
15668 fputs ("{sae}", file);
15670 if (ASSEMBLER_DIALECT == ASM_ATT)
15671 fputs (", ", file);
15673 return;
15675 case 'R':
15676 gcc_assert (CONST_INT_P (x));
15678 if (ASSEMBLER_DIALECT == ASM_INTEL)
15679 fputs (", ", file);
15681 switch (INTVAL (x))
15683 case ROUND_NEAREST_INT | ROUND_SAE:
15684 fputs ("{rn-sae}", file);
15685 break;
15686 case ROUND_NEG_INF | ROUND_SAE:
15687 fputs ("{rd-sae}", file);
15688 break;
15689 case ROUND_POS_INF | ROUND_SAE:
15690 fputs ("{ru-sae}", file);
15691 break;
15692 case ROUND_ZERO | ROUND_SAE:
15693 fputs ("{rz-sae}", file);
15694 break;
15695 default:
15696 gcc_unreachable ();
15699 if (ASSEMBLER_DIALECT == ASM_ATT)
15700 fputs (", ", file);
15702 return;
15704 case '*':
15705 if (ASSEMBLER_DIALECT == ASM_ATT)
15706 putc ('*', file);
15707 return;
15709 case '&':
15711 const char *name = get_some_local_dynamic_name ();
15712 if (name == NULL)
15713 output_operand_lossage ("'%%&' used without any "
15714 "local dynamic TLS references");
15715 else
15716 assemble_name (file, name);
15717 return;
15720 case '+':
15722 rtx x;
15724 if (!optimize
15725 || optimize_function_for_size_p (cfun)
15726 || !TARGET_BRANCH_PREDICTION_HINTS)
15727 return;
15729 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15730 if (x)
15732 int pred_val = XINT (x, 0);
15734 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15735 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15737 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15738 bool cputaken
15739 = final_forward_branch_p (current_output_insn) == 0;
15741 /* Emit hints only in the case default branch prediction
15742 heuristics would fail. */
15743 if (taken != cputaken)
15745 /* We use 3e (DS) prefix for taken branches and
15746 2e (CS) prefix for not taken branches. */
15747 if (taken)
15748 fputs ("ds ; ", file);
15749 else
15750 fputs ("cs ; ", file);
15754 return;
15757 case ';':
15758 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15759 putc (';', file);
15760 #endif
15761 return;
15763 case '@':
15764 if (ASSEMBLER_DIALECT == ASM_ATT)
15765 putc ('%', file);
15767 /* The kernel uses a different segment register for performance
15768 reasons; a system call would not have to trash the userspace
15769 segment register, which would be expensive. */
15770 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15771 fputs ("fs", file);
15772 else
15773 fputs ("gs", file);
15774 return;
15776 case '~':
15777 putc (TARGET_AVX2 ? 'i' : 'f', file);
15778 return;
15780 case '^':
15781 if (TARGET_64BIT && Pmode != word_mode)
15782 fputs ("addr32 ", file);
15783 return;
15785 case '!':
15786 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15787 fputs ("bnd ", file);
15788 return;
15790 default:
15791 output_operand_lossage ("invalid operand code '%c'", code);
15795 if (REG_P (x))
15796 print_reg (x, code, file);
15798 else if (MEM_P (x))
15800 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15801 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15802 && GET_MODE (x) != BLKmode)
15804 const char * size;
15805 switch (GET_MODE_SIZE (GET_MODE (x)))
15807 case 1: size = "BYTE"; break;
15808 case 2: size = "WORD"; break;
15809 case 4: size = "DWORD"; break;
15810 case 8: size = "QWORD"; break;
15811 case 12: size = "TBYTE"; break;
15812 case 16:
15813 if (GET_MODE (x) == XFmode)
15814 size = "TBYTE";
15815 else
15816 size = "XMMWORD";
15817 break;
15818 case 32: size = "YMMWORD"; break;
15819 case 64: size = "ZMMWORD"; break;
15820 default:
15821 gcc_unreachable ();
15824 /* Check for explicit size override (codes 'b', 'w', 'k',
15825 'q' and 'x') */
15826 if (code == 'b')
15827 size = "BYTE";
15828 else if (code == 'w')
15829 size = "WORD";
15830 else if (code == 'k')
15831 size = "DWORD";
15832 else if (code == 'q')
15833 size = "QWORD";
15834 else if (code == 'x')
15835 size = "XMMWORD";
15837 fputs (size, file);
15838 fputs (" PTR ", file);
15841 x = XEXP (x, 0);
15842 /* Avoid (%rip) for call operands. */
15843 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15844 && !CONST_INT_P (x))
15845 output_addr_const (file, x);
15846 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15847 output_operand_lossage ("invalid constraints for operand");
15848 else
15849 output_address (x);
15852 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15854 REAL_VALUE_TYPE r;
15855 long l;
15857 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15858 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15860 if (ASSEMBLER_DIALECT == ASM_ATT)
15861 putc ('$', file);
15862 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15863 if (code == 'q')
15864 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15865 (unsigned long long) (int) l);
15866 else
15867 fprintf (file, "0x%08x", (unsigned int) l);
15870 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15872 REAL_VALUE_TYPE r;
15873 long l[2];
15875 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15876 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15878 if (ASSEMBLER_DIALECT == ASM_ATT)
15879 putc ('$', file);
15880 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15883 /* These float cases don't actually occur as immediate operands. */
15884 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15886 char dstr[30];
15888 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15889 fputs (dstr, file);
15892 else
15894 /* We have patterns that allow zero sets of memory, for instance.
15895 In 64-bit mode, we should probably support all 8-byte vectors,
15896 since we can in fact encode that into an immediate. */
15897 if (GET_CODE (x) == CONST_VECTOR)
15899 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15900 x = const0_rtx;
15903 if (code != 'P' && code != 'p')
15905 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15907 if (ASSEMBLER_DIALECT == ASM_ATT)
15908 putc ('$', file);
15910 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15911 || GET_CODE (x) == LABEL_REF)
15913 if (ASSEMBLER_DIALECT == ASM_ATT)
15914 putc ('$', file);
15915 else
15916 fputs ("OFFSET FLAT:", file);
15919 if (CONST_INT_P (x))
15920 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15921 else if (flag_pic || MACHOPIC_INDIRECT)
15922 output_pic_addr_const (file, x, code);
15923 else
15924 output_addr_const (file, x);
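/* Editorial sketch: as a concrete use of the codes above, an illustrative
   template such as "mov%z0\t{%1, %0|%0, %1}" would, for an SImode
   destination, print the size suffix "l" in ATT syntax (giving "movl")
   followed by the operands in source, destination order; in Intel syntax
   the 'z' code prints nothing and the braces select the swapped operand
   order.  */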
15928 static bool
15929 ix86_print_operand_punct_valid_p (unsigned char code)
15931 return (code == '@' || code == '*' || code == '+' || code == '&'
15932 || code == ';' || code == '~' || code == '^' || code == '!');
15935 /* Print a memory operand whose address is ADDR. */
15937 static void
15938 ix86_print_operand_address (FILE *file, rtx addr)
15940 struct ix86_address parts;
15941 rtx base, index, disp;
15942 int scale;
15943 int ok;
15944 bool vsib = false;
15945 int code = 0;
15947 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15949 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15950 gcc_assert (parts.index == NULL_RTX);
15951 parts.index = XVECEXP (addr, 0, 1);
15952 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15953 addr = XVECEXP (addr, 0, 0);
15954 vsib = true;
15956 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15958 gcc_assert (TARGET_64BIT);
15959 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15960 code = 'q';
15962 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15964 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15965 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15966 if (parts.base != NULL_RTX)
15968 parts.index = parts.base;
15969 parts.scale = 1;
15971 parts.base = XVECEXP (addr, 0, 0);
15972 addr = XVECEXP (addr, 0, 0);
15974 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15976 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15977 gcc_assert (parts.index == NULL_RTX);
15978 parts.index = XVECEXP (addr, 0, 1);
15979 addr = XVECEXP (addr, 0, 0);
15981 else
15982 ok = ix86_decompose_address (addr, &parts);
15984 gcc_assert (ok);
15986 base = parts.base;
15987 index = parts.index;
15988 disp = parts.disp;
15989 scale = parts.scale;
15991 switch (parts.seg)
15993 case SEG_DEFAULT:
15994 break;
15995 case SEG_FS:
15996 case SEG_GS:
15997 if (ASSEMBLER_DIALECT == ASM_ATT)
15998 putc ('%', file);
15999 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16000 break;
16001 default:
16002 gcc_unreachable ();
16005 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16006 if (TARGET_64BIT && !base && !index)
16008 rtx symbol = disp;
16010 if (GET_CODE (disp) == CONST
16011 && GET_CODE (XEXP (disp, 0)) == PLUS
16012 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16013 symbol = XEXP (XEXP (disp, 0), 0);
16015 if (GET_CODE (symbol) == LABEL_REF
16016 || (GET_CODE (symbol) == SYMBOL_REF
16017 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16018 base = pc_rtx;
16020 if (!base && !index)
16022 /* Displacement only requires special attention. */
16024 if (CONST_INT_P (disp))
16026 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16027 fputs ("ds:", file);
16028 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16030 else if (flag_pic)
16031 output_pic_addr_const (file, disp, 0);
16032 else
16033 output_addr_const (file, disp);
16035 else
16037 /* Print SImode register names to force addr32 prefix. */
16038 if (SImode_address_operand (addr, VOIDmode))
16040 #ifdef ENABLE_CHECKING
16041 gcc_assert (TARGET_64BIT);
16042 switch (GET_CODE (addr))
16044 case SUBREG:
16045 gcc_assert (GET_MODE (addr) == SImode);
16046 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16047 break;
16048 case ZERO_EXTEND:
16049 case AND:
16050 gcc_assert (GET_MODE (addr) == DImode);
16051 break;
16052 default:
16053 gcc_unreachable ();
16055 #endif
16056 gcc_assert (!code);
16057 code = 'k';
16059 else if (code == 0
16060 && TARGET_X32
16061 && disp
16062 && CONST_INT_P (disp)
16063 && INTVAL (disp) < -16*1024*1024)
16065 /* X32 runs in 64-bit mode, where displacement, DISP, in
16066 address DISP(%r64), is encoded as 32-bit immediate sign-
16067 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16068 address is %r64 + 0xffffffffbffffd00. When %r64 <
16069 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16070 which is invalid for x32. The correct address is %r64
16071 - 0x40000300 == 0xf7ffdd64. To properly encode
16072 -0x40000300(%r64) for x32, we zero-extend negative
16073 displacement by forcing addr32 prefix which truncates
16074 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16075 zero-extend all negative displacements, including -1(%rsp).
16076 However, for small negative displacements, sign-extension
16077 won't cause overflow. We only zero-extend negative
16078 displacements if they are < -16*1024*1024, which is also used
16079 to check legitimate address displacements for PIC. */
16080 code = 'k';
16083 if (ASSEMBLER_DIALECT == ASM_ATT)
16085 if (disp)
16087 if (flag_pic)
16088 output_pic_addr_const (file, disp, 0);
16089 else if (GET_CODE (disp) == LABEL_REF)
16090 output_asm_label (disp);
16091 else
16092 output_addr_const (file, disp);
16095 putc ('(', file);
16096 if (base)
16097 print_reg (base, code, file);
16098 if (index)
16100 putc (',', file);
16101 print_reg (index, vsib ? 0 : code, file);
16102 if (scale != 1 || vsib)
16103 fprintf (file, ",%d", scale);
16105 putc (')', file);
16107 else
16109 rtx offset = NULL_RTX;
16111 if (disp)
16113 /* Pull out the offset of a symbol; print any symbol itself. */
16114 if (GET_CODE (disp) == CONST
16115 && GET_CODE (XEXP (disp, 0)) == PLUS
16116 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16118 offset = XEXP (XEXP (disp, 0), 1);
16119 disp = gen_rtx_CONST (VOIDmode,
16120 XEXP (XEXP (disp, 0), 0));
16123 if (flag_pic)
16124 output_pic_addr_const (file, disp, 0);
16125 else if (GET_CODE (disp) == LABEL_REF)
16126 output_asm_label (disp);
16127 else if (CONST_INT_P (disp))
16128 offset = disp;
16129 else
16130 output_addr_const (file, disp);
16133 putc ('[', file);
16134 if (base)
16136 print_reg (base, code, file);
16137 if (offset)
16139 if (INTVAL (offset) >= 0)
16140 putc ('+', file);
16141 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16144 else if (offset)
16145 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16146 else
16147 putc ('0', file);
16149 if (index)
16151 putc ('+', file);
16152 print_reg (index, vsib ? 0 : code, file);
16153 if (scale != 1 || vsib)
16154 fprintf (file, "*%d", scale);
16156 putc (']', file);
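/* Editorial sketch: an address with base rax, index rbx, scale 4 and
   displacement 16 comes out of the two branches above as
   "16(%rax,%rbx,4)" in ATT syntax and "[rax+16+rbx*4]" in Intel syntax.  */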
16161 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16163 static bool
16164 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16166 rtx op;
16168 if (GET_CODE (x) != UNSPEC)
16169 return false;
16171 op = XVECEXP (x, 0, 0);
16172 switch (XINT (x, 1))
16174 case UNSPEC_GOTTPOFF:
16175 output_addr_const (file, op);
16176 /* FIXME: This might be @TPOFF in Sun ld. */
16177 fputs ("@gottpoff", file);
16178 break;
16179 case UNSPEC_TPOFF:
16180 output_addr_const (file, op);
16181 fputs ("@tpoff", file);
16182 break;
16183 case UNSPEC_NTPOFF:
16184 output_addr_const (file, op);
16185 if (TARGET_64BIT)
16186 fputs ("@tpoff", file);
16187 else
16188 fputs ("@ntpoff", file);
16189 break;
16190 case UNSPEC_DTPOFF:
16191 output_addr_const (file, op);
16192 fputs ("@dtpoff", file);
16193 break;
16194 case UNSPEC_GOTNTPOFF:
16195 output_addr_const (file, op);
16196 if (TARGET_64BIT)
16197 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16198 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16199 else
16200 fputs ("@gotntpoff", file);
16201 break;
16202 case UNSPEC_INDNTPOFF:
16203 output_addr_const (file, op);
16204 fputs ("@indntpoff", file);
16205 break;
16206 #if TARGET_MACHO
16207 case UNSPEC_MACHOPIC_OFFSET:
16208 output_addr_const (file, op);
16209 putc ('-', file);
16210 machopic_output_function_base_name (file);
16211 break;
16212 #endif
16214 case UNSPEC_STACK_CHECK:
16216 int offset;
16218 gcc_assert (flag_split_stack);
16220 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16221 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16222 #else
16223 gcc_unreachable ();
16224 #endif
16226 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16228 break;
16230 default:
16231 return false;
16234 return true;
16237 /* Split one or more double-mode RTL references into pairs of half-mode
16238 references. The RTL can be REG, offsettable MEM, integer constant, or
16239 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16240 split and "num" is its length. lo_half and hi_half are output arrays
16241 that parallel "operands". */
16243 void
16244 split_double_mode (machine_mode mode, rtx operands[],
16245 int num, rtx lo_half[], rtx hi_half[])
16247 machine_mode half_mode;
16248 unsigned int byte;
16250 switch (mode)
16252 case TImode:
16253 half_mode = DImode;
16254 break;
16255 case DImode:
16256 half_mode = SImode;
16257 break;
16258 default:
16259 gcc_unreachable ();
16262 byte = GET_MODE_SIZE (half_mode);
16264 while (num--)
16266 rtx op = operands[num];
16268 /* simplify_subreg refuses to split volatile memory addresses,
16269 but we still have to handle them. */
16270 if (MEM_P (op))
16272 lo_half[num] = adjust_address (op, half_mode, 0);
16273 hi_half[num] = adjust_address (op, half_mode, byte);
16275 else
16277 lo_half[num] = simplify_gen_subreg (half_mode, op,
16278 GET_MODE (op) == VOIDmode
16279 ? mode : GET_MODE (op), 0);
16280 hi_half[num] = simplify_gen_subreg (half_mode, op,
16281 GET_MODE (op) == VOIDmode
16282 ? mode : GET_MODE (op), byte);
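/* Editorial sketch: splitting the DImode constant 0x1122334455667788 this
   way yields lo_half = 0x55667788 and hi_half = 0x11223344, while a DImode
   MEM at address A is split into SImode MEMs at A and A+4 (x86 being
   little-endian).  */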
16287 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16288 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16289 is the expression of the binary operation. The output may either be
16290 emitted here, or returned to the caller, like all output_* functions.
16292 There is no guarantee that the operands are the same mode, as they
16293 might be within FLOAT or FLOAT_EXTEND expressions. */
16295 #ifndef SYSV386_COMPAT
16296 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16297 wants to fix the assemblers because that causes incompatibility
16298 with gcc. No-one wants to fix gcc because that causes
16299 incompatibility with assemblers... You can use the option of
16300 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16301 #define SYSV386_COMPAT 1
16302 #endif
16304 const char *
16305 output_387_binary_op (rtx insn, rtx *operands)
16307 static char buf[40];
16308 const char *p;
16309 const char *ssep;
16310 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16312 #ifdef ENABLE_CHECKING
16313 /* Even if we do not want to check the inputs, this documents the input
16314 constraints, which helps in understanding the following code. */
16315 if (STACK_REG_P (operands[0])
16316 && ((REG_P (operands[1])
16317 && REGNO (operands[0]) == REGNO (operands[1])
16318 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16319 || (REG_P (operands[2])
16320 && REGNO (operands[0]) == REGNO (operands[2])
16321 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16322 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16323 ; /* ok */
16324 else
16325 gcc_assert (is_sse);
16326 #endif
16328 switch (GET_CODE (operands[3]))
16330 case PLUS:
16331 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16332 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16333 p = "fiadd";
16334 else
16335 p = "fadd";
16336 ssep = "vadd";
16337 break;
16339 case MINUS:
16340 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16341 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16342 p = "fisub";
16343 else
16344 p = "fsub";
16345 ssep = "vsub";
16346 break;
16348 case MULT:
16349 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16350 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16351 p = "fimul";
16352 else
16353 p = "fmul";
16354 ssep = "vmul";
16355 break;
16357 case DIV:
16358 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16359 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16360 p = "fidiv";
16361 else
16362 p = "fdiv";
16363 ssep = "vdiv";
16364 break;
16366 default:
16367 gcc_unreachable ();
16370 if (is_sse)
16372 if (TARGET_AVX)
16374 strcpy (buf, ssep);
16375 if (GET_MODE (operands[0]) == SFmode)
16376 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16377 else
16378 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16380 else
16382 strcpy (buf, ssep + 1);
16383 if (GET_MODE (operands[0]) == SFmode)
16384 strcat (buf, "ss\t{%2, %0|%0, %2}");
16385 else
16386 strcat (buf, "sd\t{%2, %0|%0, %2}");
16388 return buf;
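/* Editorial sketch: for an SFmode PLUS handled by the SSE branch just
   above, the returned template is "vaddss\t{%2, %1, %0|%0, %1, %2}" under
   AVX (three-operand form) and "addss\t{%2, %0|%0, %2}" otherwise
   (two-operand form).  */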
16390 strcpy (buf, p);
16392 switch (GET_CODE (operands[3]))
16394 case MULT:
16395 case PLUS:
16396 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16398 rtx temp = operands[2];
16399 operands[2] = operands[1];
16400 operands[1] = temp;
16403 /* We know operands[0] == operands[1]. */
16405 if (MEM_P (operands[2]))
16407 p = "%Z2\t%2";
16408 break;
16411 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16413 if (STACK_TOP_P (operands[0]))
16414 /* How is it that we are storing to a dead operand[2]?
16415 Well, presumably operands[1] is dead too. We can't
16416 store the result to st(0) as st(0) gets popped on this
16417 instruction. Instead store to operands[2] (which I
16418 think has to be st(1)). st(1) will be popped later.
16419 gcc <= 2.8.1 didn't have this check and generated
16420 assembly code that the Unixware assembler rejected. */
16421 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16422 else
16423 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16424 break;
16427 if (STACK_TOP_P (operands[0]))
16428 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16429 else
16430 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16431 break;
16433 case MINUS:
16434 case DIV:
16435 if (MEM_P (operands[1]))
16437 p = "r%Z1\t%1";
16438 break;
16441 if (MEM_P (operands[2]))
16443 p = "%Z2\t%2";
16444 break;
16447 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16449 #if SYSV386_COMPAT
16450 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16451 derived assemblers, confusingly reverse the direction of
16452 the operation for fsub{r} and fdiv{r} when the
16453 destination register is not st(0). The Intel assembler
16454 doesn't have this brain damage. Read !SYSV386_COMPAT to
16455 figure out what the hardware really does. */
16456 if (STACK_TOP_P (operands[0]))
16457 p = "{p\t%0, %2|rp\t%2, %0}";
16458 else
16459 p = "{rp\t%2, %0|p\t%0, %2}";
16460 #else
16461 if (STACK_TOP_P (operands[0]))
16462 /* As above for fmul/fadd, we can't store to st(0). */
16463 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16464 else
16465 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16466 #endif
16467 break;
16470 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16472 #if SYSV386_COMPAT
16473 if (STACK_TOP_P (operands[0]))
16474 p = "{rp\t%0, %1|p\t%1, %0}";
16475 else
16476 p = "{p\t%1, %0|rp\t%0, %1}";
16477 #else
16478 if (STACK_TOP_P (operands[0]))
16479 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16480 else
16481 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16482 #endif
16483 break;
16486 if (STACK_TOP_P (operands[0]))
16488 if (STACK_TOP_P (operands[1]))
16489 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16490 else
16491 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16492 break;
16494 else if (STACK_TOP_P (operands[1]))
16496 #if SYSV386_COMPAT
16497 p = "{\t%1, %0|r\t%0, %1}";
16498 #else
16499 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16500 #endif
16502 else
16504 #if SYSV386_COMPAT
16505 p = "{r\t%2, %0|\t%0, %2}";
16506 #else
16507 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16508 #endif
16510 break;
16512 default:
16513 gcc_unreachable ();
16516 strcat (buf, p);
16517 return buf;
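/* Illustrative summary (not from the original source): the template built
   above concatenates the base mnemonic ("fadd", "fsub", "fmul", "fdiv", or
   their "fi*" integer-operand forms; "vadd"/"add" etc. for AVX/SSE) with a
   "p" suffix for popping forms and an "r" suffix for reversed forms, plus
   the operand template.  For AVX the three-operand form is used, roughly
   "vaddss %2, %1, %0" in AT&T syntax; for x87 a dying operands[2] typically
   yields a popping form such as "faddp" or "fsubrp".  */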
16520 /* Check if a 256bit AVX register is referenced inside of EXP. */
16522 static bool
16523 ix86_check_avx256_register (const_rtx exp)
16525 if (GET_CODE (exp) == SUBREG)
16526 exp = SUBREG_REG (exp);
16528 return (REG_P (exp)
16529 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16532 /* Return needed mode for entity in optimize_mode_switching pass. */
16534 static int
16535 ix86_avx_u128_mode_needed (rtx_insn *insn)
16537 if (CALL_P (insn))
16539 rtx link;
16541 /* Needed mode is set to AVX_U128_CLEAN if there are
16542 no 256bit modes used in function arguments. */
16543 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16544 link;
16545 link = XEXP (link, 1))
16547 if (GET_CODE (XEXP (link, 0)) == USE)
16549 rtx arg = XEXP (XEXP (link, 0), 0);
16551 if (ix86_check_avx256_register (arg))
16552 return AVX_U128_DIRTY;
16556 return AVX_U128_CLEAN;
16559 /* Require DIRTY mode if a 256bit AVX register is referenced.  The hardware
16560 changes state only when a 256bit register is written to, but we need
16561 to prevent the compiler from moving the optimal insertion point above
16562 an eventual read from a 256bit register. */
16563 subrtx_iterator::array_type array;
16564 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16565 if (ix86_check_avx256_register (*iter))
16566 return AVX_U128_DIRTY;
16568 return AVX_U128_ANY;
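/* Background note: the AVX_U128 entity tracks whether the upper 128 bits of
   the ymm registers may be dirty.  On some microarchitectures, mixing dirty
   upper state with legacy 128bit SSE instructions incurs a costly state
   transition, which an explicit vzeroupper avoids; the mode switching pass
   uses the DIRTY/CLEAN/ANY values returned here to decide where to insert
   one.  */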
16571 /* Return mode that i387 must be switched into
16572 prior to the execution of insn. */
16574 static int
16575 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16577 enum attr_i387_cw mode;
16579 /* The mode UNINITIALIZED is used to store the control word after a
16580 function call or ASM pattern.  The mode ANY specifies that the insn
16581 has no requirements on the control word and makes no changes to the
16582 bits we are interested in. */
16584 if (CALL_P (insn)
16585 || (NONJUMP_INSN_P (insn)
16586 && (asm_noperands (PATTERN (insn)) >= 0
16587 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16588 return I387_CW_UNINITIALIZED;
16590 if (recog_memoized (insn) < 0)
16591 return I387_CW_ANY;
16593 mode = get_attr_i387_cw (insn);
16595 switch (entity)
16597 case I387_TRUNC:
16598 if (mode == I387_CW_TRUNC)
16599 return mode;
16600 break;
16602 case I387_FLOOR:
16603 if (mode == I387_CW_FLOOR)
16604 return mode;
16605 break;
16607 case I387_CEIL:
16608 if (mode == I387_CW_CEIL)
16609 return mode;
16610 break;
16612 case I387_MASK_PM:
16613 if (mode == I387_CW_MASK_PM)
16614 return mode;
16615 break;
16617 default:
16618 gcc_unreachable ();
16621 return I387_CW_ANY;
16624 /* Return mode that entity must be switched into
16625 prior to the execution of insn. */
16627 static int
16628 ix86_mode_needed (int entity, rtx_insn *insn)
16630 switch (entity)
16632 case AVX_U128:
16633 return ix86_avx_u128_mode_needed (insn);
16634 case I387_TRUNC:
16635 case I387_FLOOR:
16636 case I387_CEIL:
16637 case I387_MASK_PM:
16638 return ix86_i387_mode_needed (entity, insn);
16639 default:
16640 gcc_unreachable ();
16642 return 0;
16645 /* Check if a 256bit AVX register is referenced in stores. */
16647 static void
16648 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16650 if (ix86_check_avx256_register (dest))
16652 bool *used = (bool *) data;
16653 *used = true;
16657 /* Calculate mode of upper 128bit AVX registers after the insn. */
16659 static int
16660 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16662 rtx pat = PATTERN (insn);
16664 if (vzeroupper_operation (pat, VOIDmode)
16665 || vzeroall_operation (pat, VOIDmode))
16666 return AVX_U128_CLEAN;
16668 /* We know that the state is clean after a CALL insn if the function
16669 return value does not use a 256bit register. */
16670 if (CALL_P (insn))
16672 bool avx_reg256_found = false;
16673 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16675 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16678 /* Otherwise, return current mode. Remember that if insn
16679 references AVX 256bit registers, the mode was already changed
16680 to DIRTY from MODE_NEEDED. */
16681 return mode;
16684 /* Return the mode that an insn results in. */
16687 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16689 switch (entity)
16691 case AVX_U128:
16692 return ix86_avx_u128_mode_after (mode, insn);
16693 case I387_TRUNC:
16694 case I387_FLOOR:
16695 case I387_CEIL:
16696 case I387_MASK_PM:
16697 return mode;
16698 default:
16699 gcc_unreachable ();
16703 static int
16704 ix86_avx_u128_mode_entry (void)
16706 tree arg;
16708 /* Entry mode is set to AVX_U128_DIRTY if there are
16709 256bit modes used in function arguments. */
16710 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16711 arg = TREE_CHAIN (arg))
16713 rtx incoming = DECL_INCOMING_RTL (arg);
16715 if (incoming && ix86_check_avx256_register (incoming))
16716 return AVX_U128_DIRTY;
16719 return AVX_U128_CLEAN;
16722 /* Return a mode that ENTITY is assumed to be
16723 switched to at function entry. */
16725 static int
16726 ix86_mode_entry (int entity)
16728 switch (entity)
16730 case AVX_U128:
16731 return ix86_avx_u128_mode_entry ();
16732 case I387_TRUNC:
16733 case I387_FLOOR:
16734 case I387_CEIL:
16735 case I387_MASK_PM:
16736 return I387_CW_ANY;
16737 default:
16738 gcc_unreachable ();
16742 static int
16743 ix86_avx_u128_mode_exit (void)
16745 rtx reg = crtl->return_rtx;
16747 /* Exit mode is set to AVX_U128_DIRTY if the function
16748 return value uses a 256bit register. */
16749 if (reg && ix86_check_avx256_register (reg))
16750 return AVX_U128_DIRTY;
16752 return AVX_U128_CLEAN;
16755 /* Return a mode that ENTITY is assumed to be
16756 switched to at function exit. */
16758 static int
16759 ix86_mode_exit (int entity)
16761 switch (entity)
16763 case AVX_U128:
16764 return ix86_avx_u128_mode_exit ();
16765 case I387_TRUNC:
16766 case I387_FLOOR:
16767 case I387_CEIL:
16768 case I387_MASK_PM:
16769 return I387_CW_ANY;
16770 default:
16771 gcc_unreachable ();
16775 static int
16776 ix86_mode_priority (int, int n)
16778 return n;
16781 /* Output code to initialize the control word copies used by trunc?f?i and
16782 rounding patterns.  The current control word is saved in SLOT_CW_STORED,
16783 and a copy modified for MODE is stored in the stack slot for MODE. */
16785 static void
16786 emit_i387_cw_initialization (int mode)
16788 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16789 rtx new_mode;
16791 enum ix86_stack_slot slot;
16793 rtx reg = gen_reg_rtx (HImode);
16795 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16796 emit_move_insn (reg, copy_rtx (stored_mode));
16798 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16799 || optimize_insn_for_size_p ())
16801 switch (mode)
16803 case I387_CW_TRUNC:
16804 /* round toward zero (truncate) */
16805 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16806 slot = SLOT_CW_TRUNC;
16807 break;
16809 case I387_CW_FLOOR:
16810 /* round down toward -oo */
16811 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16812 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16813 slot = SLOT_CW_FLOOR;
16814 break;
16816 case I387_CW_CEIL:
16817 /* round up toward +oo */
16818 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16819 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16820 slot = SLOT_CW_CEIL;
16821 break;
16823 case I387_CW_MASK_PM:
16824 /* mask precision exception for nearbyint() */
16825 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16826 slot = SLOT_CW_MASK_PM;
16827 break;
16829 default:
16830 gcc_unreachable ();
16833 else
16835 switch (mode)
16837 case I387_CW_TRUNC:
16838 /* round toward zero (truncate) */
16839 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16840 slot = SLOT_CW_TRUNC;
16841 break;
16843 case I387_CW_FLOOR:
16844 /* round down toward -oo */
16845 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16846 slot = SLOT_CW_FLOOR;
16847 break;
16849 case I387_CW_CEIL:
16850 /* round up toward +oo */
16851 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16852 slot = SLOT_CW_CEIL;
16853 break;
16855 case I387_CW_MASK_PM:
16856 /* mask precision exception for nearbyint() */
16857 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16858 slot = SLOT_CW_MASK_PM;
16859 break;
16861 default:
16862 gcc_unreachable ();
16866 gcc_assert (slot < MAX_386_STACK_LOCALS);
16868 new_mode = assign_386_stack_local (HImode, slot);
16869 emit_move_insn (new_mode, reg);
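/* Reference note: the constants above correspond to the x87 control word
   layout.  The rounding-control field is bits 11:10 (mask 0x0c00):
   00 = round to nearest, 01 (0x0400) = round down, 10 (0x0800) = round up,
   11 (0x0c00) = truncate toward zero.  Bit 5 (0x0020) is the precision
   exception mask used for nearbyint.  */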
16872 /* Emit vzeroupper. */
16874 void
16875 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16877 int i;
16879 /* Cancel automatic vzeroupper insertion if there are
16880 live call-saved SSE registers at the insertion point. */
16882 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16883 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16884 return;
16886 if (TARGET_64BIT)
16887 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16888 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16889 return;
16891 emit_insn (gen_avx_vzeroupper ());
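/* Note: vzeroupper zeroes the upper 128 bits of every vector register, so
   the checks above cancel its insertion whenever a call-saved SSE register
   is live at this point and could still hold a value in its upper half.  */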
16896 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16897 is the set of hard registers live at the point where the insn(s)
16898 are to be inserted. */
16900 static void
16901 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16902 HARD_REG_SET regs_live)
16904 switch (entity)
16906 case AVX_U128:
16907 if (mode == AVX_U128_CLEAN)
16908 ix86_avx_emit_vzeroupper (regs_live);
16909 break;
16910 case I387_TRUNC:
16911 case I387_FLOOR:
16912 case I387_CEIL:
16913 case I387_MASK_PM:
16914 if (mode != I387_CW_ANY
16915 && mode != I387_CW_UNINITIALIZED)
16916 emit_i387_cw_initialization (mode);
16917 break;
16918 default:
16919 gcc_unreachable ();
16923 /* Output code for INSN to convert a float to a signed int. OPERANDS
16924 are the insn operands. The output may be [HSD]Imode and the input
16925 operand may be [SDX]Fmode. */
16927 const char *
16928 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16930 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16931 int dimode_p = GET_MODE (operands[0]) == DImode;
16932 int round_mode = get_attr_i387_cw (insn);
16934 /* Jump through a hoop or two for DImode, since the hardware has no
16935 non-popping instruction. We used to do this a different way, but
16936 that was somewhat fragile and broke with post-reload splitters. */
16937 if ((dimode_p || fisttp) && !stack_top_dies)
16938 output_asm_insn ("fld\t%y1", operands);
16940 gcc_assert (STACK_TOP_P (operands[1]));
16941 gcc_assert (MEM_P (operands[0]));
16942 gcc_assert (GET_MODE (operands[1]) != TFmode);
16944 if (fisttp)
16945 output_asm_insn ("fisttp%Z0\t%0", operands);
16946 else
16948 if (round_mode != I387_CW_ANY)
16949 output_asm_insn ("fldcw\t%3", operands);
16950 if (stack_top_dies || dimode_p)
16951 output_asm_insn ("fistp%Z0\t%0", operands);
16952 else
16953 output_asm_insn ("fist%Z0\t%0", operands);
16954 if (round_mode != I387_CW_ANY)
16955 output_asm_insn ("fldcw\t%2", operands);
16958 return "";
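/* Illustrative example (a sketch, not part of the original source): for an
   SImode store where the control word must be changed and fisttp is not
   available, the code above emits roughly
       fldcw  %3       ; switch to the modified (e.g. truncating) control word
       fistpl %0       ; (or fistl when the value stays on the x87 stack)
       fldcw  %2       ; restore the saved control word
   with operand 3 the modified control-word slot and operand 2 the saved
   original.  */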
16961 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16962 have the values zero or one, indicates the ffreep insn's operand
16963 from the OPERANDS array. */
16965 static const char *
16966 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16968 if (TARGET_USE_FFREEP)
16969 #ifdef HAVE_AS_IX86_FFREEP
16970 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16971 #else
16973 static char retval[32];
16974 int regno = REGNO (operands[opno]);
16976 gcc_assert (STACK_REGNO_P (regno));
16978 regno -= FIRST_STACK_REG;
16980 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16981 return retval;
16983 #endif
16985 return opno ? "fstp\t%y1" : "fstp\t%y0";
16989 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16990 should be used. UNORDERED_P is true when fucom should be used. */
16992 const char *
16993 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16995 int stack_top_dies;
16996 rtx cmp_op0, cmp_op1;
16997 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16999 if (eflags_p)
17001 cmp_op0 = operands[0];
17002 cmp_op1 = operands[1];
17004 else
17006 cmp_op0 = operands[1];
17007 cmp_op1 = operands[2];
17010 if (is_sse)
17012 if (GET_MODE (operands[0]) == SFmode)
17013 if (unordered_p)
17014 return "%vucomiss\t{%1, %0|%0, %1}";
17015 else
17016 return "%vcomiss\t{%1, %0|%0, %1}";
17017 else
17018 if (unordered_p)
17019 return "%vucomisd\t{%1, %0|%0, %1}";
17020 else
17021 return "%vcomisd\t{%1, %0|%0, %1}";
17024 gcc_assert (STACK_TOP_P (cmp_op0));
17026 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17028 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17030 if (stack_top_dies)
17032 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17033 return output_387_ffreep (operands, 1);
17035 else
17036 return "ftst\n\tfnstsw\t%0";
17039 if (STACK_REG_P (cmp_op1)
17040 && stack_top_dies
17041 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17042 && REGNO (cmp_op1) != FIRST_STACK_REG)
17044 /* If both the top of the 387 stack dies and the other operand
17045 is also a stack register that dies, then this must be a
17046 `fcompp' float compare. */
17048 if (eflags_p)
17050 /* There is no double popping fcomi variant. Fortunately,
17051 eflags is immune from the fstp's cc clobbering. */
17052 if (unordered_p)
17053 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17054 else
17055 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17056 return output_387_ffreep (operands, 0);
17058 else
17060 if (unordered_p)
17061 return "fucompp\n\tfnstsw\t%0";
17062 else
17063 return "fcompp\n\tfnstsw\t%0";
17066 else
17068 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17070 static const char * const alt[16] =
17072 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17073 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17074 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17075 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17077 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17078 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17079 NULL,
17080 NULL,
17082 "fcomi\t{%y1, %0|%0, %y1}",
17083 "fcomip\t{%y1, %0|%0, %y1}",
17084 "fucomi\t{%y1, %0|%0, %y1}",
17085 "fucomip\t{%y1, %0|%0, %y1}",
17087 NULL,
17088 NULL,
17089 NULL,
17090 NULL
17093 int mask;
17094 const char *ret;
17096 mask = eflags_p << 3;
17097 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17098 mask |= unordered_p << 1;
17099 mask |= stack_top_dies;
17101 gcc_assert (mask < 16);
17102 ret = alt[mask];
17103 gcc_assert (ret);
17105 return ret;
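/* Worked example of the encoding above: for fucomip the inputs are
   eflags_p = 1, integer operand = 0, unordered_p = 1, stack_top_dies = 1,
   giving mask = 8 + 0 + 2 + 1 = 11, which selects
   "fucomip\t{%y1, %0|%0, %y1}" from the table.  */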
17109 void
17110 ix86_output_addr_vec_elt (FILE *file, int value)
17112 const char *directive = ASM_LONG;
17114 #ifdef ASM_QUAD
17115 if (TARGET_LP64)
17116 directive = ASM_QUAD;
17117 #else
17118 gcc_assert (!TARGET_64BIT);
17119 #endif
17121 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17124 void
17125 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17127 const char *directive = ASM_LONG;
17129 #ifdef ASM_QUAD
17130 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17131 directive = ASM_QUAD;
17132 #else
17133 gcc_assert (!TARGET_64BIT);
17134 #endif
17135 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17136 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17137 fprintf (file, "%s%s%d-%s%d\n",
17138 directive, LPREFIX, value, LPREFIX, rel);
17139 else if (HAVE_AS_GOTOFF_IN_DATA)
17140 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17141 #if TARGET_MACHO
17142 else if (TARGET_MACHO)
17144 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17145 machopic_output_function_base_name (file);
17146 putc ('\n', file);
17148 #endif
17149 else
17150 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17151 GOT_SYMBOL_NAME, LPREFIX, value);
17154 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17155 for the target. */
17157 void
17158 ix86_expand_clear (rtx dest)
17160 rtx tmp;
17162 /* We play register width games, which are only valid after reload. */
17163 gcc_assert (reload_completed);
17165 /* Avoid HImode and its attendant prefix byte. */
17166 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17167 dest = gen_rtx_REG (SImode, REGNO (dest));
17168 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17170 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17172 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17173 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17176 emit_insn (tmp);
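/* Example: for a 32bit destination this expands to either
       movl $0, %eax        (TARGET_USE_MOV0; does not touch the flags)
   or
       xorl %eax, %eax      (shorter and breaks dependencies, but clobbers
                             EFLAGS, hence the added CLOBBER above).
   Destinations narrower than 4 bytes are widened to SImode first.  */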
17179 /* X is an unchanging MEM. If it is a constant pool reference, return
17180 the constant pool rtx, else NULL. */
17183 maybe_get_pool_constant (rtx x)
17185 x = ix86_delegitimize_address (XEXP (x, 0));
17187 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17188 return get_pool_constant (x);
17190 return NULL_RTX;
17193 void
17194 ix86_expand_move (machine_mode mode, rtx operands[])
17196 rtx op0, op1;
17197 enum tls_model model;
17199 op0 = operands[0];
17200 op1 = operands[1];
17202 if (GET_CODE (op1) == SYMBOL_REF)
17204 rtx tmp;
17206 model = SYMBOL_REF_TLS_MODEL (op1);
17207 if (model)
17209 op1 = legitimize_tls_address (op1, model, true);
17210 op1 = force_operand (op1, op0);
17211 if (op1 == op0)
17212 return;
17213 op1 = convert_to_mode (mode, op1, 1);
17215 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17216 op1 = tmp;
17218 else if (GET_CODE (op1) == CONST
17219 && GET_CODE (XEXP (op1, 0)) == PLUS
17220 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17222 rtx addend = XEXP (XEXP (op1, 0), 1);
17223 rtx symbol = XEXP (XEXP (op1, 0), 0);
17224 rtx tmp;
17226 model = SYMBOL_REF_TLS_MODEL (symbol);
17227 if (model)
17228 tmp = legitimize_tls_address (symbol, model, true);
17229 else
17230 tmp = legitimize_pe_coff_symbol (symbol, true);
17232 if (tmp)
17234 tmp = force_operand (tmp, NULL);
17235 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17236 op0, 1, OPTAB_DIRECT);
17237 if (tmp == op0)
17238 return;
17239 op1 = convert_to_mode (mode, tmp, 1);
17243 if ((flag_pic || MACHOPIC_INDIRECT)
17244 && symbolic_operand (op1, mode))
17246 if (TARGET_MACHO && !TARGET_64BIT)
17248 #if TARGET_MACHO
17249 /* dynamic-no-pic */
17250 if (MACHOPIC_INDIRECT)
17252 rtx temp = ((reload_in_progress
17253 || ((op0 && REG_P (op0))
17254 && mode == Pmode))
17255 ? op0 : gen_reg_rtx (Pmode));
17256 op1 = machopic_indirect_data_reference (op1, temp);
17257 if (MACHOPIC_PURE)
17258 op1 = machopic_legitimize_pic_address (op1, mode,
17259 temp == op1 ? 0 : temp);
17261 if (op0 != op1 && GET_CODE (op0) != MEM)
17263 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17264 emit_insn (insn);
17265 return;
17267 if (GET_CODE (op0) == MEM)
17268 op1 = force_reg (Pmode, op1);
17269 else
17271 rtx temp = op0;
17272 if (GET_CODE (temp) != REG)
17273 temp = gen_reg_rtx (Pmode);
17274 temp = legitimize_pic_address (op1, temp);
17275 if (temp == op0)
17276 return;
17277 op1 = temp;
17279 /* dynamic-no-pic */
17280 #endif
17282 else
17284 if (MEM_P (op0))
17285 op1 = force_reg (mode, op1);
17286 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17288 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17289 op1 = legitimize_pic_address (op1, reg);
17290 if (op0 == op1)
17291 return;
17292 op1 = convert_to_mode (mode, op1, 1);
17296 else
17298 if (MEM_P (op0)
17299 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17300 || !push_operand (op0, mode))
17301 && MEM_P (op1))
17302 op1 = force_reg (mode, op1);
17304 if (push_operand (op0, mode)
17305 && ! general_no_elim_operand (op1, mode))
17306 op1 = copy_to_mode_reg (mode, op1);
17308 /* Force large constants in 64bit compilation into a register
17309 to get them CSEd. */
17310 if (can_create_pseudo_p ()
17311 && (mode == DImode) && TARGET_64BIT
17312 && immediate_operand (op1, mode)
17313 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17314 && !register_operand (op0, mode)
17315 && optimize)
17316 op1 = copy_to_mode_reg (mode, op1);
17318 if (can_create_pseudo_p ()
17319 && FLOAT_MODE_P (mode)
17320 && GET_CODE (op1) == CONST_DOUBLE)
17322 /* If we are loading a floating point constant to a register,
17323 force the value to memory now, since we'll get better code
17324 out of the back end. */
17326 op1 = validize_mem (force_const_mem (mode, op1));
17327 if (!register_operand (op0, mode))
17329 rtx temp = gen_reg_rtx (mode);
17330 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17331 emit_move_insn (op0, temp);
17332 return;
17337 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17340 void
17341 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17343 rtx op0 = operands[0], op1 = operands[1];
17344 unsigned int align = GET_MODE_ALIGNMENT (mode);
17346 if (push_operand (op0, VOIDmode))
17347 op0 = emit_move_resolve_push (mode, op0);
17349 /* Force constants other than zero into memory. We do not know how
17350 the instructions used to build constants modify the upper 64 bits
17351 of the register; once we have that information we may be able
17352 to handle some of them more efficiently. */
17353 if (can_create_pseudo_p ()
17354 && register_operand (op0, mode)
17355 && (CONSTANT_P (op1)
17356 || (GET_CODE (op1) == SUBREG
17357 && CONSTANT_P (SUBREG_REG (op1))))
17358 && !standard_sse_constant_p (op1))
17359 op1 = validize_mem (force_const_mem (mode, op1));
17361 /* We need to check memory alignment for SSE modes since an attribute
17362 can make operands unaligned. */
17363 if (can_create_pseudo_p ()
17364 && SSE_REG_MODE_P (mode)
17365 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17366 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17368 rtx tmp[2];
17370 /* ix86_expand_vector_move_misalign() does not like constants ... */
17371 if (CONSTANT_P (op1)
17372 || (GET_CODE (op1) == SUBREG
17373 && CONSTANT_P (SUBREG_REG (op1))))
17374 op1 = validize_mem (force_const_mem (mode, op1));
17376 /* ... nor both arguments in memory. */
17377 if (!register_operand (op0, mode)
17378 && !register_operand (op1, mode))
17379 op1 = force_reg (mode, op1);
17381 tmp[0] = op0; tmp[1] = op1;
17382 ix86_expand_vector_move_misalign (mode, tmp);
17383 return;
17386 /* Make operand1 a register if it isn't already. */
17387 if (can_create_pseudo_p ()
17388 && !register_operand (op0, mode)
17389 && !register_operand (op1, mode))
17391 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17392 return;
17395 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17398 /* Split 32-byte AVX unaligned load and store if needed. */
17400 static void
17401 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17403 rtx m;
17404 rtx (*extract) (rtx, rtx, rtx);
17405 rtx (*load_unaligned) (rtx, rtx);
17406 rtx (*store_unaligned) (rtx, rtx);
17407 machine_mode mode;
17409 switch (GET_MODE (op0))
17411 default:
17412 gcc_unreachable ();
17413 case V32QImode:
17414 extract = gen_avx_vextractf128v32qi;
17415 load_unaligned = gen_avx_loaddquv32qi;
17416 store_unaligned = gen_avx_storedquv32qi;
17417 mode = V16QImode;
17418 break;
17419 case V8SFmode:
17420 extract = gen_avx_vextractf128v8sf;
17421 load_unaligned = gen_avx_loadups256;
17422 store_unaligned = gen_avx_storeups256;
17423 mode = V4SFmode;
17424 break;
17425 case V4DFmode:
17426 extract = gen_avx_vextractf128v4df;
17427 load_unaligned = gen_avx_loadupd256;
17428 store_unaligned = gen_avx_storeupd256;
17429 mode = V2DFmode;
17430 break;
17433 if (MEM_P (op1))
17435 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17437 rtx r = gen_reg_rtx (mode);
17438 m = adjust_address (op1, mode, 0);
17439 emit_move_insn (r, m);
17440 m = adjust_address (op1, mode, 16);
17441 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17442 emit_move_insn (op0, r);
17444 /* Normal *mov<mode>_internal pattern will handle
17445 unaligned loads just fine if misaligned_operand
17446 is true, and without the UNSPEC it can be combined
17447 with arithmetic instructions. */
17448 else if (misaligned_operand (op1, GET_MODE (op1)))
17449 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17450 else
17451 emit_insn (load_unaligned (op0, op1));
17453 else if (MEM_P (op0))
17455 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17457 m = adjust_address (op0, mode, 0);
17458 emit_insn (extract (m, op1, const0_rtx));
17459 m = adjust_address (op0, mode, 16);
17460 emit_insn (extract (m, op1, const1_rtx));
17462 else
17463 emit_insn (store_unaligned (op0, op1));
17465 else
17466 gcc_unreachable ();
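/* Summary note: when the 32-byte access is split, an unaligned load becomes
   two 16-byte loads recombined with a VEC_CONCAT (normally matched as a
   vinsertf128 pattern), and an unaligned store becomes two vextractf128
   stores.  The TARGET_AVX256_SPLIT_UNALIGNED_{LOAD,STORE} tuning flags
   select this behaviour for processors where split accesses are faster.  */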
17469 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17470 straight to ix86_expand_vector_move. */
17471 /* Code generation for scalar reg-reg moves of single and double precision data:
17472 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17473 movaps reg, reg
17474 else
17475 movss reg, reg
17476 if (x86_sse_partial_reg_dependency == true)
17477 movapd reg, reg
17478 else
17479 movsd reg, reg
17481 Code generation for scalar loads of double precision data:
17482 if (x86_sse_split_regs == true)
17483 movlpd mem, reg (gas syntax)
17484 else
17485 movsd mem, reg
17487 Code generation for unaligned packed loads of single precision data
17488 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17489 if (x86_sse_unaligned_move_optimal)
17490 movups mem, reg
17492 if (x86_sse_partial_reg_dependency == true)
17494 xorps reg, reg
17495 movlps mem, reg
17496 movhps mem+8, reg
17498 else
17500 movlps mem, reg
17501 movhps mem+8, reg
17504 Code generation for unaligned packed loads of double precision data
17505 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17506 if (x86_sse_unaligned_move_optimal)
17507 movupd mem, reg
17509 if (x86_sse_split_regs == true)
17511 movlpd mem, reg
17512 movhpd mem+8, reg
17514 else
17516 movsd mem, reg
17517 movhpd mem+8, reg
17521 void
17522 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17524 rtx op0, op1, orig_op0 = NULL_RTX, m;
17525 rtx (*load_unaligned) (rtx, rtx);
17526 rtx (*store_unaligned) (rtx, rtx);
17528 op0 = operands[0];
17529 op1 = operands[1];
17531 if (GET_MODE_SIZE (mode) == 64)
17533 switch (GET_MODE_CLASS (mode))
17535 case MODE_VECTOR_INT:
17536 case MODE_INT:
17537 if (GET_MODE (op0) != V16SImode)
17539 if (!MEM_P (op0))
17541 orig_op0 = op0;
17542 op0 = gen_reg_rtx (V16SImode);
17544 else
17545 op0 = gen_lowpart (V16SImode, op0);
17547 op1 = gen_lowpart (V16SImode, op1);
17548 /* FALLTHRU */
17550 case MODE_VECTOR_FLOAT:
17551 switch (GET_MODE (op0))
17553 default:
17554 gcc_unreachable ();
17555 case V16SImode:
17556 load_unaligned = gen_avx512f_loaddquv16si;
17557 store_unaligned = gen_avx512f_storedquv16si;
17558 break;
17559 case V16SFmode:
17560 load_unaligned = gen_avx512f_loadups512;
17561 store_unaligned = gen_avx512f_storeups512;
17562 break;
17563 case V8DFmode:
17564 load_unaligned = gen_avx512f_loadupd512;
17565 store_unaligned = gen_avx512f_storeupd512;
17566 break;
17569 if (MEM_P (op1))
17570 emit_insn (load_unaligned (op0, op1));
17571 else if (MEM_P (op0))
17572 emit_insn (store_unaligned (op0, op1));
17573 else
17574 gcc_unreachable ();
17575 if (orig_op0)
17576 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17577 break;
17579 default:
17580 gcc_unreachable ();
17583 return;
17586 if (TARGET_AVX
17587 && GET_MODE_SIZE (mode) == 32)
17589 switch (GET_MODE_CLASS (mode))
17591 case MODE_VECTOR_INT:
17592 case MODE_INT:
17593 if (GET_MODE (op0) != V32QImode)
17595 if (!MEM_P (op0))
17597 orig_op0 = op0;
17598 op0 = gen_reg_rtx (V32QImode);
17600 else
17601 op0 = gen_lowpart (V32QImode, op0);
17603 op1 = gen_lowpart (V32QImode, op1);
17604 /* FALLTHRU */
17606 case MODE_VECTOR_FLOAT:
17607 ix86_avx256_split_vector_move_misalign (op0, op1);
17608 if (orig_op0)
17609 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17610 break;
17612 default:
17613 gcc_unreachable ();
17616 return;
17619 if (MEM_P (op1))
17621 /* Normal *mov<mode>_internal pattern will handle
17622 unaligned loads just fine if misaligned_operand
17623 is true, and without the UNSPEC it can be combined
17624 with arithmetic instructions. */
17625 if (TARGET_AVX
17626 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17627 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17628 && misaligned_operand (op1, GET_MODE (op1)))
17629 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17630 /* ??? If we have typed data, then it would appear that using
17631 movdqu is the only way to get unaligned data loaded with
17632 integer type. */
17633 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17635 if (GET_MODE (op0) != V16QImode)
17637 orig_op0 = op0;
17638 op0 = gen_reg_rtx (V16QImode);
17640 op1 = gen_lowpart (V16QImode, op1);
17641 /* We will eventually emit movups based on insn attributes. */
17642 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17643 if (orig_op0)
17644 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17646 else if (TARGET_SSE2 && mode == V2DFmode)
17648 rtx zero;
17650 if (TARGET_AVX
17651 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17652 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17653 || optimize_insn_for_size_p ())
17655 /* We will eventually emit movups based on insn attributes. */
17656 emit_insn (gen_sse2_loadupd (op0, op1));
17657 return;
17660 /* When SSE registers are split into halves, we can avoid
17661 writing to the top half twice. */
17662 if (TARGET_SSE_SPLIT_REGS)
17664 emit_clobber (op0);
17665 zero = op0;
17667 else
17669 /* ??? Not sure about the best option for the Intel chips.
17670 The following would seem to satisfy; the register is
17671 entirely cleared, breaking the dependency chain. We
17672 then store to the upper half, with a dependency depth
17673 of one. A rumor has it that Intel recommends two movsd
17674 followed by an unpacklpd, but this is unconfirmed. And
17675 given that the dependency depth of the unpacklpd would
17676 still be one, I'm not sure why this would be better. */
17677 zero = CONST0_RTX (V2DFmode);
17680 m = adjust_address (op1, DFmode, 0);
17681 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17682 m = adjust_address (op1, DFmode, 8);
17683 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17685 else
17687 rtx t;
17689 if (TARGET_AVX
17690 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17691 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17692 || optimize_insn_for_size_p ())
17694 if (GET_MODE (op0) != V4SFmode)
17696 orig_op0 = op0;
17697 op0 = gen_reg_rtx (V4SFmode);
17699 op1 = gen_lowpart (V4SFmode, op1);
17700 emit_insn (gen_sse_loadups (op0, op1));
17701 if (orig_op0)
17702 emit_move_insn (orig_op0,
17703 gen_lowpart (GET_MODE (orig_op0), op0));
17704 return;
17707 if (mode != V4SFmode)
17708 t = gen_reg_rtx (V4SFmode);
17709 else
17710 t = op0;
17712 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17713 emit_move_insn (t, CONST0_RTX (V4SFmode));
17714 else
17715 emit_clobber (t);
17717 m = adjust_address (op1, V2SFmode, 0);
17718 emit_insn (gen_sse_loadlps (t, t, m));
17719 m = adjust_address (op1, V2SFmode, 8);
17720 emit_insn (gen_sse_loadhps (t, t, m));
17721 if (mode != V4SFmode)
17722 emit_move_insn (op0, gen_lowpart (mode, t));
17725 else if (MEM_P (op0))
17727 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17729 op0 = gen_lowpart (V16QImode, op0);
17730 op1 = gen_lowpart (V16QImode, op1);
17731 /* We will eventually emit movups based on insn attributes. */
17732 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17734 else if (TARGET_SSE2 && mode == V2DFmode)
17736 if (TARGET_AVX
17737 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17738 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17739 || optimize_insn_for_size_p ())
17740 /* We will eventually emit movups based on insn attributes. */
17741 emit_insn (gen_sse2_storeupd (op0, op1));
17742 else
17744 m = adjust_address (op0, DFmode, 0);
17745 emit_insn (gen_sse2_storelpd (m, op1));
17746 m = adjust_address (op0, DFmode, 8);
17747 emit_insn (gen_sse2_storehpd (m, op1));
17750 else
17752 if (mode != V4SFmode)
17753 op1 = gen_lowpart (V4SFmode, op1);
17755 if (TARGET_AVX
17756 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17757 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17758 || optimize_insn_for_size_p ())
17760 op0 = gen_lowpart (V4SFmode, op0);
17761 emit_insn (gen_sse_storeups (op0, op1));
17763 else
17765 m = adjust_address (op0, V2SFmode, 0);
17766 emit_insn (gen_sse_storelps (m, op1));
17767 m = adjust_address (op0, V2SFmode, 8);
17768 emit_insn (gen_sse_storehps (m, op1));
17772 else
17773 gcc_unreachable ();
17776 /* Helper function of ix86_fixup_binary_operands to canonicalize
17777 operand order. Returns true if the operands should be swapped. */
17779 static bool
17780 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17781 rtx operands[])
17783 rtx dst = operands[0];
17784 rtx src1 = operands[1];
17785 rtx src2 = operands[2];
17787 /* If the operation is not commutative, we can't do anything. */
17788 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17789 return false;
17791 /* Highest priority is that src1 should match dst. */
17792 if (rtx_equal_p (dst, src1))
17793 return false;
17794 if (rtx_equal_p (dst, src2))
17795 return true;
17797 /* Next highest priority is that immediate constants come second. */
17798 if (immediate_operand (src2, mode))
17799 return false;
17800 if (immediate_operand (src1, mode))
17801 return true;
17803 /* Lowest priority is that memory references should come second. */
17804 if (MEM_P (src2))
17805 return false;
17806 if (MEM_P (src1))
17807 return true;
17809 return false;
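/* Example: for a commutative operation such as a = b + a (dst == src2),
   swapping the sources makes src1 match the destination, so the two-address
   machine form (e.g. "addl %b, %a") needs no extra copy; the same ordering
   rules push immediates and memory operands into the second slot.  */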
17813 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17814 destination to use for the operation. If different from the true
17815 destination in operands[0], a copy operation will be required. */
17818 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17819 rtx operands[])
17821 rtx dst = operands[0];
17822 rtx src1 = operands[1];
17823 rtx src2 = operands[2];
17825 /* Canonicalize operand order. */
17826 if (ix86_swap_binary_operands_p (code, mode, operands))
17828 /* It is invalid to swap operands of different modes. */
17829 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17831 std::swap (src1, src2);
17834 /* Both source operands cannot be in memory. */
17835 if (MEM_P (src1) && MEM_P (src2))
17837 /* Optimization: Only read from memory once. */
17838 if (rtx_equal_p (src1, src2))
17840 src2 = force_reg (mode, src2);
17841 src1 = src2;
17843 else if (rtx_equal_p (dst, src1))
17844 src2 = force_reg (mode, src2);
17845 else
17846 src1 = force_reg (mode, src1);
17849 /* If the destination is memory, and we do not have matching source
17850 operands, do things in registers. */
17851 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17852 dst = gen_reg_rtx (mode);
17854 /* Source 1 cannot be a constant. */
17855 if (CONSTANT_P (src1))
17856 src1 = force_reg (mode, src1);
17858 /* Source 1 cannot be a non-matching memory. */
17859 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17860 src1 = force_reg (mode, src1);
17862 /* Improve address combine. */
17863 if (code == PLUS
17864 && GET_MODE_CLASS (mode) == MODE_INT
17865 && MEM_P (src2))
17866 src2 = force_reg (mode, src2);
17868 operands[1] = src1;
17869 operands[2] = src2;
17870 return dst;
17873 /* Similarly, but assume that the destination has already been
17874 set up properly. */
17876 void
17877 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17878 machine_mode mode, rtx operands[])
17880 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17881 gcc_assert (dst == operands[0]);
17884 /* Attempt to expand a binary operator.  Make the expansion closer to the
17885 actual machine than just general_operand, which would allow 3 separate
17886 memory references (one output, two input) in a single insn. */
17888 void
17889 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17890 rtx operands[])
17892 rtx src1, src2, dst, op, clob;
17894 dst = ix86_fixup_binary_operands (code, mode, operands);
17895 src1 = operands[1];
17896 src2 = operands[2];
17898 /* Emit the instruction. */
17900 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17901 if (reload_in_progress)
17903 /* Reload doesn't know about the flags register, and doesn't know that
17904 it doesn't want to clobber it. We can only do this with PLUS. */
17905 gcc_assert (code == PLUS);
17906 emit_insn (op);
17908 else if (reload_completed
17909 && code == PLUS
17910 && !rtx_equal_p (dst, src1))
17912 /* This is going to be an LEA; avoid splitting it later. */
17913 emit_insn (op);
17915 else
17917 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17918 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17921 /* Fix up the destination if needed. */
17922 if (dst != operands[0])
17923 emit_move_insn (operands[0], dst);
17926 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17927 the given OPERANDS. */
17929 void
17930 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17931 rtx operands[])
17933 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17934 if (GET_CODE (operands[1]) == SUBREG)
17936 op1 = operands[1];
17937 op2 = operands[2];
17939 else if (GET_CODE (operands[2]) == SUBREG)
17941 op1 = operands[2];
17942 op2 = operands[1];
17944 /* Optimize (__m128i) d | (__m128i) e and similar code
17945 when d and e are float vectors into float vector logical
17946 insn. In C/C++ without using intrinsics there is no other way
17947 to express vector logical operation on float vectors than
17948 to cast them temporarily to integer vectors. */
17949 if (op1
17950 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17951 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17952 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17953 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17954 && SUBREG_BYTE (op1) == 0
17955 && (GET_CODE (op2) == CONST_VECTOR
17956 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17957 && SUBREG_BYTE (op2) == 0))
17958 && can_create_pseudo_p ())
17960 rtx dst;
17961 switch (GET_MODE (SUBREG_REG (op1)))
17963 case V4SFmode:
17964 case V8SFmode:
17965 case V16SFmode:
17966 case V2DFmode:
17967 case V4DFmode:
17968 case V8DFmode:
17969 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17970 if (GET_CODE (op2) == CONST_VECTOR)
17972 op2 = gen_lowpart (GET_MODE (dst), op2);
17973 op2 = force_reg (GET_MODE (dst), op2);
17975 else
17977 op1 = operands[1];
17978 op2 = SUBREG_REG (operands[2]);
17979 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17980 op2 = force_reg (GET_MODE (dst), op2);
17982 op1 = SUBREG_REG (op1);
17983 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17984 op1 = force_reg (GET_MODE (dst), op1);
17985 emit_insn (gen_rtx_SET (VOIDmode, dst,
17986 gen_rtx_fmt_ee (code, GET_MODE (dst),
17987 op1, op2)));
17988 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17989 return;
17990 default:
17991 break;
17994 if (!nonimmediate_operand (operands[1], mode))
17995 operands[1] = force_reg (mode, operands[1]);
17996 if (!nonimmediate_operand (operands[2], mode))
17997 operands[2] = force_reg (mode, operands[2]);
17998 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17999 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18000 gen_rtx_fmt_ee (code, mode, operands[1],
18001 operands[2])));
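/* Example of the optimization above: given
       __m128 a, b;  ... (__m128i) a | (__m128i) b ...
   the IOR is emitted in V4SFmode so the float-domain form (orps rather than
   por) can be used, keeping the operation in the floating point domain.  */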
18004 /* Return TRUE or FALSE depending on whether the binary operator meets the
18005 appropriate constraints. */
18007 bool
18008 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18009 rtx operands[3])
18011 rtx dst = operands[0];
18012 rtx src1 = operands[1];
18013 rtx src2 = operands[2];
18015 /* Both source operands cannot be in memory. */
18016 if (MEM_P (src1) && MEM_P (src2))
18017 return false;
18019 /* Canonicalize operand order for commutative operators. */
18020 if (ix86_swap_binary_operands_p (code, mode, operands))
18021 std::swap (src1, src2);
18023 /* If the destination is memory, we must have a matching source operand. */
18024 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18025 return false;
18027 /* Source 1 cannot be a constant. */
18028 if (CONSTANT_P (src1))
18029 return false;
18031 /* Source 1 cannot be a non-matching memory. */
18032 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18033 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18034 return (code == AND
18035 && (mode == HImode
18036 || mode == SImode
18037 || (TARGET_64BIT && mode == DImode))
18038 && satisfies_constraint_L (src2));
18040 return true;
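/* Note on the AND special case above: with a register destination, a memory
   src1 and an 0xff/0xffff immediate (constraint L), the operation
   reg = mem & 0xffff acts as a zero-extending load, so the usually required
   match between a memory source and the destination is waived.  */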
18043 /* Attempt to expand a unary operator.  Make the expansion closer to the
18044 actual machine than just general_operand, which would allow 2 separate
18045 memory references (one output, one input) in a single insn. */
18047 void
18048 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18049 rtx operands[])
18051 int matching_memory;
18052 rtx src, dst, op, clob;
18054 dst = operands[0];
18055 src = operands[1];
18057 /* If the destination is memory, and we do not have matching source
18058 operands, do things in registers. */
18059 matching_memory = 0;
18060 if (MEM_P (dst))
18062 if (rtx_equal_p (dst, src))
18063 matching_memory = 1;
18064 else
18065 dst = gen_reg_rtx (mode);
18068 /* When source operand is memory, destination must match. */
18069 if (MEM_P (src) && !matching_memory)
18070 src = force_reg (mode, src);
18072 /* Emit the instruction. */
18074 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18075 if (reload_in_progress || code == NOT)
18077 /* Reload doesn't know about the flags register, and doesn't know that
18078 it doesn't want to clobber it. */
18079 gcc_assert (code == NOT);
18080 emit_insn (op);
18082 else
18084 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18085 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18088 /* Fix up the destination if needed. */
18089 if (dst != operands[0])
18090 emit_move_insn (operands[0], dst);
18093 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18094 divisor are within the range [0-255]. */
18096 void
18097 ix86_split_idivmod (machine_mode mode, rtx operands[],
18098 bool signed_p)
18100 rtx_code_label *end_label, *qimode_label;
18101 rtx insn, div, mod;
18102 rtx scratch, tmp0, tmp1, tmp2;
18103 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18104 rtx (*gen_zero_extend) (rtx, rtx);
18105 rtx (*gen_test_ccno_1) (rtx, rtx);
18107 switch (mode)
18109 case SImode:
18110 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18111 gen_test_ccno_1 = gen_testsi_ccno_1;
18112 gen_zero_extend = gen_zero_extendqisi2;
18113 break;
18114 case DImode:
18115 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18116 gen_test_ccno_1 = gen_testdi_ccno_1;
18117 gen_zero_extend = gen_zero_extendqidi2;
18118 break;
18119 default:
18120 gcc_unreachable ();
18123 end_label = gen_label_rtx ();
18124 qimode_label = gen_label_rtx ();
18126 scratch = gen_reg_rtx (mode);
18128 /* Use 8bit unsigned divmod if dividend and divisor are within
18129 the range [0-255]. */
18130 emit_move_insn (scratch, operands[2]);
18131 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18132 scratch, 1, OPTAB_DIRECT);
18133 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18134 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18135 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18136 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18137 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18138 pc_rtx);
18139 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18140 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18141 JUMP_LABEL (insn) = qimode_label;
18143 /* Generate original signed/unsigned divmod. */
18144 div = gen_divmod4_1 (operands[0], operands[1],
18145 operands[2], operands[3]);
18146 emit_insn (div);
18148 /* Branch to the end. */
18149 emit_jump_insn (gen_jump (end_label));
18150 emit_barrier ();
18152 /* Generate 8bit unsigned divide. */
18153 emit_label (qimode_label);
18154 /* Don't use operands[0] for result of 8bit divide since not all
18155 registers support QImode ZERO_EXTRACT. */
18156 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18157 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18158 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18159 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18161 if (signed_p)
18163 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18164 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18166 else
18168 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18169 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18172 /* Extract remainder from AH. */
18173 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18174 if (REG_P (operands[1]))
18175 insn = emit_move_insn (operands[1], tmp1);
18176 else
18178 /* Need a new scratch register since the old one has result
18179 of 8bit divide. */
18180 scratch = gen_reg_rtx (mode);
18181 emit_move_insn (scratch, tmp1);
18182 insn = emit_move_insn (operands[1], scratch);
18184 set_unique_reg_note (insn, REG_EQUAL, mod);
18186 /* Zero extend quotient from AL. */
18187 tmp1 = gen_lowpart (QImode, tmp0);
18188 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18189 set_unique_reg_note (insn, REG_EQUAL, div);
18191 emit_label (end_label);
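/* Rough shape of the emitted code (an illustrative sketch):
       scratch = dividend | divisor
       test    scratch, ~0xff        ; any bits above bit 7 set?
       je      .Lqimode
       <full-width idiv/div>         ; original signed/unsigned divmod
       jmp     .Lend
   .Lqimode:
       <8bit unsigned divide>        ; quotient in AL, remainder in AH
   .Lend:
   Both branch directions are predicted as equally likely (50%).  */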
18194 #define LEA_MAX_STALL (3)
18195 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18197 /* Increase given DISTANCE in half-cycles according to
18198 dependencies between PREV and NEXT instructions.
18199 Add 1 half-cycle if there is no dependency and
18200 go to the next cycle if there is a dependency. */
18202 static unsigned int
18203 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18205 df_ref def, use;
18207 if (!prev || !next)
18208 return distance + (distance & 1) + 2;
18210 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18211 return distance + 1;
18213 FOR_EACH_INSN_USE (use, next)
18214 FOR_EACH_INSN_DEF (def, prev)
18215 if (!DF_REF_IS_ARTIFICIAL (def)
18216 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18217 return distance + (distance & 1) + 2;
18219 return distance + 1;
18222 /* Function checks if instruction INSN defines register number
18223 REGNO1 or REGNO2. */
18225 static bool
18226 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18227 rtx insn)
18229 df_ref def;
18231 FOR_EACH_INSN_DEF (def, insn)
18232 if (DF_REF_REG_DEF_P (def)
18233 && !DF_REF_IS_ARTIFICIAL (def)
18234 && (regno1 == DF_REF_REGNO (def)
18235 || regno2 == DF_REF_REGNO (def)))
18236 return true;
18238 return false;
18241 /* Function checks if instruction INSN uses register number
18242 REGNO as a part of address expression. */
18244 static bool
18245 insn_uses_reg_mem (unsigned int regno, rtx insn)
18247 df_ref use;
18249 FOR_EACH_INSN_USE (use, insn)
18250 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18251 return true;
18253 return false;
18256 /* Search backward for non-agu definition of register number REGNO1
18257 or register number REGNO2 in basic block starting from instruction
18258 START up to head of basic block or instruction INSN.
18260 Set *FOUND to true if a definition was found
18261 and to false otherwise.
18263 Distance in half-cycles between START and found instruction or head
18264 of BB is added to DISTANCE and returned. */
18266 static int
18267 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18268 rtx_insn *insn, int distance,
18269 rtx_insn *start, bool *found)
18271 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18272 rtx_insn *prev = start;
18273 rtx_insn *next = NULL;
18275 *found = false;
18277 while (prev
18278 && prev != insn
18279 && distance < LEA_SEARCH_THRESHOLD)
18281 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18283 distance = increase_distance (prev, next, distance);
18284 if (insn_defines_reg (regno1, regno2, prev))
18286 if (recog_memoized (prev) < 0
18287 || get_attr_type (prev) != TYPE_LEA)
18289 *found = true;
18290 return distance;
18294 next = prev;
18296 if (prev == BB_HEAD (bb))
18297 break;
18299 prev = PREV_INSN (prev);
18302 return distance;
18305 /* Search backward for non-agu definition of register number REGNO1
18306 or register number REGNO2 in INSN's basic block until
18307 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18308 2. Reach neighbour BBs boundary, or
18309 3. Reach agu definition.
18310 Returns the distance between the non-agu definition point and INSN.
18311 If no definition point, returns -1. */
18313 static int
18314 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18315 rtx_insn *insn)
18317 basic_block bb = BLOCK_FOR_INSN (insn);
18318 int distance = 0;
18319 bool found = false;
18321 if (insn != BB_HEAD (bb))
18322 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18323 distance, PREV_INSN (insn),
18324 &found);
18326 if (!found && distance < LEA_SEARCH_THRESHOLD)
18328 edge e;
18329 edge_iterator ei;
18330 bool simple_loop = false;
18332 FOR_EACH_EDGE (e, ei, bb->preds)
18333 if (e->src == bb)
18335 simple_loop = true;
18336 break;
18339 if (simple_loop)
18340 distance = distance_non_agu_define_in_bb (regno1, regno2,
18341 insn, distance,
18342 BB_END (bb), &found);
18343 else
18345 int shortest_dist = -1;
18346 bool found_in_bb = false;
18348 FOR_EACH_EDGE (e, ei, bb->preds)
18350 int bb_dist
18351 = distance_non_agu_define_in_bb (regno1, regno2,
18352 insn, distance,
18353 BB_END (e->src),
18354 &found_in_bb);
18355 if (found_in_bb)
18357 if (shortest_dist < 0)
18358 shortest_dist = bb_dist;
18359 else if (bb_dist > 0)
18360 shortest_dist = MIN (bb_dist, shortest_dist);
18362 found = true;
18366 distance = shortest_dist;
18370 /* get_attr_type may modify recog data. We want to make sure
18371 that recog data is valid for instruction INSN, on which
18372 distance_non_agu_define is called. INSN is unchanged here. */
18373 extract_insn_cached (insn);
18375 if (!found)
18376 return -1;
18378 return distance >> 1;
18381 /* Return the distance in half-cycles between INSN and the next
18382 insn that uses register number REGNO in a memory address, added
18383 to DISTANCE.  Return -1 if REGNO is set.
18385 Set *FOUND to true if a register use was found and
18386 to false otherwise.
18387 Set *REDEFINED to true if a register redefinition was
18388 found and to false otherwise. */
18390 static int
18391 distance_agu_use_in_bb (unsigned int regno,
18392 rtx_insn *insn, int distance, rtx_insn *start,
18393 bool *found, bool *redefined)
18395 basic_block bb = NULL;
18396 rtx_insn *next = start;
18397 rtx_insn *prev = NULL;
18399 *found = false;
18400 *redefined = false;
18402 if (start != NULL_RTX)
18404 bb = BLOCK_FOR_INSN (start);
18405 if (start != BB_HEAD (bb))
18406 /* If insn and start belong to the same bb, set prev to insn,
18407 so the call to increase_distance will increase the distance
18408 between insns by 1. */
18409 prev = insn;
18412 while (next
18413 && next != insn
18414 && distance < LEA_SEARCH_THRESHOLD)
18416 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18418 distance = increase_distance(prev, next, distance);
18419 if (insn_uses_reg_mem (regno, next))
18421 /* Return DISTANCE if OP0 is used in memory
18422 address in NEXT. */
18423 *found = true;
18424 return distance;
18427 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18429 /* Return -1 if OP0 is set in NEXT. */
18430 *redefined = true;
18431 return -1;
18434 prev = next;
18437 if (next == BB_END (bb))
18438 break;
18440 next = NEXT_INSN (next);
18443 return distance;
18446 /* Return the distance between INSN and the next insn that uses
18447 register number REGNO0 in a memory address. Return -1 if no such
18448 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18450 static int
18451 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18453 basic_block bb = BLOCK_FOR_INSN (insn);
18454 int distance = 0;
18455 bool found = false;
18456 bool redefined = false;
18458 if (insn != BB_END (bb))
18459 distance = distance_agu_use_in_bb (regno0, insn, distance,
18460 NEXT_INSN (insn),
18461 &found, &redefined);
18463 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18465 edge e;
18466 edge_iterator ei;
18467 bool simple_loop = false;
18469 FOR_EACH_EDGE (e, ei, bb->succs)
18470 if (e->dest == bb)
18472 simple_loop = true;
18473 break;
18476 if (simple_loop)
18477 distance = distance_agu_use_in_bb (regno0, insn,
18478 distance, BB_HEAD (bb),
18479 &found, &redefined);
18480 else
18482 int shortest_dist = -1;
18483 bool found_in_bb = false;
18484 bool redefined_in_bb = false;
18486 FOR_EACH_EDGE (e, ei, bb->succs)
18488 int bb_dist
18489 = distance_agu_use_in_bb (regno0, insn,
18490 distance, BB_HEAD (e->dest),
18491 &found_in_bb, &redefined_in_bb);
18492 if (found_in_bb)
18494 if (shortest_dist < 0)
18495 shortest_dist = bb_dist;
18496 else if (bb_dist > 0)
18497 shortest_dist = MIN (bb_dist, shortest_dist);
18499 found = true;
18503 distance = shortest_dist;
18507 if (!found || redefined)
18508 return -1;
18510 return distance >> 1;
18513 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18514 there is a choice between LEA and ADD.
18515 Negative value: ADD is preferred over LEA
18516 Zero: Neutral
18517 Positive value: LEA is preferred over ADD. */
18518 #define IX86_LEA_PRIORITY 0
18520 /* Return true if using lea INSN has a performance advantage
18521 over a sequence of instructions. The instruction sequence has
18522 SPLIT_COST cycles higher latency than the lea latency. */
18524 static bool
18525 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18526 unsigned int regno2, int split_cost, bool has_scale)
18528 int dist_define, dist_use;
18530 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18531 non-destructive destination, or for the ability to use a
18532 SCALE, the use of LEA is justified. */
18533 if (TARGET_SILVERMONT || TARGET_INTEL)
18535 if (has_scale)
18536 return true;
18537 if (split_cost < 1)
18538 return false;
18539 if (regno0 == regno1 || regno0 == regno2)
18540 return false;
18541 return true;
18544 dist_define = distance_non_agu_define (regno1, regno2, insn);
18545 dist_use = distance_agu_use (regno0, insn);
18547 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18549 /* If there is no non-AGU operand definition, no AGU
18550 operand usage and the split cost is 0, then both the lea
18551 and the non-lea variants have the same priority. Currently
18552 we prefer lea for 64-bit code and non-lea for 32-bit
18553 code. */
18554 if (dist_use < 0 && split_cost == 0)
18555 return TARGET_64BIT || IX86_LEA_PRIORITY;
18556 else
18557 return true;
18560 /* With a longer definition distance, lea is preferable.
18561 Here we adjust the distance to take into account the splitting cost
18562 and lea priority. */
18563 dist_define += split_cost + IX86_LEA_PRIORITY;
18565 /* If there is no use in a memory address then we just check
18566 that the split cost exceeds the AGU stall. */
18567 if (dist_use < 0)
18568 return dist_define > LEA_MAX_STALL;
18570 /* If this insn has both backward non-agu dependence and forward
18571 agu dependence, the one with short distance takes effect. */
18572 return dist_define >= dist_use;
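/* Rationale note: on Atom-class cores lea executes on the address
   generation unit, so a lea whose inputs were just produced by ALU
   instructions stalls waiting for the values to reach the AGU, while a lea
   whose result is soon used in a memory address saves a move into the AGU.
   The two distances computed above (in half-cycles) weigh these effects
   against SPLIT_COST to decide whether the lea or the mov/add split is
   expected to be faster.  */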
18575 /* Return true if it is legal for INSN to clobber the flags register,
18576 and false otherwise. */
18578 static bool
18579 ix86_ok_to_clobber_flags (rtx_insn *insn)
18581 basic_block bb = BLOCK_FOR_INSN (insn);
18582 df_ref use;
18583 bitmap live;
18585 while (insn)
18587 if (NONDEBUG_INSN_P (insn))
18589 FOR_EACH_INSN_USE (use, insn)
18590 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18591 return false;
18593 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18594 return true;
18597 if (insn == BB_END (bb))
18598 break;
18600 insn = NEXT_INSN (insn);
18603 live = df_get_live_out (bb);
18604 return !REGNO_REG_SET_P (live, FLAGS_REG);
18607 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18608 move and add to avoid AGU stalls. */
18610 bool
18611 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18613 unsigned int regno0, regno1, regno2;
18615 /* Check if we need to optimize. */
18616 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18617 return false;
18619 /* Check it is correct to split here. */
18620 if (!ix86_ok_to_clobber_flags (insn))
18621 return false;
18623 regno0 = true_regnum (operands[0]);
18624 regno1 = true_regnum (operands[1]);
18625 regno2 = true_regnum (operands[2]);
18627 /* We only need to split adds with a non-destructive
18628 destination operand. */
18629 if (regno0 == regno1 || regno0 == regno2)
18630 return false;
18631 else
18632 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
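/* For example (registers chosen arbitrarily), splitting the
   non-destructive add

       lea    (%rsi,%rdi), %rax

   yields

       mov    %rsi, %rax
       add    %rdi, %rax

   which keeps the computation off the AGU at the price of one extra
   move; that move is why ix86_lea_outperforms is called with a
   SPLIT_COST of 1 above.  */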
18635 /* Return true if we should emit lea instruction instead of mov
18636 instruction. */
18638 bool
18639 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18641 unsigned int regno0, regno1;
18643 /* Check if we need to optimize. */
18644 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18645 return false;
18647 /* Use lea for reg to reg moves only. */
18648 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18649 return false;
18651 regno0 = true_regnum (operands[0]);
18652 regno1 = true_regnum (operands[1]);
18654 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18657 /* Return true if we need to split lea into a sequence of
18658 instructions to avoid AGU stalls. */
18660 bool
18661 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18663 unsigned int regno0, regno1, regno2;
18664 int split_cost;
18665 struct ix86_address parts;
18666 int ok;
18668 /* Check if we need to optimize. */
18669 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18670 return false;
18672 /* The "at least two components" test below might not catch simple
18673 move or zero extension insns if parts.base is non-NULL and parts.disp
18674 is const0_rtx as the only components in the address, e.g. if the
18675 register is %rbp or %r13. As this test is much cheaper and moves or
18676 zero extensions are the common case, do this check first. */
18677 if (REG_P (operands[1])
18678 || (SImode_address_operand (operands[1], VOIDmode)
18679 && REG_P (XEXP (operands[1], 0))))
18680 return false;
18682 /* Check if it is OK to split here. */
18683 if (!ix86_ok_to_clobber_flags (insn))
18684 return false;
18686 ok = ix86_decompose_address (operands[1], &parts);
18687 gcc_assert (ok);
18689 /* There should be at least two components in the address. */
18690 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18691 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18692 return false;
18694 /* We should not split into add if a non-legitimate PIC
18695 operand is used as the displacement. */
18696 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18697 return false;
18699 regno0 = true_regnum (operands[0]);
18700 regno1 = INVALID_REGNUM;
18701 regno2 = INVALID_REGNUM;
18703 if (parts.base)
18704 regno1 = true_regnum (parts.base);
18705 if (parts.index)
18706 regno2 = true_regnum (parts.index);
18708 split_cost = 0;
18710 /* Compute how many cycles we will add to the execution time
18711 if we split the lea into a sequence of instructions. */
18712 if (parts.base || parts.index)
18714 /* Have to use a mov instruction if the non-destructive
18715 destination form is used. */
18716 if (regno1 != regno0 && regno2 != regno0)
18717 split_cost += 1;
18719 /* Have to add index to base if both exist. */
18720 if (parts.base && parts.index)
18721 split_cost += 1;
18723 /* Have to use shift and adds if scale is 2 or greater. */
18724 if (parts.scale > 1)
18726 if (regno0 != regno1)
18727 split_cost += 1;
18728 else if (regno2 == regno0)
18729 split_cost += 4;
18730 else
18731 split_cost += parts.scale;
18734 /* Have to use an add instruction with an immediate if
18735 disp is nonzero. */
18736 if (parts.disp && parts.disp != const0_rtx)
18737 split_cost += 1;
18739 /* Subtract the price of lea. */
18740 split_cost -= 1;
18743 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18744 parts.scale > 1);
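/* A sketch of the cost computation above: for

       lea    0x4(%rbx,%rcx,2), %rax

   with %rax distinct from both address registers, split_cost is
   +1 for the initial mov, +1 for adding base and index, +1 for the
   shift implied by scale == 2, +1 for adding the displacement, and
   -1 for the lea that is no longer emitted, i.e. 3 in total.  */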
18747 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18748 matches destination. RTX includes clobber of FLAGS_REG. */
18750 static void
18751 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18752 rtx dst, rtx src)
18754 rtx op, clob;
18756 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18757 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18759 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
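/* For instance, with register operands dst and src,
   ix86_emit_binop (PLUS, SImode, dst, src) emits

       (parallel [(set (reg:SI dst) (plus:SI (reg:SI dst) (reg:SI src)))
                  (clobber (reg:CC FLAGS_REG))])

   which is the shape the two-address arithmetic patterns in i386.md
   expect.  */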
18762 /* Return true if the definition of regno1 is nearer to the insn than that of regno2. */
18764 static bool
18765 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18767 rtx_insn *prev = insn;
18768 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18770 if (insn == start)
18771 return false;
18772 while (prev && prev != start)
18774 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18776 prev = PREV_INSN (prev);
18777 continue;
18779 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18780 return true;
18781 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18782 return false;
18783 prev = PREV_INSN (prev);
18786 /* None of the regs is defined in the bb. */
18787 return false;
18790 /* Split lea instructions into a sequence of instructions
18791 which are executed on the ALU to avoid AGU stalls.
18792 It is assumed that it is allowed to clobber the flags
18793 register at the lea position. */
18795 void
18796 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18798 unsigned int regno0, regno1, regno2;
18799 struct ix86_address parts;
18800 rtx target, tmp;
18801 int ok, adds;
18803 ok = ix86_decompose_address (operands[1], &parts);
18804 gcc_assert (ok);
18806 target = gen_lowpart (mode, operands[0]);
18808 regno0 = true_regnum (target);
18809 regno1 = INVALID_REGNUM;
18810 regno2 = INVALID_REGNUM;
18812 if (parts.base)
18814 parts.base = gen_lowpart (mode, parts.base);
18815 regno1 = true_regnum (parts.base);
18818 if (parts.index)
18820 parts.index = gen_lowpart (mode, parts.index);
18821 regno2 = true_regnum (parts.index);
18824 if (parts.disp)
18825 parts.disp = gen_lowpart (mode, parts.disp);
18827 if (parts.scale > 1)
18829 /* Case r1 = r1 + ... */
18830 if (regno1 == regno0)
18832 /* If we have the case r1 = r1 + C * r2 then we
18833 would have to use multiplication, which is very
18834 expensive. Assume the cost model is wrong if we
18835 hit such a case here. */
18836 gcc_assert (regno2 != regno0);
18838 for (adds = parts.scale; adds > 0; adds--)
18839 ix86_emit_binop (PLUS, mode, target, parts.index);
18841 else
18843 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18844 if (regno0 != regno2)
18845 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18847 /* Use shift for scaling. */
18848 ix86_emit_binop (ASHIFT, mode, target,
18849 GEN_INT (exact_log2 (parts.scale)));
18851 if (parts.base)
18852 ix86_emit_binop (PLUS, mode, target, parts.base);
18854 if (parts.disp && parts.disp != const0_rtx)
18855 ix86_emit_binop (PLUS, mode, target, parts.disp);
18858 else if (!parts.base && !parts.index)
18860 gcc_assert(parts.disp);
18861 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18863 else
18865 if (!parts.base)
18867 if (regno0 != regno2)
18868 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18870 else if (!parts.index)
18872 if (regno0 != regno1)
18873 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18875 else
18877 if (regno0 == regno1)
18878 tmp = parts.index;
18879 else if (regno0 == regno2)
18880 tmp = parts.base;
18881 else
18883 rtx tmp1;
18885 /* Find better operand for SET instruction, depending
18886 on which definition is farther from the insn. */
18887 if (find_nearest_reg_def (insn, regno1, regno2))
18888 tmp = parts.index, tmp1 = parts.base;
18889 else
18890 tmp = parts.base, tmp1 = parts.index;
18892 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18894 if (parts.disp && parts.disp != const0_rtx)
18895 ix86_emit_binop (PLUS, mode, target, parts.disp);
18897 ix86_emit_binop (PLUS, mode, target, tmp1);
18898 return;
18901 ix86_emit_binop (PLUS, mode, target, tmp);
18904 if (parts.disp && parts.disp != const0_rtx)
18905 ix86_emit_binop (PLUS, mode, target, parts.disp);
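/* As an illustration of the splitting above (registers arbitrary),

       lea    0x4(%rbx,%rcx,4), %rax

   with %rax distinct from both address registers becomes roughly

       mov    %rcx, %rax
       shl    $2, %rax
       add    %rbx, %rax
       add    $0x4, %rax

   so the whole address computation runs on ALU ports rather than on
   the AGU.  */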
18909 /* Return true if it is ok to optimize an ADD operation to a LEA
18910 operation to avoid flag register consumption. For most processors,
18911 ADD is faster than LEA. For processors like BONNELL, if the
18912 destination register of the LEA holds an actual address which will be
18913 used soon, LEA is better; otherwise ADD is better. */
18915 bool
18916 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18918 unsigned int regno0 = true_regnum (operands[0]);
18919 unsigned int regno1 = true_regnum (operands[1]);
18920 unsigned int regno2 = true_regnum (operands[2]);
18922 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18923 if (regno0 != regno1 && regno0 != regno2)
18924 return true;
18926 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18927 return false;
18929 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18932 /* Return true if destination reg of SET_BODY is shift count of
18933 USE_BODY. */
18935 static bool
18936 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18938 rtx set_dest;
18939 rtx shift_rtx;
18940 int i;
18942 /* Retrieve destination of SET_BODY. */
18943 switch (GET_CODE (set_body))
18945 case SET:
18946 set_dest = SET_DEST (set_body);
18947 if (!set_dest || !REG_P (set_dest))
18948 return false;
18949 break;
18950 case PARALLEL:
18951 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18952 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18953 use_body))
18954 return true;
18955 default:
18956 return false;
18957 break;
18960 /* Retrieve shift count of USE_BODY. */
18961 switch (GET_CODE (use_body))
18963 case SET:
18964 shift_rtx = XEXP (use_body, 1);
18965 break;
18966 case PARALLEL:
18967 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18968 if (ix86_dep_by_shift_count_body (set_body,
18969 XVECEXP (use_body, 0, i)))
18970 return true;
18971 default:
18972 return false;
18973 break;
18976 if (shift_rtx
18977 && (GET_CODE (shift_rtx) == ASHIFT
18978 || GET_CODE (shift_rtx) == LSHIFTRT
18979 || GET_CODE (shift_rtx) == ASHIFTRT
18980 || GET_CODE (shift_rtx) == ROTATE
18981 || GET_CODE (shift_rtx) == ROTATERT))
18983 rtx shift_count = XEXP (shift_rtx, 1);
18985 /* Return true if shift count is dest of SET_BODY. */
18986 if (REG_P (shift_count))
18988 /* Add this check since it can be invoked before register
18989 allocation by the pre-reload scheduler. */
18990 if (reload_completed
18991 && true_regnum (set_dest) == true_regnum (shift_count))
18992 return true;
18993 else if (REGNO(set_dest) == REGNO(shift_count))
18994 return true;
18998 return false;
19001 /* Return true if destination reg of SET_INSN is shift count of
19002 USE_INSN. */
19004 bool
19005 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19007 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19008 PATTERN (use_insn));
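/* For example, with SET_INSN being "mov %esi, %ecx" and USE_INSN
   being "shl %cl, %eax", the destination of the first insn is the
   shift count of the second, so this returns true.  */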
19011 /* Return TRUE or FALSE depending on whether the unary operator meets the
19012 appropriate constraints. */
19014 bool
19015 ix86_unary_operator_ok (enum rtx_code,
19016 machine_mode,
19017 rtx operands[2])
19019 /* If one of operands is memory, source and destination must match. */
19020 if ((MEM_P (operands[0])
19021 || MEM_P (operands[1]))
19022 && ! rtx_equal_p (operands[0], operands[1]))
19023 return false;
19024 return true;
19027 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19028 are ok, keeping in mind the possible movddup alternative. */
19030 bool
19031 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19033 if (MEM_P (operands[0]))
19034 return rtx_equal_p (operands[0], operands[1 + high]);
19035 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19036 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19037 return true;
19040 /* Post-reload splitter for converting an SF or DFmode value in an
19041 SSE register into an unsigned SImode. */
19043 void
19044 ix86_split_convert_uns_si_sse (rtx operands[])
19046 machine_mode vecmode;
19047 rtx value, large, zero_or_two31, input, two31, x;
19049 large = operands[1];
19050 zero_or_two31 = operands[2];
19051 input = operands[3];
19052 two31 = operands[4];
19053 vecmode = GET_MODE (large);
19054 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19056 /* Load up the value into the low element. We must ensure that the other
19057 elements are valid floats -- zero is the easiest such value. */
19058 if (MEM_P (input))
19060 if (vecmode == V4SFmode)
19061 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19062 else
19063 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19065 else
19067 input = gen_rtx_REG (vecmode, REGNO (input));
19068 emit_move_insn (value, CONST0_RTX (vecmode));
19069 if (vecmode == V4SFmode)
19070 emit_insn (gen_sse_movss (value, value, input));
19071 else
19072 emit_insn (gen_sse2_movsd (value, value, input));
19075 emit_move_insn (large, two31);
19076 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19078 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19079 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19081 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19082 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19084 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19085 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19087 large = gen_rtx_REG (V4SImode, REGNO (large));
19088 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19090 x = gen_rtx_REG (V4SImode, REGNO (value));
19091 if (vecmode == V4SFmode)
19092 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19093 else
19094 emit_insn (gen_sse2_cvttpd2dq (x, value));
19095 value = x;
19097 emit_insn (gen_xorv4si3 (value, value, large));
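/* A numeric sketch of the sequence above: for an SFmode input of
   3000000000.0 (too big for a signed int), the comparison sets the
   mask, 0x1.0p31 is subtracted leaving 852516352.0, the signed
   truncating conversion produces 852516352, and the final xor with
   0x80000000 restores the unsigned result 3000000000.  Inputs below
   0x1.0p31 subtract zero and xor with zero, so they go through the
   plain signed conversion unchanged.  */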
19100 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19101 Expects the 64-bit DImode to be supplied in a pair of integral
19102 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19103 -mfpmath=sse, !optimize_size only. */
19105 void
19106 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19108 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19109 rtx int_xmm, fp_xmm;
19110 rtx biases, exponents;
19111 rtx x;
19113 int_xmm = gen_reg_rtx (V4SImode);
19114 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19115 emit_insn (gen_movdi_to_sse (int_xmm, input));
19116 else if (TARGET_SSE_SPLIT_REGS)
19118 emit_clobber (int_xmm);
19119 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19121 else
19123 x = gen_reg_rtx (V2DImode);
19124 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19125 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19128 x = gen_rtx_CONST_VECTOR (V4SImode,
19129 gen_rtvec (4, GEN_INT (0x43300000UL),
19130 GEN_INT (0x45300000UL),
19131 const0_rtx, const0_rtx));
19132 exponents = validize_mem (force_const_mem (V4SImode, x));
19134 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19135 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19137 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19138 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19139 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19140 (0x1.0p84 + double(fp_value_hi_xmm)).
19141 Note these exponents differ by 32. */
19143 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19145 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19146 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19147 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19148 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19149 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19150 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19151 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19152 biases = validize_mem (force_const_mem (V2DFmode, biases));
19153 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19155 /* Add the upper and lower DFmode values together. */
19156 if (TARGET_SSE3)
19157 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19158 else
19160 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19161 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19162 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19165 ix86_expand_vector_extract (false, target, fp_xmm, 0);
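/* A worked instance of the bias trick above: for the input
   N = 1 * 2**32 + 5, the interleave produces the doubles
   0x1.0p52 + 5 and 0x1.0p84 + 1 * 2**32; subtracting the biases
   leaves 5.0 and 4294967296.0, and the final addition gives
   4294967301.0, i.e. double (N).  Results only round once N needs
   more than 53 significant bits.  */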
19168 /* Not used, but eases macroization of patterns. */
19169 void
19170 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19172 gcc_unreachable ();
19175 /* Convert an unsigned SImode value into a DFmode. Only currently used
19176 for SSE, but applicable anywhere. */
19178 void
19179 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19181 REAL_VALUE_TYPE TWO31r;
19182 rtx x, fp;
19184 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19185 NULL, 1, OPTAB_DIRECT);
19187 fp = gen_reg_rtx (DFmode);
19188 emit_insn (gen_floatsidf2 (fp, x));
19190 real_ldexp (&TWO31r, &dconst1, 31);
19191 x = const_double_from_real_value (TWO31r, DFmode);
19193 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19194 if (x != target)
19195 emit_move_insn (target, x);
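/* A worked example of the trick above: for the input 3000000000,
   adding -2**31 in SImode wraps to the signed value 852516352, the
   signed int->double conversion yields 852516352.0, and adding back
   0x1.0p31 gives 3000000000.0.  A small input such as 5 becomes
   -2147483643, converts to -2147483643.0, and likewise ends up as
   5.0 after the final addition.  */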
19198 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19199 32-bit mode; otherwise we have a direct convert instruction. */
19201 void
19202 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19204 REAL_VALUE_TYPE TWO32r;
19205 rtx fp_lo, fp_hi, x;
19207 fp_lo = gen_reg_rtx (DFmode);
19208 fp_hi = gen_reg_rtx (DFmode);
19210 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19212 real_ldexp (&TWO32r, &dconst1, 32);
19213 x = const_double_from_real_value (TWO32r, DFmode);
19214 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19216 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19218 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19219 0, OPTAB_DIRECT);
19220 if (x != target)
19221 emit_move_insn (target, x);
19224 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19225 For x86_32, -mfpmath=sse, !optimize_size only. */
19226 void
19227 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19229 REAL_VALUE_TYPE ONE16r;
19230 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19232 real_ldexp (&ONE16r, &dconst1, 16);
19233 x = const_double_from_real_value (ONE16r, SFmode);
19234 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19235 NULL, 0, OPTAB_DIRECT);
19236 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19237 NULL, 0, OPTAB_DIRECT);
19238 fp_hi = gen_reg_rtx (SFmode);
19239 fp_lo = gen_reg_rtx (SFmode);
19240 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19241 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19242 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19243 0, OPTAB_DIRECT);
19244 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19245 0, OPTAB_DIRECT);
19246 if (!rtx_equal_p (target, fp_hi))
19247 emit_move_insn (target, fp_hi);
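/* Sketch of the computation above: the input is split as
   HI * 65536 + LO with each half at most 16 bits, so both halves
   convert to SFmode exactly and the result is HI * 0x1.0p16 + LO.
   For example 0x89abcdef splits into HI = 0x89ab and LO = 0xcdef,
   giving 35243.0f * 65536.0f + 52719.0f, correctly rounded.  */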
19250 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19251 a vector of unsigned ints VAL to vector of floats TARGET. */
19253 void
19254 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19256 rtx tmp[8];
19257 REAL_VALUE_TYPE TWO16r;
19258 machine_mode intmode = GET_MODE (val);
19259 machine_mode fltmode = GET_MODE (target);
19260 rtx (*cvt) (rtx, rtx);
19262 if (intmode == V4SImode)
19263 cvt = gen_floatv4siv4sf2;
19264 else
19265 cvt = gen_floatv8siv8sf2;
19266 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19267 tmp[0] = force_reg (intmode, tmp[0]);
19268 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19269 OPTAB_DIRECT);
19270 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19271 NULL_RTX, 1, OPTAB_DIRECT);
19272 tmp[3] = gen_reg_rtx (fltmode);
19273 emit_insn (cvt (tmp[3], tmp[1]));
19274 tmp[4] = gen_reg_rtx (fltmode);
19275 emit_insn (cvt (tmp[4], tmp[2]));
19276 real_ldexp (&TWO16r, &dconst1, 16);
19277 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19278 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19279 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19280 OPTAB_DIRECT);
19281 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19282 OPTAB_DIRECT);
19283 if (tmp[7] != target)
19284 emit_move_insn (target, tmp[7]);
19287 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19288 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19289 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19290 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19293 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19295 REAL_VALUE_TYPE TWO31r;
19296 rtx two31r, tmp[4];
19297 machine_mode mode = GET_MODE (val);
19298 machine_mode scalarmode = GET_MODE_INNER (mode);
19299 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19300 rtx (*cmp) (rtx, rtx, rtx, rtx);
19301 int i;
19303 for (i = 0; i < 3; i++)
19304 tmp[i] = gen_reg_rtx (mode);
19305 real_ldexp (&TWO31r, &dconst1, 31);
19306 two31r = const_double_from_real_value (TWO31r, scalarmode);
19307 two31r = ix86_build_const_vector (mode, 1, two31r);
19308 two31r = force_reg (mode, two31r);
19309 switch (mode)
19311 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19312 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19313 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19314 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19315 default: gcc_unreachable ();
19317 tmp[3] = gen_rtx_LE (mode, two31r, val);
19318 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19319 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19320 0, OPTAB_DIRECT);
19321 if (intmode == V4SImode || TARGET_AVX2)
19322 *xorp = expand_simple_binop (intmode, ASHIFT,
19323 gen_lowpart (intmode, tmp[0]),
19324 GEN_INT (31), NULL_RTX, 0,
19325 OPTAB_DIRECT);
19326 else
19328 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19329 two31 = ix86_build_const_vector (intmode, 1, two31);
19330 *xorp = expand_simple_binop (intmode, AND,
19331 gen_lowpart (intmode, tmp[0]),
19332 two31, NULL_RTX, 0,
19333 OPTAB_DIRECT);
19335 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19336 0, OPTAB_DIRECT);
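/* Illustration of the adjustment above: lanes with VAL >= 0x1.0p31
   get 0x1.0p31 subtracted and the matching lane of *XORP set to
   0x80000000, so a later signed fix_trunc followed by the xor
   reproduces the unsigned result (e.g. 3000000000.0f becomes
   852516352.0f, truncates to 852516352 and xors back to 3000000000);
   lanes below 0x1.0p31 are left alone and xored with zero.  */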
19339 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19340 then replicate the value for all elements of the vector
19341 register. */
19344 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19346 int i, n_elt;
19347 rtvec v;
19348 machine_mode scalar_mode;
19350 switch (mode)
19352 case V64QImode:
19353 case V32QImode:
19354 case V16QImode:
19355 case V32HImode:
19356 case V16HImode:
19357 case V8HImode:
19358 case V16SImode:
19359 case V8SImode:
19360 case V4SImode:
19361 case V8DImode:
19362 case V4DImode:
19363 case V2DImode:
19364 gcc_assert (vect);
19365 case V16SFmode:
19366 case V8SFmode:
19367 case V4SFmode:
19368 case V8DFmode:
19369 case V4DFmode:
19370 case V2DFmode:
19371 n_elt = GET_MODE_NUNITS (mode);
19372 v = rtvec_alloc (n_elt);
19373 scalar_mode = GET_MODE_INNER (mode);
19375 RTVEC_ELT (v, 0) = value;
19377 for (i = 1; i < n_elt; ++i)
19378 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19380 return gen_rtx_CONST_VECTOR (mode, v);
19382 default:
19383 gcc_unreachable ();
19387 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19388 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19389 for an SSE register. If VECT is true, then replicate the mask for
19390 all elements of the vector register. If INVERT is true, then create
19391 a mask excluding the sign bit. */
19394 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19396 machine_mode vec_mode, imode;
19397 HOST_WIDE_INT hi, lo;
19398 int shift = 63;
19399 rtx v;
19400 rtx mask;
19402 /* Find the sign bit, sign extended to 2*HWI. */
19403 switch (mode)
19405 case V16SImode:
19406 case V16SFmode:
19407 case V8SImode:
19408 case V4SImode:
19409 case V8SFmode:
19410 case V4SFmode:
19411 vec_mode = mode;
19412 mode = GET_MODE_INNER (mode);
19413 imode = SImode;
19414 lo = 0x80000000, hi = lo < 0;
19415 break;
19417 case V8DImode:
19418 case V4DImode:
19419 case V2DImode:
19420 case V8DFmode:
19421 case V4DFmode:
19422 case V2DFmode:
19423 vec_mode = mode;
19424 mode = GET_MODE_INNER (mode);
19425 imode = DImode;
19426 if (HOST_BITS_PER_WIDE_INT >= 64)
19427 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19428 else
19429 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19430 break;
19432 case TImode:
19433 case TFmode:
19434 vec_mode = VOIDmode;
19435 if (HOST_BITS_PER_WIDE_INT >= 64)
19437 imode = TImode;
19438 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19440 else
19442 rtvec vec;
19444 imode = DImode;
19445 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19447 if (invert)
19449 lo = ~lo, hi = ~hi;
19450 v = constm1_rtx;
19452 else
19453 v = const0_rtx;
19455 mask = immed_double_const (lo, hi, imode);
19457 vec = gen_rtvec (2, v, mask);
19458 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19459 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19461 return v;
19463 break;
19465 default:
19466 gcc_unreachable ();
19469 if (invert)
19470 lo = ~lo, hi = ~hi;
19472 /* Force this value into the low part of a fp vector constant. */
19473 mask = immed_double_const (lo, hi, imode);
19474 mask = gen_lowpart (mode, mask);
19476 if (vec_mode == VOIDmode)
19477 return force_reg (mode, mask);
19479 v = ix86_build_const_vector (vec_mode, vect, mask);
19480 return force_reg (vec_mode, v);
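/* Examples of the masks built above: for V4SFmode with VECT set, the
   result is a vector whose four elements all have the bit pattern
   0x80000000 (i.e. -0.0f); with INVERT set each element is instead
   0x7fffffff, covering everything but the sign bit.  */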
19483 /* Generate code for floating point ABS or NEG. */
19485 void
19486 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19487 rtx operands[])
19489 rtx mask, set, dst, src;
19490 bool use_sse = false;
19491 bool vector_mode = VECTOR_MODE_P (mode);
19492 machine_mode vmode = mode;
19494 if (vector_mode)
19495 use_sse = true;
19496 else if (mode == TFmode)
19497 use_sse = true;
19498 else if (TARGET_SSE_MATH)
19500 use_sse = SSE_FLOAT_MODE_P (mode);
19501 if (mode == SFmode)
19502 vmode = V4SFmode;
19503 else if (mode == DFmode)
19504 vmode = V2DFmode;
19507 /* NEG and ABS performed with SSE use bitwise mask operations.
19508 Create the appropriate mask now. */
19509 if (use_sse)
19510 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19511 else
19512 mask = NULL_RTX;
19514 dst = operands[0];
19515 src = operands[1];
19517 set = gen_rtx_fmt_e (code, mode, src);
19518 set = gen_rtx_SET (VOIDmode, dst, set);
19520 if (mask)
19522 rtx use, clob;
19523 rtvec par;
19525 use = gen_rtx_USE (VOIDmode, mask);
19526 if (vector_mode)
19527 par = gen_rtvec (2, set, use);
19528 else
19530 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19531 par = gen_rtvec (3, set, use, clob);
19533 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19535 else
19536 emit_insn (set);
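/* With SSE this typically boils down to a single bitwise operation
   once the mask is available: negating a DFmode value becomes an
   xorpd with the { -0.0, -0.0 } mask, and fabs becomes an andpd with
   the inverted mask, leaving every other bit of the value intact.  */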
19539 /* Expand a copysign operation. Special case operand 0 being a constant. */
19541 void
19542 ix86_expand_copysign (rtx operands[])
19544 machine_mode mode, vmode;
19545 rtx dest, op0, op1, mask, nmask;
19547 dest = operands[0];
19548 op0 = operands[1];
19549 op1 = operands[2];
19551 mode = GET_MODE (dest);
19553 if (mode == SFmode)
19554 vmode = V4SFmode;
19555 else if (mode == DFmode)
19556 vmode = V2DFmode;
19557 else
19558 vmode = mode;
19560 if (GET_CODE (op0) == CONST_DOUBLE)
19562 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19564 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19565 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19567 if (mode == SFmode || mode == DFmode)
19569 if (op0 == CONST0_RTX (mode))
19570 op0 = CONST0_RTX (vmode);
19571 else
19573 rtx v = ix86_build_const_vector (vmode, false, op0);
19575 op0 = force_reg (vmode, v);
19578 else if (op0 != CONST0_RTX (mode))
19579 op0 = force_reg (mode, op0);
19581 mask = ix86_build_signbit_mask (vmode, 0, 0);
19583 if (mode == SFmode)
19584 copysign_insn = gen_copysignsf3_const;
19585 else if (mode == DFmode)
19586 copysign_insn = gen_copysigndf3_const;
19587 else
19588 copysign_insn = gen_copysigntf3_const;
19590 emit_insn (copysign_insn (dest, op0, op1, mask));
19592 else
19594 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19596 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19597 mask = ix86_build_signbit_mask (vmode, 0, 0);
19599 if (mode == SFmode)
19600 copysign_insn = gen_copysignsf3_var;
19601 else if (mode == DFmode)
19602 copysign_insn = gen_copysigndf3_var;
19603 else
19604 copysign_insn = gen_copysigntf3_var;
19606 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19610 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19611 be a constant, and so has already been expanded into a vector constant. */
19613 void
19614 ix86_split_copysign_const (rtx operands[])
19616 machine_mode mode, vmode;
19617 rtx dest, op0, mask, x;
19619 dest = operands[0];
19620 op0 = operands[1];
19621 mask = operands[3];
19623 mode = GET_MODE (dest);
19624 vmode = GET_MODE (mask);
19626 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19627 x = gen_rtx_AND (vmode, dest, mask);
19628 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19630 if (op0 != CONST0_RTX (vmode))
19632 x = gen_rtx_IOR (vmode, dest, op0);
19633 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19637 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19638 so we have to do two masks. */
19640 void
19641 ix86_split_copysign_var (rtx operands[])
19643 machine_mode mode, vmode;
19644 rtx dest, scratch, op0, op1, mask, nmask, x;
19646 dest = operands[0];
19647 scratch = operands[1];
19648 op0 = operands[2];
19649 op1 = operands[3];
19650 nmask = operands[4];
19651 mask = operands[5];
19653 mode = GET_MODE (dest);
19654 vmode = GET_MODE (mask);
19656 if (rtx_equal_p (op0, op1))
19658 /* Shouldn't happen often (it's useless, obviously), but when it does
19659 we'd generate incorrect code if we continue below. */
19660 emit_move_insn (dest, op0);
19661 return;
19664 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19666 gcc_assert (REGNO (op1) == REGNO (scratch));
19668 x = gen_rtx_AND (vmode, scratch, mask);
19669 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19671 dest = mask;
19672 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19673 x = gen_rtx_NOT (vmode, dest);
19674 x = gen_rtx_AND (vmode, x, op0);
19675 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19677 else
19679 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19681 x = gen_rtx_AND (vmode, scratch, mask);
19683 else /* alternative 2,4 */
19685 gcc_assert (REGNO (mask) == REGNO (scratch));
19686 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19687 x = gen_rtx_AND (vmode, scratch, op1);
19689 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19691 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19693 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19694 x = gen_rtx_AND (vmode, dest, nmask);
19696 else /* alternative 3,4 */
19698 gcc_assert (REGNO (nmask) == REGNO (dest));
19699 dest = nmask;
19700 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19701 x = gen_rtx_AND (vmode, dest, op0);
19703 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19706 x = gen_rtx_IOR (vmode, dest, scratch);
19707 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19710 /* Return TRUE or FALSE depending on whether the first SET in INSN
19711 has source and destination with matching CC modes, and that the
19712 CC mode is at least as constrained as REQ_MODE. */
19714 bool
19715 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19717 rtx set;
19718 machine_mode set_mode;
19720 set = PATTERN (insn);
19721 if (GET_CODE (set) == PARALLEL)
19722 set = XVECEXP (set, 0, 0);
19723 gcc_assert (GET_CODE (set) == SET);
19724 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19726 set_mode = GET_MODE (SET_DEST (set));
19727 switch (set_mode)
19729 case CCNOmode:
19730 if (req_mode != CCNOmode
19731 && (req_mode != CCmode
19732 || XEXP (SET_SRC (set), 1) != const0_rtx))
19733 return false;
19734 break;
19735 case CCmode:
19736 if (req_mode == CCGCmode)
19737 return false;
19738 /* FALLTHRU */
19739 case CCGCmode:
19740 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19741 return false;
19742 /* FALLTHRU */
19743 case CCGOCmode:
19744 if (req_mode == CCZmode)
19745 return false;
19746 /* FALLTHRU */
19747 case CCZmode:
19748 break;
19750 case CCAmode:
19751 case CCCmode:
19752 case CCOmode:
19753 case CCSmode:
19754 if (set_mode != req_mode)
19755 return false;
19756 break;
19758 default:
19759 gcc_unreachable ();
19762 return GET_MODE (SET_SRC (set)) == set_mode;
19765 /* Generate insn patterns to do an integer compare of OPERANDS. */
19767 static rtx
19768 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19770 machine_mode cmpmode;
19771 rtx tmp, flags;
19773 cmpmode = SELECT_CC_MODE (code, op0, op1);
19774 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19776 /* This is very simple, but making the interface the same as in the
19777 FP case makes the rest of the code easier. */
19778 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19779 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19781 /* Return the test that should be put into the flags user, i.e.
19782 the bcc, scc, or cmov instruction. */
19783 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19786 /* Figure out whether to use ordered or unordered fp comparisons.
19787 Return the appropriate mode to use. */
19789 machine_mode
19790 ix86_fp_compare_mode (enum rtx_code)
19792 /* ??? In order to make all comparisons reversible, we do all comparisons
19793 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19794 all forms of trapping and nontrapping comparisons, we can make inequality
19795 comparisons trapping again, since that results in better code when using
19796 FCOM based compares. */
19797 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19800 machine_mode
19801 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19803 machine_mode mode = GET_MODE (op0);
19805 if (SCALAR_FLOAT_MODE_P (mode))
19807 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19808 return ix86_fp_compare_mode (code);
19811 switch (code)
19813 /* Only zero flag is needed. */
19814 case EQ: /* ZF=0 */
19815 case NE: /* ZF!=0 */
19816 return CCZmode;
19817 /* Codes needing carry flag. */
19818 case GEU: /* CF=0 */
19819 case LTU: /* CF=1 */
19820 /* Detect overflow checks. They need just the carry flag. */
19821 if (GET_CODE (op0) == PLUS
19822 && rtx_equal_p (op1, XEXP (op0, 0)))
19823 return CCCmode;
19824 else
19825 return CCmode;
19826 case GTU: /* CF=0 & ZF=0 */
19827 case LEU: /* CF=1 | ZF=1 */
19828 return CCmode;
19829 /* Codes possibly doable only with sign flag when
19830 comparing against zero. */
19831 case GE: /* SF=OF or SF=0 */
19832 case LT: /* SF<>OF or SF=1 */
19833 if (op1 == const0_rtx)
19834 return CCGOCmode;
19835 else
19836 /* For other cases Carry flag is not required. */
19837 return CCGCmode;
19838 /* Codes doable only with the sign flag when comparing
19839 against zero, but we lack a jump instruction for that,
19840 so we need to use relational tests against overflow,
19841 which thus needs to be zero. */
19842 case GT: /* ZF=0 & SF=OF */
19843 case LE: /* ZF=1 | SF<>OF */
19844 if (op1 == const0_rtx)
19845 return CCNOmode;
19846 else
19847 return CCGCmode;
19848 /* The strcmp pattern does (use flags), and combine may ask us
19849 for the proper mode. */
19850 case USE:
19851 return CCmode;
19852 default:
19853 gcc_unreachable ();
19857 /* Return the fixed registers used for condition codes. */
19859 static bool
19860 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19862 *p1 = FLAGS_REG;
19863 *p2 = FPSR_REG;
19864 return true;
19867 /* If two condition code modes are compatible, return a condition code
19868 mode which is compatible with both. Otherwise, return
19869 VOIDmode. */
19871 static machine_mode
19872 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19874 if (m1 == m2)
19875 return m1;
19877 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19878 return VOIDmode;
19880 if ((m1 == CCGCmode && m2 == CCGOCmode)
19881 || (m1 == CCGOCmode && m2 == CCGCmode))
19882 return CCGCmode;
19884 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19885 return m2;
19886 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19887 return m1;
19889 switch (m1)
19891 default:
19892 gcc_unreachable ();
19894 case CCmode:
19895 case CCGCmode:
19896 case CCGOCmode:
19897 case CCNOmode:
19898 case CCAmode:
19899 case CCCmode:
19900 case CCOmode:
19901 case CCSmode:
19902 case CCZmode:
19903 switch (m2)
19905 default:
19906 return VOIDmode;
19908 case CCmode:
19909 case CCGCmode:
19910 case CCGOCmode:
19911 case CCNOmode:
19912 case CCAmode:
19913 case CCCmode:
19914 case CCOmode:
19915 case CCSmode:
19916 case CCZmode:
19917 return CCmode;
19920 case CCFPmode:
19921 case CCFPUmode:
19922 /* These are only compatible with themselves, which we already
19923 checked above. */
19924 return VOIDmode;
19929 /* Return a comparison we can do and that it is equivalent to
19930 swap_condition (code) apart possibly from orderedness.
19931 But, never change orderedness if TARGET_IEEE_FP, returning
19932 UNKNOWN in that case if necessary. */
19934 static enum rtx_code
19935 ix86_fp_swap_condition (enum rtx_code code)
19937 switch (code)
19939 case GT: /* GTU - CF=0 & ZF=0 */
19940 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19941 case GE: /* GEU - CF=0 */
19942 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19943 case UNLT: /* LTU - CF=1 */
19944 return TARGET_IEEE_FP ? UNKNOWN : GT;
19945 case UNLE: /* LEU - CF=1 | ZF=1 */
19946 return TARGET_IEEE_FP ? UNKNOWN : GE;
19947 default:
19948 return swap_condition (code);
19952 /* Return the cost of comparison CODE using the best strategy for performance.
19953 All the following functions use the number of instructions as a cost metric.
19954 In the future this should be tweaked to compute bytes for optimize_size and
19955 take into account the performance of various instructions on various CPUs. */
19957 static int
19958 ix86_fp_comparison_cost (enum rtx_code code)
19960 int arith_cost;
19962 /* The cost of code using bit-twiddling on %ah. */
19963 switch (code)
19965 case UNLE:
19966 case UNLT:
19967 case LTGT:
19968 case GT:
19969 case GE:
19970 case UNORDERED:
19971 case ORDERED:
19972 case UNEQ:
19973 arith_cost = 4;
19974 break;
19975 case LT:
19976 case NE:
19977 case EQ:
19978 case UNGE:
19979 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19980 break;
19981 case LE:
19982 case UNGT:
19983 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19984 break;
19985 default:
19986 gcc_unreachable ();
19989 switch (ix86_fp_comparison_strategy (code))
19991 case IX86_FPCMP_COMI:
19992 return arith_cost > 4 ? 3 : 2;
19993 case IX86_FPCMP_SAHF:
19994 return arith_cost > 4 ? 4 : 3;
19995 default:
19996 return arith_cost;
20000 /* Return the strategy to use for floating-point comparisons. We assume that
20001 fcomi is always preferable where available, since that is also true when
20002 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20004 enum ix86_fpcmp_strategy
20005 ix86_fp_comparison_strategy (enum rtx_code)
20007 /* Do fcomi/sahf based test when profitable. */
20009 if (TARGET_CMOVE)
20010 return IX86_FPCMP_COMI;
20012 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20013 return IX86_FPCMP_SAHF;
20015 return IX86_FPCMP_ARITH;
20018 /* Swap, force into registers, or otherwise massage the two operands
20019 to a fp comparison. The operands are updated in place; the new
20020 comparison code is returned. */
20022 static enum rtx_code
20023 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20025 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20026 rtx op0 = *pop0, op1 = *pop1;
20027 machine_mode op_mode = GET_MODE (op0);
20028 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20030 /* All of the unordered compare instructions only work on registers.
20031 The same is true of the fcomi compare instructions. The XFmode
20032 compare instructions require registers except when comparing
20033 against zero or when converting operand 1 from fixed point to
20034 floating point. */
20036 if (!is_sse
20037 && (fpcmp_mode == CCFPUmode
20038 || (op_mode == XFmode
20039 && ! (standard_80387_constant_p (op0) == 1
20040 || standard_80387_constant_p (op1) == 1)
20041 && GET_CODE (op1) != FLOAT)
20042 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20044 op0 = force_reg (op_mode, op0);
20045 op1 = force_reg (op_mode, op1);
20047 else
20049 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20050 things around if they appear profitable, otherwise force op0
20051 into a register. */
20053 if (standard_80387_constant_p (op0) == 0
20054 || (MEM_P (op0)
20055 && ! (standard_80387_constant_p (op1) == 0
20056 || MEM_P (op1))))
20058 enum rtx_code new_code = ix86_fp_swap_condition (code);
20059 if (new_code != UNKNOWN)
20061 std::swap (op0, op1);
20062 code = new_code;
20066 if (!REG_P (op0))
20067 op0 = force_reg (op_mode, op0);
20069 if (CONSTANT_P (op1))
20071 int tmp = standard_80387_constant_p (op1);
20072 if (tmp == 0)
20073 op1 = validize_mem (force_const_mem (op_mode, op1));
20074 else if (tmp == 1)
20076 if (TARGET_CMOVE)
20077 op1 = force_reg (op_mode, op1);
20079 else
20080 op1 = force_reg (op_mode, op1);
20084 /* Try to rearrange the comparison to make it cheaper. */
20085 if (ix86_fp_comparison_cost (code)
20086 > ix86_fp_comparison_cost (swap_condition (code))
20087 && (REG_P (op1) || can_create_pseudo_p ()))
20089 std::swap (op0, op1);
20090 code = swap_condition (code);
20091 if (!REG_P (op0))
20092 op0 = force_reg (op_mode, op0);
20095 *pop0 = op0;
20096 *pop1 = op1;
20097 return code;
20100 /* Convert comparison codes we use to represent FP comparison to integer
20101 code that will result in proper branch. Return UNKNOWN if no such code
20102 is available. */
20104 enum rtx_code
20105 ix86_fp_compare_code_to_integer (enum rtx_code code)
20107 switch (code)
20109 case GT:
20110 return GTU;
20111 case GE:
20112 return GEU;
20113 case ORDERED:
20114 case UNORDERED:
20115 return code;
20116 break;
20117 case UNEQ:
20118 return EQ;
20119 break;
20120 case UNLT:
20121 return LTU;
20122 break;
20123 case UNLE:
20124 return LEU;
20125 break;
20126 case LTGT:
20127 return NE;
20128 break;
20129 default:
20130 return UNKNOWN;
20134 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20136 static rtx
20137 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20139 machine_mode fpcmp_mode, intcmp_mode;
20140 rtx tmp, tmp2;
20142 fpcmp_mode = ix86_fp_compare_mode (code);
20143 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20145 /* Do fcomi/sahf based test when profitable. */
20146 switch (ix86_fp_comparison_strategy (code))
20148 case IX86_FPCMP_COMI:
20149 intcmp_mode = fpcmp_mode;
20150 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20151 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20152 tmp);
20153 emit_insn (tmp);
20154 break;
20156 case IX86_FPCMP_SAHF:
20157 intcmp_mode = fpcmp_mode;
20158 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20159 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20160 tmp);
20162 if (!scratch)
20163 scratch = gen_reg_rtx (HImode);
20164 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20165 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20166 break;
20168 case IX86_FPCMP_ARITH:
20169 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20170 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20171 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20172 if (!scratch)
20173 scratch = gen_reg_rtx (HImode);
20174 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20176 /* In the unordered case, we have to check C2 for NaN's, which
20177 doesn't happen to work out to anything nice combination-wise.
20178 So do some bit twiddling on the value we've got in AH to come
20179 up with an appropriate set of condition codes. */
20181 intcmp_mode = CCNOmode;
20182 switch (code)
20184 case GT:
20185 case UNGT:
20186 if (code == GT || !TARGET_IEEE_FP)
20188 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20189 code = EQ;
20191 else
20193 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20194 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20195 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20196 intcmp_mode = CCmode;
20197 code = GEU;
20199 break;
20200 case LT:
20201 case UNLT:
20202 if (code == LT && TARGET_IEEE_FP)
20204 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20205 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20206 intcmp_mode = CCmode;
20207 code = EQ;
20209 else
20211 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20212 code = NE;
20214 break;
20215 case GE:
20216 case UNGE:
20217 if (code == GE || !TARGET_IEEE_FP)
20219 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20220 code = EQ;
20222 else
20224 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20225 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20226 code = NE;
20228 break;
20229 case LE:
20230 case UNLE:
20231 if (code == LE && TARGET_IEEE_FP)
20233 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20234 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20235 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20236 intcmp_mode = CCmode;
20237 code = LTU;
20239 else
20241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20242 code = NE;
20244 break;
20245 case EQ:
20246 case UNEQ:
20247 if (code == EQ && TARGET_IEEE_FP)
20249 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20250 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20251 intcmp_mode = CCmode;
20252 code = EQ;
20254 else
20256 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20257 code = NE;
20259 break;
20260 case NE:
20261 case LTGT:
20262 if (code == NE && TARGET_IEEE_FP)
20264 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20265 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20266 GEN_INT (0x40)));
20267 code = NE;
20269 else
20271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20272 code = EQ;
20274 break;
20276 case UNORDERED:
20277 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20278 code = NE;
20279 break;
20280 case ORDERED:
20281 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20282 code = EQ;
20283 break;
20285 default:
20286 gcc_unreachable ();
20288 break;
20290 default:
20291 gcc_unreachable();
20294 /* Return the test that should be put into the flags user, i.e.
20295 the bcc, scc, or cmov instruction. */
20296 return gen_rtx_fmt_ee (code, VOIDmode,
20297 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20298 const0_rtx);
20301 static rtx
20302 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20304 rtx ret;
20306 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20307 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20309 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20311 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20312 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20314 else
20315 ret = ix86_expand_int_compare (code, op0, op1);
20317 return ret;
20320 void
20321 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20323 machine_mode mode = GET_MODE (op0);
20324 rtx tmp;
20326 switch (mode)
20328 case SFmode:
20329 case DFmode:
20330 case XFmode:
20331 case QImode:
20332 case HImode:
20333 case SImode:
20334 simple:
20335 tmp = ix86_expand_compare (code, op0, op1);
20336 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20337 gen_rtx_LABEL_REF (VOIDmode, label),
20338 pc_rtx);
20339 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20340 return;
20342 case DImode:
20343 if (TARGET_64BIT)
20344 goto simple;
20345 case TImode:
20346 /* Expand DImode branch into multiple compare+branch. */
20348 rtx lo[2], hi[2];
20349 rtx_code_label *label2;
20350 enum rtx_code code1, code2, code3;
20351 machine_mode submode;
20353 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20355 std::swap (op0, op1);
20356 code = swap_condition (code);
20359 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20360 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20362 submode = mode == DImode ? SImode : DImode;
20364 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20365 avoid two branches. This costs one extra insn, so disable when
20366 optimizing for size. */
20368 if ((code == EQ || code == NE)
20369 && (!optimize_insn_for_size_p ()
20370 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20372 rtx xor0, xor1;
20374 xor1 = hi[0];
20375 if (hi[1] != const0_rtx)
20376 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20377 NULL_RTX, 0, OPTAB_WIDEN);
20379 xor0 = lo[0];
20380 if (lo[1] != const0_rtx)
20381 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20382 NULL_RTX, 0, OPTAB_WIDEN);
20384 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20385 NULL_RTX, 0, OPTAB_WIDEN);
20387 ix86_expand_branch (code, tmp, const0_rtx, label);
20388 return;
20391 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20392 op1 is a constant and the low word is zero, then we can just
20393 examine the high word. Similarly for low word -1 and
20394 less-or-equal-than or greater-than. */
20396 if (CONST_INT_P (hi[1]))
20397 switch (code)
20399 case LT: case LTU: case GE: case GEU:
20400 if (lo[1] == const0_rtx)
20402 ix86_expand_branch (code, hi[0], hi[1], label);
20403 return;
20405 break;
20406 case LE: case LEU: case GT: case GTU:
20407 if (lo[1] == constm1_rtx)
20409 ix86_expand_branch (code, hi[0], hi[1], label);
20410 return;
20412 break;
20413 default:
20414 break;
20417 /* Otherwise, we need two or three jumps. */
20419 label2 = gen_label_rtx ();
20421 code1 = code;
20422 code2 = swap_condition (code);
20423 code3 = unsigned_condition (code);
20425 switch (code)
20427 case LT: case GT: case LTU: case GTU:
20428 break;
20430 case LE: code1 = LT; code2 = GT; break;
20431 case GE: code1 = GT; code2 = LT; break;
20432 case LEU: code1 = LTU; code2 = GTU; break;
20433 case GEU: code1 = GTU; code2 = LTU; break;
20435 case EQ: code1 = UNKNOWN; code2 = NE; break;
20436 case NE: code2 = UNKNOWN; break;
20438 default:
20439 gcc_unreachable ();
20443 * a < b =>
20444 * if (hi(a) < hi(b)) goto true;
20445 * if (hi(a) > hi(b)) goto false;
20446 * if (lo(a) < lo(b)) goto true;
20447 * false:
20450 if (code1 != UNKNOWN)
20451 ix86_expand_branch (code1, hi[0], hi[1], label);
20452 if (code2 != UNKNOWN)
20453 ix86_expand_branch (code2, hi[0], hi[1], label2);
20455 ix86_expand_branch (code3, lo[0], lo[1], label);
20457 if (code2 != UNKNOWN)
20458 emit_label (label2);
20459 return;
20462 default:
20463 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20464 goto simple;
20468 /* Split branch based on floating point condition. */
20469 void
20470 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20471 rtx target1, rtx target2, rtx tmp)
20473 rtx condition;
20474 rtx i;
20476 if (target2 != pc_rtx)
20478 rtx tmp = target2;
20479 code = reverse_condition_maybe_unordered (code);
20480 target2 = target1;
20481 target1 = tmp;
20484 condition = ix86_expand_fp_compare (code, op1, op2,
20485 tmp);
20487 i = emit_jump_insn (gen_rtx_SET
20488 (VOIDmode, pc_rtx,
20489 gen_rtx_IF_THEN_ELSE (VOIDmode,
20490 condition, target1, target2)));
20491 if (split_branch_probability >= 0)
20492 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20495 void
20496 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20498 rtx ret;
20500 gcc_assert (GET_MODE (dest) == QImode);
20502 ret = ix86_expand_compare (code, op0, op1);
20503 PUT_MODE (ret, QImode);
20504 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20507 /* Expand comparison setting or clearing carry flag. Return true when
20508 successful and set pop for the operation. */
20509 static bool
20510 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20512 machine_mode mode =
20513 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20515 /* Do not handle double-mode compares that go through special path. */
20516 if (mode == (TARGET_64BIT ? TImode : DImode))
20517 return false;
20519 if (SCALAR_FLOAT_MODE_P (mode))
20521 rtx compare_op;
20522 rtx_insn *compare_seq;
20524 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20526 /* Shortcut: the following common codes never translate
20527 into carry flag compares. */
20528 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20529 || code == ORDERED || code == UNORDERED)
20530 return false;
20532 /* These comparisons require zero flag; swap operands so they won't. */
20533 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20534 && !TARGET_IEEE_FP)
20536 std::swap (op0, op1);
20537 code = swap_condition (code);
20540 /* Try to expand the comparison and verify that we end up with
20541 a carry-flag-based comparison. This fails only when we decide
20542 to expand the comparison using arithmetic, which is not a
20543 common scenario. */
20544 start_sequence ();
20545 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20546 compare_seq = get_insns ();
20547 end_sequence ();
20549 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20550 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20551 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20552 else
20553 code = GET_CODE (compare_op);
20555 if (code != LTU && code != GEU)
20556 return false;
20558 emit_insn (compare_seq);
20559 *pop = compare_op;
20560 return true;
20563 if (!INTEGRAL_MODE_P (mode))
20564 return false;
20566 switch (code)
20568 case LTU:
20569 case GEU:
20570 break;
20572 /* Convert a==0 into (unsigned)a<1. */
20573 case EQ:
20574 case NE:
20575 if (op1 != const0_rtx)
20576 return false;
20577 op1 = const1_rtx;
20578 code = (code == EQ ? LTU : GEU);
20579 break;
20581 /* Convert a>b into b<a or a>=b-1. */
20582 case GTU:
20583 case LEU:
20584 if (CONST_INT_P (op1))
20586 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20587 /* Bail out on overflow. We could still swap the operands, but that
20588 would force loading the constant into a register. */
20589 if (op1 == const0_rtx
20590 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20591 return false;
20592 code = (code == GTU ? GEU : LTU);
20594 else
20596 std::swap (op1, op0);
20597 code = (code == GTU ? LTU : GEU);
20599 break;
20601 /* Convert a>=0 into (unsigned)a<0x80000000. */
20602 case LT:
20603 case GE:
20604 if (mode == DImode || op1 != const0_rtx)
20605 return false;
20606 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20607 code = (code == LT ? GEU : LTU);
20608 break;
20609 case LE:
20610 case GT:
20611 if (mode == DImode || op1 != constm1_rtx)
20612 return false;
20613 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20614 code = (code == LE ? GEU : LTU);
20615 break;
20617 default:
20618 return false;
20620 /* Swapping operands may cause constant to appear as first operand. */
20621 if (!nonimmediate_operand (op0, VOIDmode))
20623 if (!can_create_pseudo_p ())
20624 return false;
20625 op0 = force_reg (mode, op0);
20627 *pop = ix86_expand_compare (code, op0, op1);
20628 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20629 return true;
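/* An example of the conversions above: "a == 0" is rewritten as the
   unsigned test "a < 1", which a consumer such as the sbb-based
   movcc expansion can implement as

       cmp    $1, %eax
       sbb    %edx, %edx

   turning the carry flag into a 0 / -1 mask without a branch.
   Similarly "a > b" with a constant b becomes "a >= b + 1".  */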
20632 bool
20633 ix86_expand_int_movcc (rtx operands[])
20635 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20636 rtx_insn *compare_seq;
20637 rtx compare_op;
20638 machine_mode mode = GET_MODE (operands[0]);
20639 bool sign_bit_compare_p = false;
20640 rtx op0 = XEXP (operands[1], 0);
20641 rtx op1 = XEXP (operands[1], 1);
20643 if (GET_MODE (op0) == TImode
20644 || (GET_MODE (op0) == DImode
20645 && !TARGET_64BIT))
20646 return false;
20648 start_sequence ();
20649 compare_op = ix86_expand_compare (code, op0, op1);
20650 compare_seq = get_insns ();
20651 end_sequence ();
20653 compare_code = GET_CODE (compare_op);
20655 if ((op1 == const0_rtx && (code == GE || code == LT))
20656 || (op1 == constm1_rtx && (code == GT || code == LE)))
20657 sign_bit_compare_p = true;
20659 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20660 HImode insns, we'd be swallowed in word prefix ops. */
20662 if ((mode != HImode || TARGET_FAST_PREFIX)
20663 && (mode != (TARGET_64BIT ? TImode : DImode))
20664 && CONST_INT_P (operands[2])
20665 && CONST_INT_P (operands[3]))
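/* Both arms of the conditional move are integer constants; ct is the
   value for a true comparison, cf the value for a false one. Try to
   compute the result from the flags without a cmov or branch. */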
20667 rtx out = operands[0];
20668 HOST_WIDE_INT ct = INTVAL (operands[2]);
20669 HOST_WIDE_INT cf = INTVAL (operands[3]);
20670 HOST_WIDE_INT diff;
20672 diff = ct - cf;
20673 /* Sign bit compares are better done using shifts than by using
20674 sbb. */
20675 if (sign_bit_compare_p
20676 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20678 /* Detect overlap between destination and compare sources. */
20679 rtx tmp = out;
20681 if (!sign_bit_compare_p)
20683 rtx flags;
20684 bool fpcmp = false;
20686 compare_code = GET_CODE (compare_op);
20688 flags = XEXP (compare_op, 0);
20690 if (GET_MODE (flags) == CCFPmode
20691 || GET_MODE (flags) == CCFPUmode)
20693 fpcmp = true;
20694 compare_code
20695 = ix86_fp_compare_code_to_integer (compare_code);
20698 /* To simplify the rest of the code, restrict to the GEU case. */
20699 if (compare_code == LTU)
20701 HOST_WIDE_INT tmp = ct;
20702 ct = cf;
20703 cf = tmp;
20704 compare_code = reverse_condition (compare_code);
20705 code = reverse_condition (code);
20707 else
20709 if (fpcmp)
20710 PUT_CODE (compare_op,
20711 reverse_condition_maybe_unordered
20712 (GET_CODE (compare_op)));
20713 else
20714 PUT_CODE (compare_op,
20715 reverse_condition (GET_CODE (compare_op)));
20717 diff = ct - cf;
20719 if (reg_overlap_mentioned_p (out, op0)
20720 || reg_overlap_mentioned_p (out, op1))
20721 tmp = gen_reg_rtx (mode);
20723 if (mode == DImode)
20724 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20725 else
20726 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20727 flags, compare_op));
20729 else
20731 if (code == GT || code == GE)
20732 code = reverse_condition (code);
20733 else
20735 HOST_WIDE_INT tmp = ct;
20736 ct = cf;
20737 cf = tmp;
20738 diff = ct - cf;
20740 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20743 if (diff == 1)
20746 * cmpl op0,op1
20747 * sbbl dest,dest
20748 * [addl dest, ct]
20750 * Size 5 - 8.
20752 if (ct)
20753 tmp = expand_simple_binop (mode, PLUS,
20754 tmp, GEN_INT (ct),
20755 copy_rtx (tmp), 1, OPTAB_DIRECT);
20757 else if (cf == -1)
20760 * cmpl op0,op1
20761 * sbbl dest,dest
20762 * orl $ct, dest
20764 * Size 8.
20766 tmp = expand_simple_binop (mode, IOR,
20767 tmp, GEN_INT (ct),
20768 copy_rtx (tmp), 1, OPTAB_DIRECT);
20770 else if (diff == -1 && ct)
20773 * cmpl op0,op1
20774 * sbbl dest,dest
20775 * notl dest
20776 * [addl dest, cf]
20778 * Size 8 - 11.
20780 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20781 if (cf)
20782 tmp = expand_simple_binop (mode, PLUS,
20783 copy_rtx (tmp), GEN_INT (cf),
20784 copy_rtx (tmp), 1, OPTAB_DIRECT);
20786 else
20789 * cmpl op0,op1
20790 * sbbl dest,dest
20791 * [notl dest]
20792 * andl cf - ct, dest
20793 * [addl dest, ct]
20795 * Size 8 - 11.
20798 if (cf == 0)
20800 cf = ct;
20801 ct = 0;
20802 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20805 tmp = expand_simple_binop (mode, AND,
20806 copy_rtx (tmp),
20807 gen_int_mode (cf - ct, mode),
20808 copy_rtx (tmp), 1, OPTAB_DIRECT);
20809 if (ct)
20810 tmp = expand_simple_binop (mode, PLUS,
20811 copy_rtx (tmp), GEN_INT (ct),
20812 copy_rtx (tmp), 1, OPTAB_DIRECT);
20815 if (!rtx_equal_p (tmp, out))
20816 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20818 return true;
20821 if (diff < 0)
20823 machine_mode cmp_mode = GET_MODE (op0);
20825 std::swap (ct, cf);
20826 diff = -diff;
20828 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20830 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20832 /* We may be reversing unordered compare to normal compare, that
20833 is not valid in general (we may convert non-trapping condition
20834 to trapping one), however on i386 we currently emit all
20835 comparisons unordered. */
20836 compare_code = reverse_condition_maybe_unordered (compare_code);
20837 code = reverse_condition_maybe_unordered (code);
20839 else
20841 compare_code = reverse_condition (compare_code);
20842 code = reverse_condition (code);
20846 compare_code = UNKNOWN;
20847 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20848 && CONST_INT_P (op1))
20850 if (op1 == const0_rtx
20851 && (code == LT || code == GE))
20852 compare_code = code;
20853 else if (op1 == constm1_rtx)
20855 if (code == LE)
20856 compare_code = LT;
20857 else if (code == GT)
20858 compare_code = GE;
20862 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20863 if (compare_code != UNKNOWN
20864 && GET_MODE (op0) == GET_MODE (out)
20865 && (cf == -1 || ct == -1))
20867 /* If lea code below could be used, only optimize
20868 if it results in a 2 insn sequence. */
20870 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20871 || diff == 3 || diff == 5 || diff == 9)
20872 || (compare_code == LT && ct == -1)
20873 || (compare_code == GE && cf == -1))
20876 * notl op1 (if necessary)
20877 * sarl $31, op1
20878 * orl cf, op1
20880 if (ct != -1)
20882 cf = ct;
20883 ct = -1;
20884 code = reverse_condition (code);
20887 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20889 out = expand_simple_binop (mode, IOR,
20890 out, GEN_INT (cf),
20891 out, 1, OPTAB_DIRECT);
20892 if (out != operands[0])
20893 emit_move_insn (operands[0], out);
20895 return true;
20900 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20901 || diff == 3 || diff == 5 || diff == 9)
20902 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20903 && (mode != DImode
20904 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20907 * xorl dest,dest
20908 * cmpl op1,op2
20909 * setcc dest
20910 * lea cf(dest*(ct-cf)),dest
20912 * Size 14.
20914 * This also catches the degenerate setcc-only case.
20917 rtx tmp;
20918 int nops;
20920 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20922 nops = 0;
20923 /* On x86_64 the lea instruction operates on Pmode, so we need
20924 to do the arithmetic in the proper mode to match. */
20925 if (diff == 1)
20926 tmp = copy_rtx (out);
20927 else
20929 rtx out1;
20930 out1 = copy_rtx (out);
20931 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20932 nops++;
20933 if (diff & 1)
20935 tmp = gen_rtx_PLUS (mode, tmp, out1);
20936 nops++;
20939 if (cf != 0)
20941 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20942 nops++;
20944 if (!rtx_equal_p (tmp, out))
20946 if (nops == 1)
20947 out = force_operand (tmp, copy_rtx (out));
20948 else
20949 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20951 if (!rtx_equal_p (out, operands[0]))
20952 emit_move_insn (operands[0], copy_rtx (out));
20954 return true;
20958 * General case: Jumpful:
20959 * xorl dest,dest cmpl op1, op2
20960 * cmpl op1, op2 movl ct, dest
20961 * setcc dest jcc 1f
20962 * decl dest movl cf, dest
20963 * andl (cf-ct),dest 1:
20964 * addl ct,dest
20966 * Size 20. Size 14.
20968 * This is reasonably steep, but branch mispredict costs are
20969 * high on modern cpus, so consider failing only if optimizing
20970 * for space.
20973 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20974 && BRANCH_COST (optimize_insn_for_speed_p (),
20975 false) >= 2)
20977 if (cf == 0)
20979 machine_mode cmp_mode = GET_MODE (op0);
20981 cf = ct;
20982 ct = 0;
20984 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20986 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20988 /* We may be reversing unordered compare to normal compare,
20989 that is not valid in general (we may convert non-trapping
20990 condition to trapping one), however on i386 we currently
20991 emit all comparisons unordered. */
20992 code = reverse_condition_maybe_unordered (code);
20994 else
20996 code = reverse_condition (code);
20997 if (compare_code != UNKNOWN)
20998 compare_code = reverse_condition (compare_code);
21002 if (compare_code != UNKNOWN)
21004 /* notl op1 (if needed)
21005 sarl $31, op1
21006 andl (cf-ct), op1
21007 addl ct, op1
21009 For x < 0 (resp. x <= -1) there will be no notl,
21010 so if possible swap the constants to get rid of the
21011 complement.
21012 True/false will be -1/0 while code below (store flag
21013 followed by decrement) is 0/-1, so the constants need
21014 to be exchanged once more. */
21016 if (compare_code == GE || !cf)
21018 code = reverse_condition (code);
21019 compare_code = LT;
21021 else
21022 std::swap (cf, ct);
21024 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21026 else
21028 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21030 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21031 constm1_rtx,
21032 copy_rtx (out), 1, OPTAB_DIRECT);
21035 out = expand_simple_binop (mode, AND, copy_rtx (out),
21036 gen_int_mode (cf - ct, mode),
21037 copy_rtx (out), 1, OPTAB_DIRECT);
21038 if (ct)
21039 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21040 copy_rtx (out), 1, OPTAB_DIRECT);
21041 if (!rtx_equal_p (out, operands[0]))
21042 emit_move_insn (operands[0], copy_rtx (out));
21044 return true;
21048 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21050 /* Try a few things more with specific constants and a variable. */
21052 optab op;
21053 rtx var, orig_out, out, tmp;
21055 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21056 return false;
21058 /* If one of the two operands is an interesting constant, load that
21059 constant into the destination and mask the variable in with a logical operation. */
21061 if (CONST_INT_P (operands[2]))
21063 var = operands[3];
21064 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21065 operands[3] = constm1_rtx, op = and_optab;
21066 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21067 operands[3] = const0_rtx, op = ior_optab;
21068 else
21069 return false;
21071 else if (CONST_INT_P (operands[3]))
21073 var = operands[2];
21074 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21075 operands[2] = constm1_rtx, op = and_optab;
21076 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21077 operands[2] = const0_rtx, op = ior_optab;
21078 else
21079 return false;
21081 else
21082 return false;
21084 orig_out = operands[0];
21085 tmp = gen_reg_rtx (mode);
21086 operands[0] = tmp;
21088 /* Recurse to get the constant loaded. */
21089 if (ix86_expand_int_movcc (operands) == 0)
21090 return false;
21092 /* Mask in the interesting variable. */
21093 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21094 OPTAB_WIDEN);
21095 if (!rtx_equal_p (out, orig_out))
21096 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21098 return true;
21102 * For comparison with above,
21104 * movl cf,dest
21105 * movl ct,tmp
21106 * cmpl op1,op2
21107 * cmovcc tmp,dest
21109 * Size 15.
21112 if (! nonimmediate_operand (operands[2], mode))
21113 operands[2] = force_reg (mode, operands[2]);
21114 if (! nonimmediate_operand (operands[3], mode))
21115 operands[3] = force_reg (mode, operands[3]);
21117 if (! register_operand (operands[2], VOIDmode)
21118 && (mode == QImode
21119 || ! register_operand (operands[3], VOIDmode)))
21120 operands[2] = force_reg (mode, operands[2]);
21122 if (mode == QImode
21123 && ! register_operand (operands[3], VOIDmode))
21124 operands[3] = force_reg (mode, operands[3]);
21126 emit_insn (compare_seq);
21127 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21128 gen_rtx_IF_THEN_ELSE (mode,
21129 compare_op, operands[2],
21130 operands[3])));
21131 return true;
21134 /* Swap, force into registers, or otherwise massage the two operands
21135 to an sse comparison with a mask result. Thus we differ a bit from
21136 ix86_prepare_fp_compare_args which expects to produce a flags result.
21138 The DEST operand exists to help determine whether to commute commutative
21139 operators. The POP0/POP1 operands are updated in place. The new
21140 comparison code is returned, or UNKNOWN if not implementable. */
21142 static enum rtx_code
21143 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21144 rtx *pop0, rtx *pop1)
21146 switch (code)
21148 case LTGT:
21149 case UNEQ:
21150 /* AVX supports all the needed comparisons. */
21151 if (TARGET_AVX)
21152 break;
21153 /* We have no LTGT as an operator. We could implement it with
21154 NE & ORDERED, but this requires an extra temporary. It's
21155 not clear that it's worth it. */
21156 return UNKNOWN;
21158 case LT:
21159 case LE:
21160 case UNGT:
21161 case UNGE:
21162 /* These are supported directly. */
21163 break;
21165 case EQ:
21166 case NE:
21167 case UNORDERED:
21168 case ORDERED:
21169 /* AVX has 3 operand comparisons, no need to swap anything. */
21170 if (TARGET_AVX)
21171 break;
21172 /* For commutative operators, try to canonicalize the destination
21173 operand to be first in the comparison - this helps reload to
21174 avoid extra moves. */
21175 if (!dest || !rtx_equal_p (dest, *pop1))
21176 break;
21177 /* FALLTHRU */
21179 case GE:
21180 case GT:
21181 case UNLE:
21182 case UNLT:
21183 /* These are not supported directly before AVX, and furthermore
21184 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21185 comparison operands to transform into something that is
21186 supported. */
21187 std::swap (*pop0, *pop1);
21188 code = swap_condition (code);
21189 break;
21191 default:
21192 gcc_unreachable ();
21195 return code;
21198 /* Detect conditional moves that exactly match min/max operational
21199 semantics. Note that this is IEEE safe, as long as we don't
21200 interchange the operands.
21202 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21203 and TRUE if the operation is successful and instructions are emitted. */
21205 static bool
21206 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21207 rtx cmp_op1, rtx if_true, rtx if_false)
21209 machine_mode mode;
21210 bool is_min;
21211 rtx tmp;
21213 if (code == LT)
21215 else if (code == UNGE)
21216 std::swap (if_true, if_false);
21217 else
21218 return false;
21220 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21221 is_min = true;
21222 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21223 is_min = false;
21224 else
21225 return false;
21227 mode = GET_MODE (dest);
21229 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21230 but MODE may be a vector mode and thus not appropriate. */
21231 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21233 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21234 rtvec v;
21236 if_true = force_reg (mode, if_true);
21237 v = gen_rtvec (2, if_true, if_false);
21238 tmp = gen_rtx_UNSPEC (mode, v, u);
21240 else
21242 code = is_min ? SMIN : SMAX;
21243 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21246 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21247 return true;
21250 /* Expand an sse vector comparison. Return the register with the result. */
21252 static rtx
21253 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21254 rtx op_true, rtx op_false)
21256 machine_mode mode = GET_MODE (dest);
21257 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21259 /* In the general case the result of the comparison can have a different mode than the operands. */
21260 machine_mode cmp_mode;
21262 /* In AVX512F the result of comparison is an integer mask. */
21263 bool maskcmp = false;
21264 rtx x;
21266 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21268 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21269 gcc_assert (cmp_mode != BLKmode);
21271 maskcmp = true;
21273 else
21274 cmp_mode = cmp_ops_mode;
21277 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21278 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21279 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21281 if (optimize
21282 || reg_overlap_mentioned_p (dest, op_true)
21283 || reg_overlap_mentioned_p (dest, op_false))
21284 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21286 /* Compare patterns for int modes are unspec in AVX512F only. */
21287 if (maskcmp && (code == GT || code == EQ))
21289 rtx (*gen)(rtx, rtx, rtx);
21291 switch (cmp_ops_mode)
21293 case V16SImode:
21294 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21295 break;
21296 case V8DImode:
21297 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21298 break;
21299 default:
21300 gen = NULL;
21303 if (gen)
21305 emit_insn (gen (dest, cmp_op0, cmp_op1));
21306 return dest;
21309 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21311 if (cmp_mode != mode && !maskcmp)
21313 x = force_reg (cmp_ops_mode, x);
21314 convert_move (dest, x, false);
21316 else
21317 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21319 return dest;
21322 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21323 operations. This is used for both scalar and vector conditional moves. */
21325 static void
21326 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21328 machine_mode mode = GET_MODE (dest);
21329 machine_mode cmpmode = GET_MODE (cmp);
21331 /* In AVX512F the result of comparison is an integer mask. */
21332 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21334 rtx t2, t3, x;
21336 if (vector_all_ones_operand (op_true, mode)
21337 && rtx_equal_p (op_false, CONST0_RTX (mode))
21338 && !maskcmp)
21340 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21342 else if (op_false == CONST0_RTX (mode)
21343 && !maskcmp)
21345 op_true = force_reg (mode, op_true);
21346 x = gen_rtx_AND (mode, cmp, op_true);
21347 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21349 else if (op_true == CONST0_RTX (mode)
21350 && !maskcmp)
21352 op_false = force_reg (mode, op_false);
21353 x = gen_rtx_NOT (mode, cmp);
21354 x = gen_rtx_AND (mode, x, op_false);
21355 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21357 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21358 && !maskcmp)
21360 op_false = force_reg (mode, op_false);
21361 x = gen_rtx_IOR (mode, cmp, op_false);
21362 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21364 else if (TARGET_XOP
21365 && !maskcmp)
21367 op_true = force_reg (mode, op_true);
21369 if (!nonimmediate_operand (op_false, mode))
21370 op_false = force_reg (mode, op_false);
21372 emit_insn (gen_rtx_SET (mode, dest,
21373 gen_rtx_IF_THEN_ELSE (mode, cmp,
21374 op_true,
21375 op_false)));
21377 else
21379 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21380 rtx d = dest;
21382 if (!nonimmediate_operand (op_true, mode))
21383 op_true = force_reg (mode, op_true);
21385 op_false = force_reg (mode, op_false);
21387 switch (mode)
21389 case V4SFmode:
21390 if (TARGET_SSE4_1)
21391 gen = gen_sse4_1_blendvps;
21392 break;
21393 case V2DFmode:
21394 if (TARGET_SSE4_1)
21395 gen = gen_sse4_1_blendvpd;
21396 break;
21397 case V16QImode:
21398 case V8HImode:
21399 case V4SImode:
21400 case V2DImode:
21401 if (TARGET_SSE4_1)
21403 gen = gen_sse4_1_pblendvb;
21404 if (mode != V16QImode)
21405 d = gen_reg_rtx (V16QImode);
21406 op_false = gen_lowpart (V16QImode, op_false);
21407 op_true = gen_lowpart (V16QImode, op_true);
21408 cmp = gen_lowpart (V16QImode, cmp);
21410 break;
21411 case V8SFmode:
21412 if (TARGET_AVX)
21413 gen = gen_avx_blendvps256;
21414 break;
21415 case V4DFmode:
21416 if (TARGET_AVX)
21417 gen = gen_avx_blendvpd256;
21418 break;
21419 case V32QImode:
21420 case V16HImode:
21421 case V8SImode:
21422 case V4DImode:
21423 if (TARGET_AVX2)
21425 gen = gen_avx2_pblendvb;
21426 if (mode != V32QImode)
21427 d = gen_reg_rtx (V32QImode);
21428 op_false = gen_lowpart (V32QImode, op_false);
21429 op_true = gen_lowpart (V32QImode, op_true);
21430 cmp = gen_lowpart (V32QImode, cmp);
21432 break;
21434 case V64QImode:
21435 gen = gen_avx512bw_blendmv64qi;
21436 break;
21437 case V32HImode:
21438 gen = gen_avx512bw_blendmv32hi;
21439 break;
21440 case V16SImode:
21441 gen = gen_avx512f_blendmv16si;
21442 break;
21443 case V8DImode:
21444 gen = gen_avx512f_blendmv8di;
21445 break;
21446 case V8DFmode:
21447 gen = gen_avx512f_blendmv8df;
21448 break;
21449 case V16SFmode:
21450 gen = gen_avx512f_blendmv16sf;
21451 break;
21453 default:
21454 break;
21457 if (gen != NULL)
21459 emit_insn (gen (d, op_false, op_true, cmp));
21460 if (d != dest)
21461 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21463 else
21465 op_true = force_reg (mode, op_true);
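/* General fallback: dest = (op_true & cmp) | (op_false & ~cmp),
   built from two temporaries below. */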
21467 t2 = gen_reg_rtx (mode);
21468 if (optimize)
21469 t3 = gen_reg_rtx (mode);
21470 else
21471 t3 = dest;
21473 x = gen_rtx_AND (mode, op_true, cmp);
21474 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21476 x = gen_rtx_NOT (mode, cmp);
21477 x = gen_rtx_AND (mode, x, op_false);
21478 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21480 x = gen_rtx_IOR (mode, t3, t2);
21481 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21486 /* Expand a floating-point conditional move. Return true if successful. */
21488 bool
21489 ix86_expand_fp_movcc (rtx operands[])
21491 machine_mode mode = GET_MODE (operands[0]);
21492 enum rtx_code code = GET_CODE (operands[1]);
21493 rtx tmp, compare_op;
21494 rtx op0 = XEXP (operands[1], 0);
21495 rtx op1 = XEXP (operands[1], 1);
21497 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21499 machine_mode cmode;
21501 /* Since we've no cmove for sse registers, don't force bad register
21502 allocation just to gain access to it. Deny movcc when the
21503 comparison mode doesn't match the move mode. */
21504 cmode = GET_MODE (op0);
21505 if (cmode == VOIDmode)
21506 cmode = GET_MODE (op1);
21507 if (cmode != mode)
21508 return false;
21510 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21511 if (code == UNKNOWN)
21512 return false;
21514 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21515 operands[2], operands[3]))
21516 return true;
21518 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21519 operands[2], operands[3]);
21520 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21521 return true;
21524 if (GET_MODE (op0) == TImode
21525 || (GET_MODE (op0) == DImode
21526 && !TARGET_64BIT))
21527 return false;
21529 /* The floating point conditional move instructions don't directly
21530 support conditions resulting from a signed integer comparison. */
21532 compare_op = ix86_expand_compare (code, op0, op1);
21533 if (!fcmov_comparison_operator (compare_op, VOIDmode))
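/* Materialize the condition into a byte register with setcc and let
   fcmov test that byte against zero instead. */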
21535 tmp = gen_reg_rtx (QImode);
21536 ix86_expand_setcc (tmp, code, op0, op1);
21538 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21541 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21542 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21543 operands[2], operands[3])));
21545 return true;
21548 /* Expand a floating-point vector conditional move; a vcond operation
21549 rather than a movcc operation. */
21551 bool
21552 ix86_expand_fp_vcond (rtx operands[])
21554 enum rtx_code code = GET_CODE (operands[3]);
21555 rtx cmp;
21557 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21558 &operands[4], &operands[5]);
21559 if (code == UNKNOWN)
21561 rtx temp;
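/* LTGT and UNEQ cannot be handled directly here; synthesize the mask
   from two comparisons: LTGT is ORDERED && NE, UNEQ is UNORDERED || EQ. */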
21562 switch (GET_CODE (operands[3]))
21564 case LTGT:
21565 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21566 operands[5], operands[0], operands[0]);
21567 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21568 operands[5], operands[1], operands[2]);
21569 code = AND;
21570 break;
21571 case UNEQ:
21572 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21573 operands[5], operands[0], operands[0]);
21574 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21575 operands[5], operands[1], operands[2]);
21576 code = IOR;
21577 break;
21578 default:
21579 gcc_unreachable ();
21581 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21582 OPTAB_DIRECT);
21583 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21584 return true;
21587 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21588 operands[5], operands[1], operands[2]))
21589 return true;
21591 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21592 operands[1], operands[2]);
21593 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21594 return true;
21597 /* Expand a signed/unsigned integral vector conditional move. */
21599 bool
21600 ix86_expand_int_vcond (rtx operands[])
21602 machine_mode data_mode = GET_MODE (operands[0]);
21603 machine_mode mode = GET_MODE (operands[4]);
21604 enum rtx_code code = GET_CODE (operands[3]);
21605 bool negate = false;
21606 rtx x, cop0, cop1;
21608 cop0 = operands[4];
21609 cop1 = operands[5];
21611 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21612 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21613 if ((code == LT || code == GE)
21614 && data_mode == mode
21615 && cop1 == CONST0_RTX (mode)
21616 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21617 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21618 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21619 && (GET_MODE_SIZE (data_mode) == 16
21620 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21622 rtx negop = operands[2 - (code == LT)];
21623 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21624 if (negop == CONST1_RTX (data_mode))
21626 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21627 operands[0], 1, OPTAB_DIRECT);
21628 if (res != operands[0])
21629 emit_move_insn (operands[0], res);
21630 return true;
21632 else if (GET_MODE_INNER (data_mode) != DImode
21633 && vector_all_ones_operand (negop, data_mode))
21635 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21636 operands[0], 0, OPTAB_DIRECT);
21637 if (res != operands[0])
21638 emit_move_insn (operands[0], res);
21639 return true;
21643 if (!nonimmediate_operand (cop1, mode))
21644 cop1 = force_reg (mode, cop1);
21645 if (!general_operand (operands[1], data_mode))
21646 operands[1] = force_reg (data_mode, operands[1]);
21647 if (!general_operand (operands[2], data_mode))
21648 operands[2] = force_reg (data_mode, operands[2]);
21650 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21651 if (TARGET_XOP
21652 && (mode == V16QImode || mode == V8HImode
21653 || mode == V4SImode || mode == V2DImode))
21655 else
21657 /* Canonicalize the comparison to EQ, GT, GTU. */
21658 switch (code)
21660 case EQ:
21661 case GT:
21662 case GTU:
21663 break;
21665 case NE:
21666 case LE:
21667 case LEU:
21668 code = reverse_condition (code);
21669 negate = true;
21670 break;
21672 case GE:
21673 case GEU:
21674 code = reverse_condition (code);
21675 negate = true;
21676 /* FALLTHRU */
21678 case LT:
21679 case LTU:
21680 std::swap (cop0, cop1);
21681 code = swap_condition (code);
21682 break;
21684 default:
21685 gcc_unreachable ();
21688 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21689 if (mode == V2DImode)
21691 switch (code)
21693 case EQ:
21694 /* SSE4.1 supports EQ. */
21695 if (!TARGET_SSE4_1)
21696 return false;
21697 break;
21699 case GT:
21700 case GTU:
21701 /* SSE4.2 supports GT/GTU. */
21702 if (!TARGET_SSE4_2)
21703 return false;
21704 break;
21706 default:
21707 gcc_unreachable ();
21711 /* Unsigned parallel compare is not supported by the hardware.
21712 Play some tricks to turn this into a signed comparison
21713 against 0. */
21714 if (code == GTU)
21716 cop0 = force_reg (mode, cop0);
21718 switch (mode)
21720 case V16SImode:
21721 case V8DImode:
21722 case V8SImode:
21723 case V4DImode:
21724 case V4SImode:
21725 case V2DImode:
21727 rtx t1, t2, mask;
21728 rtx (*gen_sub3) (rtx, rtx, rtx);
21730 switch (mode)
21732 case V16SImode: gen_sub3 = gen_subv16si3; break;
21733 case V8DImode: gen_sub3 = gen_subv8di3; break;
21734 case V8SImode: gen_sub3 = gen_subv8si3; break;
21735 case V4DImode: gen_sub3 = gen_subv4di3; break;
21736 case V4SImode: gen_sub3 = gen_subv4si3; break;
21737 case V2DImode: gen_sub3 = gen_subv2di3; break;
21738 default:
21739 gcc_unreachable ();
21741 /* Subtract (-(INT MAX) - 1), i.e. flip the sign bit of both operands,
21742 so that unsigned order maps onto signed order and GT can be used. */
21743 mask = ix86_build_signbit_mask (mode, true, false);
21744 t1 = gen_reg_rtx (mode);
21745 emit_insn (gen_sub3 (t1, cop0, mask));
21747 t2 = gen_reg_rtx (mode);
21748 emit_insn (gen_sub3 (t2, cop1, mask));
21750 cop0 = t1;
21751 cop1 = t2;
21752 code = GT;
21754 break;
21756 case V64QImode:
21757 case V32HImode:
21758 case V32QImode:
21759 case V16HImode:
21760 case V16QImode:
21761 case V8HImode:
21762 /* Perform a parallel unsigned saturating subtraction. */
21763 x = gen_reg_rtx (mode);
21764 emit_insn (gen_rtx_SET (VOIDmode, x,
21765 gen_rtx_US_MINUS (mode, cop0, cop1)));
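/* The saturating subtraction yields zero exactly when cop0 <=u cop1,
   so an EQ test against zero implements LEU; flipping NEGATE recovers
   the original GTU. */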
21767 cop0 = x;
21768 cop1 = CONST0_RTX (mode);
21769 code = EQ;
21770 negate = !negate;
21771 break;
21773 default:
21774 gcc_unreachable ();
21779 /* Allow the comparison to be done in one mode, but the movcc to
21780 happen in another mode. */
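/* When NEGATE is set the condition was reversed above; compensate by
   swapping the two value operands passed to the blend. */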
21781 if (data_mode == mode)
21783 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21784 operands[1+negate], operands[2-negate]);
21786 else
21788 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21789 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21790 operands[1+negate], operands[2-negate]);
21791 if (GET_MODE (x) == mode)
21792 x = gen_lowpart (data_mode, x);
21795 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21796 operands[2-negate]);
21797 return true;
21800 /* AVX512F does support 64-byte integer vector operations,
21801 thus the longest vector we are faced with is V64QImode. */
21802 #define MAX_VECT_LEN 64
21804 struct expand_vec_perm_d
21806 rtx target, op0, op1;
21807 unsigned char perm[MAX_VECT_LEN];
21808 machine_mode vmode;
21809 unsigned char nelt;
21810 bool one_operand_p;
21811 bool testing_p;
21814 static bool
21815 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21816 struct expand_vec_perm_d *d)
21818 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21819 expander, so args are either in d, or in op0, op1 etc. */
21820 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21821 machine_mode maskmode = mode;
21822 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21824 switch (mode)
21826 case V8HImode:
21827 if (TARGET_AVX512VL && TARGET_AVX512BW)
21828 gen = gen_avx512vl_vpermi2varv8hi3;
21829 break;
21830 case V16HImode:
21831 if (TARGET_AVX512VL && TARGET_AVX512BW)
21832 gen = gen_avx512vl_vpermi2varv16hi3;
21833 break;
21834 case V32HImode:
21835 if (TARGET_AVX512BW)
21836 gen = gen_avx512bw_vpermi2varv32hi3;
21837 break;
21838 case V4SImode:
21839 if (TARGET_AVX512VL)
21840 gen = gen_avx512vl_vpermi2varv4si3;
21841 break;
21842 case V8SImode:
21843 if (TARGET_AVX512VL)
21844 gen = gen_avx512vl_vpermi2varv8si3;
21845 break;
21846 case V16SImode:
21847 if (TARGET_AVX512F)
21848 gen = gen_avx512f_vpermi2varv16si3;
21849 break;
21850 case V4SFmode:
21851 if (TARGET_AVX512VL)
21853 gen = gen_avx512vl_vpermi2varv4sf3;
21854 maskmode = V4SImode;
21856 break;
21857 case V8SFmode:
21858 if (TARGET_AVX512VL)
21860 gen = gen_avx512vl_vpermi2varv8sf3;
21861 maskmode = V8SImode;
21863 break;
21864 case V16SFmode:
21865 if (TARGET_AVX512F)
21867 gen = gen_avx512f_vpermi2varv16sf3;
21868 maskmode = V16SImode;
21870 break;
21871 case V2DImode:
21872 if (TARGET_AVX512VL)
21873 gen = gen_avx512vl_vpermi2varv2di3;
21874 break;
21875 case V4DImode:
21876 if (TARGET_AVX512VL)
21877 gen = gen_avx512vl_vpermi2varv4di3;
21878 break;
21879 case V8DImode:
21880 if (TARGET_AVX512F)
21881 gen = gen_avx512f_vpermi2varv8di3;
21882 break;
21883 case V2DFmode:
21884 if (TARGET_AVX512VL)
21886 gen = gen_avx512vl_vpermi2varv2df3;
21887 maskmode = V2DImode;
21889 break;
21890 case V4DFmode:
21891 if (TARGET_AVX512VL)
21893 gen = gen_avx512vl_vpermi2varv4df3;
21894 maskmode = V4DImode;
21896 break;
21897 case V8DFmode:
21898 if (TARGET_AVX512F)
21900 gen = gen_avx512f_vpermi2varv8df3;
21901 maskmode = V8DImode;
21903 break;
21904 default:
21905 break;
21908 if (gen == NULL)
21909 return false;
21911 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21912 expander, so args are either in d, or in op0, op1 etc. */
21913 if (d)
21915 rtx vec[64];
21916 target = d->target;
21917 op0 = d->op0;
21918 op1 = d->op1;
21919 for (int i = 0; i < d->nelt; ++i)
21920 vec[i] = GEN_INT (d->perm[i]);
21921 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21924 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21925 return true;
21928 /* Expand a variable vector permutation. */
21930 void
21931 ix86_expand_vec_perm (rtx operands[])
21933 rtx target = operands[0];
21934 rtx op0 = operands[1];
21935 rtx op1 = operands[2];
21936 rtx mask = operands[3];
21937 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21938 machine_mode mode = GET_MODE (op0);
21939 machine_mode maskmode = GET_MODE (mask);
21940 int w, e, i;
21941 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21943 /* Number of elements in the vector. */
21944 w = GET_MODE_NUNITS (mode);
21945 e = GET_MODE_UNIT_SIZE (mode);
21946 gcc_assert (w <= 64);
21948 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21949 return;
21951 if (TARGET_AVX2)
21953 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21955 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21956 a constant shuffle operand. With a tiny bit of effort we can
21957 use VPERMD instead. A re-interpretation stall for V4DFmode is
21958 unfortunate but there's no avoiding it.
21959 Similarly for V16HImode we don't have instructions for variable
21960 shuffling, while for V32QImode we can use vpshufb; vpshufb;
21961 vpermq; vpor after preparing suitable masks. */
21963 if (mode == V16HImode)
21965 maskmode = mode = V32QImode;
21966 w = 32;
21967 e = 1;
21969 else
21971 maskmode = mode = V8SImode;
21972 w = 8;
21973 e = 4;
21975 t1 = gen_reg_rtx (maskmode);
21977 /* Replicate the low bits of the V4DImode mask into V8SImode:
21978 mask = { A B C D }
21979 t1 = { A A B B C C D D }. */
21980 for (i = 0; i < w / 2; ++i)
21981 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21982 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21983 vt = force_reg (maskmode, vt);
21984 mask = gen_lowpart (maskmode, mask);
21985 if (maskmode == V8SImode)
21986 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21987 else
21988 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21990 /* Multiply the shuffle indices by two. */
21991 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21992 OPTAB_DIRECT);
21994 /* Add one to the odd shuffle indices:
21995 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21996 for (i = 0; i < w / 2; ++i)
21998 vec[i * 2] = const0_rtx;
21999 vec[i * 2 + 1] = const1_rtx;
22001 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22002 vt = validize_mem (force_const_mem (maskmode, vt));
22003 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22004 OPTAB_DIRECT);
22006 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22007 operands[3] = mask = t1;
22008 target = gen_reg_rtx (mode);
22009 op0 = gen_lowpart (mode, op0);
22010 op1 = gen_lowpart (mode, op1);
22013 switch (mode)
22015 case V8SImode:
22016 /* The VPERMD and VPERMPS instructions already properly ignore
22017 the high bits of the shuffle elements. No need for us to
22018 perform an AND ourselves. */
22019 if (one_operand_shuffle)
22021 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22022 if (target != operands[0])
22023 emit_move_insn (operands[0],
22024 gen_lowpart (GET_MODE (operands[0]), target));
22026 else
22028 t1 = gen_reg_rtx (V8SImode);
22029 t2 = gen_reg_rtx (V8SImode);
22030 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22031 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22032 goto merge_two;
22034 return;
22036 case V8SFmode:
22037 mask = gen_lowpart (V8SImode, mask);
22038 if (one_operand_shuffle)
22039 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22040 else
22042 t1 = gen_reg_rtx (V8SFmode);
22043 t2 = gen_reg_rtx (V8SFmode);
22044 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22045 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22046 goto merge_two;
22048 return;
22050 case V4SImode:
22051 /* By combining the two 128-bit input vectors into one 256-bit
22052 input vector, we can use VPERMD and VPERMPS for the full
22053 two-operand shuffle. */
22054 t1 = gen_reg_rtx (V8SImode);
22055 t2 = gen_reg_rtx (V8SImode);
22056 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22057 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22058 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22059 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22060 return;
22062 case V4SFmode:
22063 t1 = gen_reg_rtx (V8SFmode);
22064 t2 = gen_reg_rtx (V8SImode);
22065 mask = gen_lowpart (V4SImode, mask);
22066 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22067 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22068 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22069 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22070 return;
22072 case V32QImode:
22073 t1 = gen_reg_rtx (V32QImode);
22074 t2 = gen_reg_rtx (V32QImode);
22075 t3 = gen_reg_rtx (V32QImode);
22076 vt2 = GEN_INT (-128);
22077 for (i = 0; i < 32; i++)
22078 vec[i] = vt2;
22079 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22080 vt = force_reg (V32QImode, vt);
22081 for (i = 0; i < 32; i++)
22082 vec[i] = i < 16 ? vt2 : const0_rtx;
22083 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22084 vt2 = force_reg (V32QImode, vt2);
22085 /* From mask create two adjusted masks, which contain the same
22086 bits as mask in the low 7 bits of each vector element.
22087 The first mask will have the most significant bit clear
22088 if it requests element from the same 128-bit lane
22089 and MSB set if it requests element from the other 128-bit lane.
22090 The second mask will have the opposite values of the MSB,
22091 and additionally will have its 128-bit lanes swapped.
22092 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22093 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22094 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22095 stands for the other 12 bytes. */
22096 /* The bit that tells whether an element is from the same lane or the
22097 other lane is bit 4, so shift it up by 3 to the MSB position. */
22098 t5 = gen_reg_rtx (V4DImode);
22099 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22100 GEN_INT (3)));
22101 /* Clear MSB bits from the mask just in case it had them set. */
22102 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22103 /* After this t1 will have MSB set for elements from other lane. */
22104 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22105 /* Clear bits other than MSB. */
22106 emit_insn (gen_andv32qi3 (t1, t1, vt));
22107 /* Or in the lower bits from mask into t3. */
22108 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22109 /* And invert MSB bits in t1, so MSB is set for elements from the same
22110 lane. */
22111 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22112 /* Swap 128-bit lanes in t3. */
22113 t6 = gen_reg_rtx (V4DImode);
22114 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22115 const2_rtx, GEN_INT (3),
22116 const0_rtx, const1_rtx));
22117 /* And or in the lower bits from mask into t1. */
22118 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22119 if (one_operand_shuffle)
22121 /* Each of these shuffles will put 0s in places where
22122 element from the other 128-bit lane is needed, otherwise
22123 will shuffle in the requested value. */
22124 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22125 gen_lowpart (V32QImode, t6)));
22126 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22127 /* For t3 the 128-bit lanes are swapped again. */
22128 t7 = gen_reg_rtx (V4DImode);
22129 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22130 const2_rtx, GEN_INT (3),
22131 const0_rtx, const1_rtx));
22132 /* And oring both together leads to the result. */
22133 emit_insn (gen_iorv32qi3 (target, t1,
22134 gen_lowpart (V32QImode, t7)));
22135 if (target != operands[0])
22136 emit_move_insn (operands[0],
22137 gen_lowpart (GET_MODE (operands[0]), target));
22138 return;
22141 t4 = gen_reg_rtx (V32QImode);
22142 /* Similar to the one_operand_shuffle code above, just repeated
22143 twice, once for each operand. The merge_two: code below will
22144 merge the two results together. */
22145 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22146 gen_lowpart (V32QImode, t6)));
22147 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22148 gen_lowpart (V32QImode, t6)));
22149 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22150 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22151 t7 = gen_reg_rtx (V4DImode);
22152 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22153 const2_rtx, GEN_INT (3),
22154 const0_rtx, const1_rtx));
22155 t8 = gen_reg_rtx (V4DImode);
22156 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22157 const2_rtx, GEN_INT (3),
22158 const0_rtx, const1_rtx));
22159 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22160 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22161 t1 = t4;
22162 t2 = t3;
22163 goto merge_two;
22165 default:
22166 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22167 break;
22171 if (TARGET_XOP)
22173 /* The XOP VPPERM insn supports three inputs. By ignoring the
22174 one_operand_shuffle special case, we avoid creating another
22175 set of constant vectors in memory. */
22176 one_operand_shuffle = false;
22178 /* mask = mask & {2*w-1, ...} */
22179 vt = GEN_INT (2*w - 1);
22181 else
22183 /* mask = mask & {w-1, ...} */
22184 vt = GEN_INT (w - 1);
22187 for (i = 0; i < w; i++)
22188 vec[i] = vt;
22189 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22190 mask = expand_simple_binop (maskmode, AND, mask, vt,
22191 NULL_RTX, 0, OPTAB_DIRECT);
22193 /* For non-QImode operations, convert the word permutation control
22194 into a byte permutation control. */
22195 if (mode != V16QImode)
22197 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22198 GEN_INT (exact_log2 (e)),
22199 NULL_RTX, 0, OPTAB_DIRECT);
22201 /* Convert mask to vector of chars. */
22202 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22204 /* Replicate each of the input bytes into byte positions:
22205 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22206 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22207 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22208 for (i = 0; i < 16; ++i)
22209 vec[i] = GEN_INT (i/e * e);
22210 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22211 vt = validize_mem (force_const_mem (V16QImode, vt));
22212 if (TARGET_XOP)
22213 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22214 else
22215 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22217 /* Convert it into the byte positions by doing
22218 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22219 for (i = 0; i < 16; ++i)
22220 vec[i] = GEN_INT (i % e);
22221 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22222 vt = validize_mem (force_const_mem (V16QImode, vt));
22223 emit_insn (gen_addv16qi3 (mask, mask, vt));
22226 /* The actual shuffle operations all operate on V16QImode. */
22227 op0 = gen_lowpart (V16QImode, op0);
22228 op1 = gen_lowpart (V16QImode, op1);
22230 if (TARGET_XOP)
22232 if (GET_MODE (target) != V16QImode)
22233 target = gen_reg_rtx (V16QImode);
22234 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22235 if (target != operands[0])
22236 emit_move_insn (operands[0],
22237 gen_lowpart (GET_MODE (operands[0]), target));
22239 else if (one_operand_shuffle)
22241 if (GET_MODE (target) != V16QImode)
22242 target = gen_reg_rtx (V16QImode);
22243 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22244 if (target != operands[0])
22245 emit_move_insn (operands[0],
22246 gen_lowpart (GET_MODE (operands[0]), target));
22248 else
22250 rtx xops[6];
22251 bool ok;
22253 /* Shuffle the two input vectors independently. */
22254 t1 = gen_reg_rtx (V16QImode);
22255 t2 = gen_reg_rtx (V16QImode);
22256 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22257 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22259 merge_two:
22260 /* Then merge them together. The key is whether any given control
22261 element contained a bit set that indicates the second word. */
22262 mask = operands[3];
22263 vt = GEN_INT (w);
22264 if (maskmode == V2DImode && !TARGET_SSE4_1)
22266 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22267 more shuffle to convert the V2DI input mask into a V4SI
22268 input mask. At that point the masking done by
22269 ix86_expand_int_vcond will work as desired. */
22270 rtx t3 = gen_reg_rtx (V4SImode);
22271 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22272 const0_rtx, const0_rtx,
22273 const2_rtx, const2_rtx));
22274 mask = t3;
22275 maskmode = V4SImode;
22276 e = w = 4;
22279 for (i = 0; i < w; i++)
22280 vec[i] = vt;
22281 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22282 vt = force_reg (maskmode, vt);
22283 mask = expand_simple_binop (maskmode, AND, mask, vt,
22284 NULL_RTX, 0, OPTAB_DIRECT);
22286 if (GET_MODE (target) != mode)
22287 target = gen_reg_rtx (mode);
22288 xops[0] = target;
22289 xops[1] = gen_lowpart (mode, t2);
22290 xops[2] = gen_lowpart (mode, t1);
22291 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22292 xops[4] = mask;
22293 xops[5] = vt;
22294 ok = ix86_expand_int_vcond (xops);
22295 gcc_assert (ok);
22296 if (target != operands[0])
22297 emit_move_insn (operands[0],
22298 gen_lowpart (GET_MODE (operands[0]), target));
22302 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
22303 true if we should do zero extension, else sign extension. HIGH_P is
22304 true if we want the N/2 high elements, else the low elements. */
22306 void
22307 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22309 machine_mode imode = GET_MODE (src);
22310 rtx tmp;
22312 if (TARGET_SSE4_1)
22314 rtx (*unpack)(rtx, rtx);
22315 rtx (*extract)(rtx, rtx) = NULL;
22316 machine_mode halfmode = BLKmode;
22318 switch (imode)
22320 case V64QImode:
22321 if (unsigned_p)
22322 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22323 else
22324 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22325 halfmode = V32QImode;
22326 extract
22327 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22328 break;
22329 case V32QImode:
22330 if (unsigned_p)
22331 unpack = gen_avx2_zero_extendv16qiv16hi2;
22332 else
22333 unpack = gen_avx2_sign_extendv16qiv16hi2;
22334 halfmode = V16QImode;
22335 extract
22336 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22337 break;
22338 case V32HImode:
22339 if (unsigned_p)
22340 unpack = gen_avx512f_zero_extendv16hiv16si2;
22341 else
22342 unpack = gen_avx512f_sign_extendv16hiv16si2;
22343 halfmode = V16HImode;
22344 extract
22345 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22346 break;
22347 case V16HImode:
22348 if (unsigned_p)
22349 unpack = gen_avx2_zero_extendv8hiv8si2;
22350 else
22351 unpack = gen_avx2_sign_extendv8hiv8si2;
22352 halfmode = V8HImode;
22353 extract
22354 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22355 break;
22356 case V16SImode:
22357 if (unsigned_p)
22358 unpack = gen_avx512f_zero_extendv8siv8di2;
22359 else
22360 unpack = gen_avx512f_sign_extendv8siv8di2;
22361 halfmode = V8SImode;
22362 extract
22363 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22364 break;
22365 case V8SImode:
22366 if (unsigned_p)
22367 unpack = gen_avx2_zero_extendv4siv4di2;
22368 else
22369 unpack = gen_avx2_sign_extendv4siv4di2;
22370 halfmode = V4SImode;
22371 extract
22372 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22373 break;
22374 case V16QImode:
22375 if (unsigned_p)
22376 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22377 else
22378 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22379 break;
22380 case V8HImode:
22381 if (unsigned_p)
22382 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22383 else
22384 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22385 break;
22386 case V4SImode:
22387 if (unsigned_p)
22388 unpack = gen_sse4_1_zero_extendv2siv2di2;
22389 else
22390 unpack = gen_sse4_1_sign_extendv2siv2di2;
22391 break;
22392 default:
22393 gcc_unreachable ();
22396 if (GET_MODE_SIZE (imode) >= 32)
22398 tmp = gen_reg_rtx (halfmode);
22399 emit_insn (extract (tmp, src));
22401 else if (high_p)
22403 /* Shift the higher 8 bytes into the lower 8 bytes. */
22404 tmp = gen_reg_rtx (V1TImode);
22405 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22406 GEN_INT (64)));
22407 tmp = gen_lowpart (imode, tmp);
22409 else
22410 tmp = src;
22412 emit_insn (unpack (dest, tmp));
22414 else
22416 rtx (*unpack)(rtx, rtx, rtx);
22418 switch (imode)
22420 case V16QImode:
22421 if (high_p)
22422 unpack = gen_vec_interleave_highv16qi;
22423 else
22424 unpack = gen_vec_interleave_lowv16qi;
22425 break;
22426 case V8HImode:
22427 if (high_p)
22428 unpack = gen_vec_interleave_highv8hi;
22429 else
22430 unpack = gen_vec_interleave_lowv8hi;
22431 break;
22432 case V4SImode:
22433 if (high_p)
22434 unpack = gen_vec_interleave_highv4si;
22435 else
22436 unpack = gen_vec_interleave_lowv4si;
22437 break;
22438 default:
22439 gcc_unreachable ();
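/* For zero extension interleave SRC with a zero vector; for sign
   extension interleave it with copies of the sign bit obtained from
   the 0 > SRC comparison. */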
22442 if (unsigned_p)
22443 tmp = force_reg (imode, CONST0_RTX (imode));
22444 else
22445 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22446 src, pc_rtx, pc_rtx);
22448 rtx tmp2 = gen_reg_rtx (imode);
22449 emit_insn (unpack (tmp2, src, tmp));
22450 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22454 /* Expand conditional increment or decrement using adc/sbb instructions.
22455 The default case using setcc followed by a conditional move can be
22456 done by generic code. */
22457 bool
22458 ix86_expand_int_addcc (rtx operands[])
22460 enum rtx_code code = GET_CODE (operands[1]);
22461 rtx flags;
22462 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22463 rtx compare_op;
22464 rtx val = const0_rtx;
22465 bool fpcmp = false;
22466 machine_mode mode;
22467 rtx op0 = XEXP (operands[1], 0);
22468 rtx op1 = XEXP (operands[1], 1);
22470 if (operands[3] != const1_rtx
22471 && operands[3] != constm1_rtx)
22472 return false;
22473 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22474 return false;
22475 code = GET_CODE (compare_op);
22477 flags = XEXP (compare_op, 0);
22479 if (GET_MODE (flags) == CCFPmode
22480 || GET_MODE (flags) == CCFPUmode)
22482 fpcmp = true;
22483 code = ix86_fp_compare_code_to_integer (code);
22486 if (code != LTU)
22488 val = constm1_rtx;
22489 if (fpcmp)
22490 PUT_CODE (compare_op,
22491 reverse_condition_maybe_unordered
22492 (GET_CODE (compare_op)));
22493 else
22494 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22497 mode = GET_MODE (operands[0]);
22499 /* Construct either adc or sbb insn. */
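/* operands[3] is +1 or -1; the increment or decrement is applied
   through the carry flag set up by the comparison, using an adc or
   sbb pattern. */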
22500 if ((code == LTU) == (operands[3] == constm1_rtx))
22502 switch (mode)
22504 case QImode:
22505 insn = gen_subqi3_carry;
22506 break;
22507 case HImode:
22508 insn = gen_subhi3_carry;
22509 break;
22510 case SImode:
22511 insn = gen_subsi3_carry;
22512 break;
22513 case DImode:
22514 insn = gen_subdi3_carry;
22515 break;
22516 default:
22517 gcc_unreachable ();
22520 else
22522 switch (mode)
22524 case QImode:
22525 insn = gen_addqi3_carry;
22526 break;
22527 case HImode:
22528 insn = gen_addhi3_carry;
22529 break;
22530 case SImode:
22531 insn = gen_addsi3_carry;
22532 break;
22533 case DImode:
22534 insn = gen_adddi3_carry;
22535 break;
22536 default:
22537 gcc_unreachable ();
22540 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22542 return true;
22546 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22547 but works for floating point parameters and non-offsettable memories.
22548 For pushes, it returns just stack offsets; the values will be saved
22549 in the right order. At most four parts are generated. */
22551 static int
22552 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22554 int size;
22556 if (!TARGET_64BIT)
22557 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22558 else
22559 size = (GET_MODE_SIZE (mode) + 4) / 8;
22561 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22562 gcc_assert (size >= 2 && size <= 4);
22564 /* Optimize constant pool references to immediates. This is used by fp
22565 moves, which force all constants to memory to allow combining. */
22566 if (MEM_P (operand) && MEM_READONLY_P (operand))
22568 rtx tmp = maybe_get_pool_constant (operand);
22569 if (tmp)
22570 operand = tmp;
22573 if (MEM_P (operand) && !offsettable_memref_p (operand))
22575 /* The only non-offsettable memories we handle are pushes. */
22576 int ok = push_operand (operand, VOIDmode);
22578 gcc_assert (ok);
22580 operand = copy_rtx (operand);
22581 PUT_MODE (operand, word_mode);
22582 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22583 return size;
22586 if (GET_CODE (operand) == CONST_VECTOR)
22588 machine_mode imode = int_mode_for_mode (mode);
22589 /* Caution: if we looked through a constant pool memory above,
22590 the operand may actually have a different mode now. That's
22591 ok, since we want to pun this all the way back to an integer. */
22592 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22593 gcc_assert (operand != NULL);
22594 mode = imode;
22597 if (!TARGET_64BIT)
22599 if (mode == DImode)
22600 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22601 else
22603 int i;
22605 if (REG_P (operand))
22607 gcc_assert (reload_completed);
22608 for (i = 0; i < size; i++)
22609 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22611 else if (offsettable_memref_p (operand))
22613 operand = adjust_address (operand, SImode, 0);
22614 parts[0] = operand;
22615 for (i = 1; i < size; i++)
22616 parts[i] = adjust_address (operand, SImode, 4 * i);
22618 else if (GET_CODE (operand) == CONST_DOUBLE)
22620 REAL_VALUE_TYPE r;
22621 long l[4];
22623 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22624 switch (mode)
22626 case TFmode:
22627 real_to_target (l, &r, mode);
22628 parts[3] = gen_int_mode (l[3], SImode);
22629 parts[2] = gen_int_mode (l[2], SImode);
22630 break;
22631 case XFmode:
22632 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22633 long double may not be 80-bit. */
22634 real_to_target (l, &r, mode);
22635 parts[2] = gen_int_mode (l[2], SImode);
22636 break;
22637 case DFmode:
22638 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22639 break;
22640 default:
22641 gcc_unreachable ();
22643 parts[1] = gen_int_mode (l[1], SImode);
22644 parts[0] = gen_int_mode (l[0], SImode);
22646 else
22647 gcc_unreachable ();
22650 else
22652 if (mode == TImode)
22653 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22654 if (mode == XFmode || mode == TFmode)
22656 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22657 if (REG_P (operand))
22659 gcc_assert (reload_completed);
22660 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22661 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22663 else if (offsettable_memref_p (operand))
22665 operand = adjust_address (operand, DImode, 0);
22666 parts[0] = operand;
22667 parts[1] = adjust_address (operand, upper_mode, 8);
22669 else if (GET_CODE (operand) == CONST_DOUBLE)
22671 REAL_VALUE_TYPE r;
22672 long l[4];
22674 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22675 real_to_target (l, &r, mode);
22677 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22678 if (HOST_BITS_PER_WIDE_INT >= 64)
22679 parts[0]
22680 = gen_int_mode
22681 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22682 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22683 DImode);
22684 else
22685 parts[0] = immed_double_const (l[0], l[1], DImode);
22687 if (upper_mode == SImode)
22688 parts[1] = gen_int_mode (l[2], SImode);
22689 else if (HOST_BITS_PER_WIDE_INT >= 64)
22690 parts[1]
22691 = gen_int_mode
22692 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22693 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22694 DImode);
22695 else
22696 parts[1] = immed_double_const (l[2], l[3], DImode);
22698 else
22699 gcc_unreachable ();
22703 return size;
22706 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22707 Operands 2-4 contain the input values in the correct order;
22708 operands 5-7 contain the output values. */
22711 void
22712 ix86_split_long_move (rtx operands[])
22714 rtx part[2][4];
22715 int nparts, i, j;
22716 int push = 0;
22717 int collisions = 0;
22718 machine_mode mode = GET_MODE (operands[0]);
22719 bool collisionparts[4];
22721 /* The DFmode expanders may ask us to move a double.
22722 For a 64bit target this is a single move. By hiding the fact
22723 here we simplify the i386.md splitters. */
22724 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22726 /* Optimize constant pool reference to immediates. This is used by
22727 fp moves, which force all constants to memory to allow combining. */
22729 if (MEM_P (operands[1])
22730 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22731 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22732 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22733 if (push_operand (operands[0], VOIDmode))
22735 operands[0] = copy_rtx (operands[0]);
22736 PUT_MODE (operands[0], word_mode);
22738 else
22739 operands[0] = gen_lowpart (DImode, operands[0]);
22740 operands[1] = gen_lowpart (DImode, operands[1]);
22741 emit_move_insn (operands[0], operands[1]);
22742 return;
22745 /* The only non-offsettable memory we handle is push. */
22746 if (push_operand (operands[0], VOIDmode))
22747 push = 1;
22748 else
22749 gcc_assert (!MEM_P (operands[0])
22750 || offsettable_memref_p (operands[0]));
22752 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22753 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22755 /* When emitting a push, take care of source operands on the stack. */
22756 if (push && MEM_P (operands[1])
22757 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22759 rtx src_base = XEXP (part[1][nparts - 1], 0);
22761 /* Compensate for the stack decrement by 4. */
22762 if (!TARGET_64BIT && nparts == 3
22763 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22764 src_base = plus_constant (Pmode, src_base, 4);
22766 /* src_base refers to the stack pointer and is
22767 automatically decreased by emitted push. */
22768 for (i = 0; i < nparts; i++)
22769 part[1][i] = change_address (part[1][i],
22770 GET_MODE (part[1][i]), src_base);
22773 /* We need to do the copy in the right order in case an address register
22774 of the source overlaps the destination. */
22775 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22777 rtx tmp;
22779 for (i = 0; i < nparts; i++)
22781 collisionparts[i]
22782 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22783 if (collisionparts[i])
22784 collisions++;
22787 /* Collision in the middle part can be handled by reordering. */
22788 if (collisions == 1 && nparts == 3 && collisionparts [1])
22790 std::swap (part[0][1], part[0][2]);
22791 std::swap (part[1][1], part[1][2]);
22793 else if (collisions == 1
22794 && nparts == 4
22795 && (collisionparts [1] || collisionparts [2]))
22797 if (collisionparts [1])
22799 std::swap (part[0][1], part[0][2]);
22800 std::swap (part[1][1], part[1][2]);
22802 else
22804 std::swap (part[0][2], part[0][3]);
22805 std::swap (part[1][2], part[1][3]);
22809 /* If there are more collisions, we can't handle it by reordering.
22810 Do an lea to the last part and use only one colliding move. */
22811 else if (collisions > 1)
22813 rtx base;
22815 collisions = 1;
22817 base = part[0][nparts - 1];
22819 /* Handle the case when the last part isn't valid for lea.
22820 Happens in 64-bit mode storing the 12-byte XFmode. */
22821 if (GET_MODE (base) != Pmode)
22822 base = gen_rtx_REG (Pmode, REGNO (base));
22824 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22825 part[1][0] = replace_equiv_address (part[1][0], base);
22826 for (i = 1; i < nparts; i++)
22828 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22829 part[1][i] = replace_equiv_address (part[1][i], tmp);
22834 if (push)
22836 if (!TARGET_64BIT)
22838 if (nparts == 3)
22840 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22841 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22842 stack_pointer_rtx, GEN_INT (-4)));
22843 emit_move_insn (part[0][2], part[1][2]);
22845 else if (nparts == 4)
22847 emit_move_insn (part[0][3], part[1][3]);
22848 emit_move_insn (part[0][2], part[1][2]);
22851 else
22853 /* In 64bit mode we don't have a 32bit push available. In case this is a
22854 register, that is OK - we will just use the larger counterpart. We also
22855 retype the memory - this comes from an attempt to avoid the REX prefix on
22856 moving the second half of a TFmode value. */
22857 if (GET_MODE (part[1][1]) == SImode)
22859 switch (GET_CODE (part[1][1]))
22861 case MEM:
22862 part[1][1] = adjust_address (part[1][1], DImode, 0);
22863 break;
22865 case REG:
22866 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22867 break;
22869 default:
22870 gcc_unreachable ();
22873 if (GET_MODE (part[1][0]) == SImode)
22874 part[1][0] = part[1][1];
22877 emit_move_insn (part[0][1], part[1][1]);
22878 emit_move_insn (part[0][0], part[1][0]);
22879 return;
22882 /* Choose correct order to not overwrite the source before it is copied. */
22883 if ((REG_P (part[0][0])
22884 && REG_P (part[1][1])
22885 && (REGNO (part[0][0]) == REGNO (part[1][1])
22886 || (nparts == 3
22887 && REGNO (part[0][0]) == REGNO (part[1][2]))
22888 || (nparts == 4
22889 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22890 || (collisions > 0
22891 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22893 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22895 operands[2 + i] = part[0][j];
22896 operands[6 + i] = part[1][j];
22899 else
22901 for (i = 0; i < nparts; i++)
22903 operands[2 + i] = part[0][i];
22904 operands[6 + i] = part[1][i];
22908 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22909 if (optimize_insn_for_size_p ())
22911 for (j = 0; j < nparts - 1; j++)
22912 if (CONST_INT_P (operands[6 + j])
22913 && operands[6 + j] != const0_rtx
22914 && REG_P (operands[2 + j]))
22915 for (i = j; i < nparts - 1; i++)
22916 if (CONST_INT_P (operands[7 + i])
22917 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22918 operands[7 + i] = operands[2 + j];
22921 for (i = 0; i < nparts; i++)
22922 emit_move_insn (operands[2 + i], operands[6 + i]);
22924 return;
22927 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22928 left shift by a constant, either using a single shift or
22929 a sequence of add instructions. */
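      /* For example, when splitting a DImode shift, a constant shift of a
         half-word part by 2 can be emitted as two self-additions
         (operand = operand + operand, twice) whenever two adds are no more
         costly than one constant shift and we are not optimizing for size;
         otherwise a single shift insn is used, as the cost check below decides.  */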
22931 static void
22932 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22934 rtx (*insn)(rtx, rtx, rtx);
22936 if (count == 1
22937 || (count * ix86_cost->add <= ix86_cost->shift_const
22938 && !optimize_insn_for_size_p ()))
22940 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22941 while (count-- > 0)
22942 emit_insn (insn (operand, operand, operand));
22944 else
22946 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22947 emit_insn (insn (operand, operand, GEN_INT (count)));
22951 void
22952 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22954 rtx (*gen_ashl3)(rtx, rtx, rtx);
22955 rtx (*gen_shld)(rtx, rtx, rtx);
22956 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22958 rtx low[2], high[2];
22959 int count;
22961 if (CONST_INT_P (operands[2]))
22963 split_double_mode (mode, operands, 2, low, high);
22964 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22966 if (count >= half_width)
22968 emit_move_insn (high[0], low[1]);
22969 emit_move_insn (low[0], const0_rtx);
22971 if (count > half_width)
22972 ix86_expand_ashl_const (high[0], count - half_width, mode);
22974 else
22976 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22978 if (!rtx_equal_p (operands[0], operands[1]))
22979 emit_move_insn (operands[0], operands[1]);
22981 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22982 ix86_expand_ashl_const (low[0], count, mode);
22984 return;
22987 split_double_mode (mode, operands, 1, low, high);
22989 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22991 if (operands[1] == const1_rtx)
22993 /* Assuming we've chosen QImode-capable registers, 1 << N
22994 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22995 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22997 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22999 ix86_expand_clear (low[0]);
23000 ix86_expand_clear (high[0]);
23001 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23003 d = gen_lowpart (QImode, low[0]);
23004 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23005 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23006 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23008 d = gen_lowpart (QImode, high[0]);
23009 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23010 s = gen_rtx_NE (QImode, flags, const0_rtx);
23011 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23014 /* Otherwise, we can get the same results by manually performing
23015 a bit extract operation on bit 5/6, and then performing the two
23016 shifts. The two methods of getting 0/1 into low/high are exactly
23017 the same size. Avoiding the shift in the bit extract case helps
23018 pentium4 a bit; no one else seems to care much either way. */
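      /* A rough sketch of the bit-extract expansion for DImode 1 << N:
           high = (N >> 5) & 1;
           low  = high ^ 1;
           low <<= N;   high <<= N;   (the hardware shift uses only N & 31)  */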
23019 else
23021 machine_mode half_mode;
23022 rtx (*gen_lshr3)(rtx, rtx, rtx);
23023 rtx (*gen_and3)(rtx, rtx, rtx);
23024 rtx (*gen_xor3)(rtx, rtx, rtx);
23025 HOST_WIDE_INT bits;
23026 rtx x;
23028 if (mode == DImode)
23030 half_mode = SImode;
23031 gen_lshr3 = gen_lshrsi3;
23032 gen_and3 = gen_andsi3;
23033 gen_xor3 = gen_xorsi3;
23034 bits = 5;
23036 else
23038 half_mode = DImode;
23039 gen_lshr3 = gen_lshrdi3;
23040 gen_and3 = gen_anddi3;
23041 gen_xor3 = gen_xordi3;
23042 bits = 6;
23045 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23046 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23047 else
23048 x = gen_lowpart (half_mode, operands[2]);
23049 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23051 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23052 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23053 emit_move_insn (low[0], high[0]);
23054 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23057 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23058 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23059 return;
23062 if (operands[1] == constm1_rtx)
23064 /* For -1 << N, we can avoid the shld instruction, because we
23065 know that we're shifting 0...31/63 ones into a -1. */
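      /* For example, in DImode (-1) << 5 is { low = 0xffffffe0, high = 0xffffffff },
         so no shld of the high part is needed; the plain shift of the low part
         below (plus the variable-count fixup) produces the result.  */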
23066 emit_move_insn (low[0], constm1_rtx);
23067 if (optimize_insn_for_size_p ())
23068 emit_move_insn (high[0], low[0]);
23069 else
23070 emit_move_insn (high[0], constm1_rtx);
23072 else
23074 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23076 if (!rtx_equal_p (operands[0], operands[1]))
23077 emit_move_insn (operands[0], operands[1]);
23079 split_double_mode (mode, operands, 1, low, high);
23080 emit_insn (gen_shld (high[0], low[0], operands[2]));
23083 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23085 if (TARGET_CMOVE && scratch)
23087 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23088 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23090 ix86_expand_clear (scratch);
23091 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23093 else
23095 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23096 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23098 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23102 void
23103 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23105 rtx (*gen_ashr3)(rtx, rtx, rtx)
23106 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23107 rtx (*gen_shrd)(rtx, rtx, rtx);
23108 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23110 rtx low[2], high[2];
23111 int count;
23113 if (CONST_INT_P (operands[2]))
23115 split_double_mode (mode, operands, 2, low, high);
23116 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23118 if (count == GET_MODE_BITSIZE (mode) - 1)
23120 emit_move_insn (high[0], high[1]);
23121 emit_insn (gen_ashr3 (high[0], high[0],
23122 GEN_INT (half_width - 1)));
23123 emit_move_insn (low[0], high[0]);
23126 else if (count >= half_width)
23128 emit_move_insn (low[0], high[1]);
23129 emit_move_insn (high[0], low[0]);
23130 emit_insn (gen_ashr3 (high[0], high[0],
23131 GEN_INT (half_width - 1)));
23133 if (count > half_width)
23134 emit_insn (gen_ashr3 (low[0], low[0],
23135 GEN_INT (count - half_width)));
23137 else
23139 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23141 if (!rtx_equal_p (operands[0], operands[1]))
23142 emit_move_insn (operands[0], operands[1]);
23144 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23145 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23148 else
23150 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23152 if (!rtx_equal_p (operands[0], operands[1]))
23153 emit_move_insn (operands[0], operands[1]);
23155 split_double_mode (mode, operands, 1, low, high);
23157 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23158 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23160 if (TARGET_CMOVE && scratch)
23162 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23163 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23165 emit_move_insn (scratch, high[0]);
23166 emit_insn (gen_ashr3 (scratch, scratch,
23167 GEN_INT (half_width - 1)));
23168 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23169 scratch));
23171 else
23173 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23174 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23176 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23181 void
23182 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23184 rtx (*gen_lshr3)(rtx, rtx, rtx)
23185 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23186 rtx (*gen_shrd)(rtx, rtx, rtx);
23187 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23189 rtx low[2], high[2];
23190 int count;
23192 if (CONST_INT_P (operands[2]))
23194 split_double_mode (mode, operands, 2, low, high);
23195 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23197 if (count >= half_width)
23199 emit_move_insn (low[0], high[1]);
23200 ix86_expand_clear (high[0]);
23202 if (count > half_width)
23203 emit_insn (gen_lshr3 (low[0], low[0],
23204 GEN_INT (count - half_width)));
23206 else
23208 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23210 if (!rtx_equal_p (operands[0], operands[1]))
23211 emit_move_insn (operands[0], operands[1]);
23213 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23214 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23217 else
23219 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23221 if (!rtx_equal_p (operands[0], operands[1]))
23222 emit_move_insn (operands[0], operands[1]);
23224 split_double_mode (mode, operands, 1, low, high);
23226 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23227 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23229 if (TARGET_CMOVE && scratch)
23231 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23232 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23234 ix86_expand_clear (scratch);
23235 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23236 scratch));
23238 else
23240 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23241 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23243 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23248 /* Predict just emitted jump instruction to be taken with probability PROB. */
23249 static void
23250 predict_jump (int prob)
23252 rtx insn = get_last_insn ();
23253 gcc_assert (JUMP_P (insn));
23254 add_int_reg_note (insn, REG_BR_PROB, prob);
23257 /* Helper function for the string operations below. Test VARIABLE for whether
23258 it is aligned to VALUE bytes. If so, jump to the label. */
23259 static rtx_code_label *
23260 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23262 rtx_code_label *label = gen_label_rtx ();
23263 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23264 if (GET_MODE (variable) == DImode)
23265 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23266 else
23267 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23268 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23269 1, label);
23270 if (epilogue)
23271 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23272 else
23273 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23274 return label;
23277 /* Adjust COUNTER by the VALUE. */
23278 static void
23279 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23281 rtx (*gen_add)(rtx, rtx, rtx)
23282 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23284 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23287 /* Zero extend possibly SImode EXP to Pmode register. */
23289 ix86_zero_extend_to_Pmode (rtx exp)
23291 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23294 /* Divide COUNTREG by SCALE. */
23295 static rtx
23296 scale_counter (rtx countreg, int scale)
23298 rtx sc;
23300 if (scale == 1)
23301 return countreg;
23302 if (CONST_INT_P (countreg))
23303 return GEN_INT (INTVAL (countreg) / scale);
23304 gcc_assert (REG_P (countreg));
23306 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23307 GEN_INT (exact_log2 (scale)),
23308 NULL, 1, OPTAB_DIRECT);
23309 return sc;
23312 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23313 DImode for constant loop counts. */
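   /* For example, a constant count of 1000 yields SImode even on 64-bit targets,
      while a constant that does not fit in 32 bits yields DImode; a counter that
      already carries a mode keeps it.  */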
23315 static machine_mode
23316 counter_mode (rtx count_exp)
23318 if (GET_MODE (count_exp) != VOIDmode)
23319 return GET_MODE (count_exp);
23320 if (!CONST_INT_P (count_exp))
23321 return Pmode;
23322 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23323 return DImode;
23324 return SImode;
23327 /* Copy the address to a Pmode register. This is used for x32 to
23328 truncate DImode TLS address to a SImode register. */
23330 static rtx
23331 ix86_copy_addr_to_reg (rtx addr)
23333 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23334 return copy_addr_to_reg (addr);
23335 else
23337 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23338 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23342 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
23343 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is COUNT,
23344 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23345 memory by VALUE (supposed to be in MODE).
23347 The size is rounded down to whole number of chunk size moved at once.
23348 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
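   /* Roughly, for the copy case the emitted code has the shape:

        size = count & ~(piece_size * unroll - 1);
        iter = 0;
      top:
        copy piece_size bytes from src + iter to dest + iter, UNROLL times;
        iter += piece_size * unroll;
        if (iter < size) goto top;
        dest += iter;  src += iter;  */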
23351 static void
23352 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23353 rtx destptr, rtx srcptr, rtx value,
23354 rtx count, machine_mode mode, int unroll,
23355 int expected_size, bool issetmem)
23357 rtx_code_label *out_label, *top_label;
23358 rtx iter, tmp;
23359 machine_mode iter_mode = counter_mode (count);
23360 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23361 rtx piece_size = GEN_INT (piece_size_n);
23362 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23363 rtx size;
23364 int i;
23366 top_label = gen_label_rtx ();
23367 out_label = gen_label_rtx ();
23368 iter = gen_reg_rtx (iter_mode);
23370 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23371 NULL, 1, OPTAB_DIRECT);
23372 /* Those two should combine. */
23373 if (piece_size == const1_rtx)
23375 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23376 true, out_label);
23377 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23379 emit_move_insn (iter, const0_rtx);
23381 emit_label (top_label);
23383 tmp = convert_modes (Pmode, iter_mode, iter, true);
23385 /* This assert could be relaxed - in that case we'd need to compute
23386 the smallest power of two containing PIECE_SIZE_N and pass it to
23387 offset_address. */
23388 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23389 destmem = offset_address (destmem, tmp, piece_size_n);
23390 destmem = adjust_address (destmem, mode, 0);
23392 if (!issetmem)
23394 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23395 srcmem = adjust_address (srcmem, mode, 0);
23397 /* When unrolling for chips that reorder memory reads and writes,
23398 we can save registers by using a single temporary.
23399 Also, using 4 temporaries is overkill in 32bit mode. */
23400 if (!TARGET_64BIT && 0)
23402 for (i = 0; i < unroll; i++)
23404 if (i)
23406 destmem =
23407 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23408 srcmem =
23409 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23411 emit_move_insn (destmem, srcmem);
23414 else
23416 rtx tmpreg[4];
23417 gcc_assert (unroll <= 4);
23418 for (i = 0; i < unroll; i++)
23420 tmpreg[i] = gen_reg_rtx (mode);
23421 if (i)
23423 srcmem =
23424 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23426 emit_move_insn (tmpreg[i], srcmem);
23428 for (i = 0; i < unroll; i++)
23430 if (i)
23432 destmem =
23433 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23435 emit_move_insn (destmem, tmpreg[i]);
23439 else
23440 for (i = 0; i < unroll; i++)
23442 if (i)
23443 destmem =
23444 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23445 emit_move_insn (destmem, value);
23448 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23449 true, OPTAB_LIB_WIDEN);
23450 if (tmp != iter)
23451 emit_move_insn (iter, tmp);
23453 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23454 true, top_label);
23455 if (expected_size != -1)
23457 expected_size /= GET_MODE_SIZE (mode) * unroll;
23458 if (expected_size == 0)
23459 predict_jump (0);
23460 else if (expected_size > REG_BR_PROB_BASE)
23461 predict_jump (REG_BR_PROB_BASE - 1);
23462 else
23463 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23465 else
23466 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23467 iter = ix86_zero_extend_to_Pmode (iter);
23468 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23469 true, OPTAB_LIB_WIDEN);
23470 if (tmp != destptr)
23471 emit_move_insn (destptr, tmp);
23472 if (!issetmem)
23474 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23475 true, OPTAB_LIB_WIDEN);
23476 if (tmp != srcptr)
23477 emit_move_insn (srcptr, tmp);
23479 emit_label (out_label);
23482 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23483 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23484 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23485 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23486 ORIG_VALUE is the original value passed to memset to fill the memory with.
23487 Other arguments have same meaning as for previous function. */
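   /* For example, a constant-size memset of zero whose length is a multiple of 4
      is promoted from QImode to SImode here, so a rep stos of 4-byte chunks with
      the count divided by 4 is emitted instead of a byte-wise rep stos.  */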
23489 static void
23490 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23491 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23492 rtx count,
23493 machine_mode mode, bool issetmem)
23495 rtx destexp;
23496 rtx srcexp;
23497 rtx countreg;
23498 HOST_WIDE_INT rounded_count;
23500 /* If possible, it is shorter to use rep movs.
23501 TODO: Maybe it is better to move this logic to decide_alg. */
23502 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23503 && (!issetmem || orig_value == const0_rtx))
23504 mode = SImode;
23506 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23507 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23509 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23510 GET_MODE_SIZE (mode)));
23511 if (mode != QImode)
23513 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23514 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23515 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23517 else
23518 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23519 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23521 rounded_count = (INTVAL (count)
23522 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23523 destmem = shallow_copy_rtx (destmem);
23524 set_mem_size (destmem, rounded_count);
23526 else if (MEM_SIZE_KNOWN_P (destmem))
23527 clear_mem_size (destmem);
23529 if (issetmem)
23531 value = force_reg (mode, gen_lowpart (mode, value));
23532 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23534 else
23536 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23537 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23538 if (mode != QImode)
23540 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23541 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23542 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23544 else
23545 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23546 if (CONST_INT_P (count))
23548 rounded_count = (INTVAL (count)
23549 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23550 srcmem = shallow_copy_rtx (srcmem);
23551 set_mem_size (srcmem, rounded_count);
23553 else
23555 if (MEM_SIZE_KNOWN_P (srcmem))
23556 clear_mem_size (srcmem);
23558 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23559 destexp, srcexp));
23563 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23564 DESTMEM.
23565 SRCMEM is passed by pointer so it can be updated on return.
23566 Return value is the updated DST. */
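   /* For example, with SIZE_TO_MOVE of 16 and a 16-byte vector mov pattern
      available (SSE), the copy is a single vector load and store through a
      temporary register; otherwise it falls back to word_mode pieces.  */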
23567 static rtx
23568 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23569 HOST_WIDE_INT size_to_move)
23571 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23572 enum insn_code code;
23573 machine_mode move_mode;
23574 int piece_size, i;
23576 /* Find the widest mode in which we could perform moves.
23577 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23578 it until a move of that size is supported. */
23579 piece_size = 1 << floor_log2 (size_to_move);
23580 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23581 code = optab_handler (mov_optab, move_mode);
23582 while (code == CODE_FOR_nothing && piece_size > 1)
23584 piece_size >>= 1;
23585 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23586 code = optab_handler (mov_optab, move_mode);
23589 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23590 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23591 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23593 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23594 move_mode = mode_for_vector (word_mode, nunits);
23595 code = optab_handler (mov_optab, move_mode);
23596 if (code == CODE_FOR_nothing)
23598 move_mode = word_mode;
23599 piece_size = GET_MODE_SIZE (move_mode);
23600 code = optab_handler (mov_optab, move_mode);
23603 gcc_assert (code != CODE_FOR_nothing);
23605 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23606 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23608 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23609 gcc_assert (size_to_move % piece_size == 0);
23610 adjust = GEN_INT (piece_size);
23611 for (i = 0; i < size_to_move; i += piece_size)
23613 /* We move from memory to memory, so we'll need to do it via
23614 a temporary register. */
23615 tempreg = gen_reg_rtx (move_mode);
23616 emit_insn (GEN_FCN (code) (tempreg, src));
23617 emit_insn (GEN_FCN (code) (dst, tempreg));
23619 emit_move_insn (destptr,
23620 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23621 emit_move_insn (srcptr,
23622 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23624 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23625 piece_size);
23626 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23627 piece_size);
23630 /* Update DST and SRC rtx. */
23631 *srcmem = src;
23632 return dst;
23635 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
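   /* For example, a constant COUNT of 23 with MAX_SIZE of 16 leaves
      23 % 16 = 7 epilogue bytes, emitted as a 4-byte, a 2-byte and a
      1-byte move.  */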
23636 static void
23637 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23638 rtx destptr, rtx srcptr, rtx count, int max_size)
23640 rtx src, dest;
23641 if (CONST_INT_P (count))
23643 HOST_WIDE_INT countval = INTVAL (count);
23644 HOST_WIDE_INT epilogue_size = countval % max_size;
23645 int i;
23647 /* For now MAX_SIZE should be a power of 2. This assert could be
23648 relaxed, but it'll require a bit more complicated epilogue
23649 expanding. */
23650 gcc_assert ((max_size & (max_size - 1)) == 0);
23651 for (i = max_size; i >= 1; i >>= 1)
23653 if (epilogue_size & i)
23654 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23656 return;
23658 if (max_size > 8)
23660 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23661 count, 1, OPTAB_DIRECT);
23662 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23663 count, QImode, 1, 4, false);
23664 return;
23667 /* When there are stringops, we can cheaply increase dest and src pointers.
23668 Otherwise we save code size by maintaining an offset (zero is readily
23669 available from the preceding rep operation) and using x86 addressing modes. */
23671 if (TARGET_SINGLE_STRINGOP)
23673 if (max_size > 4)
23675 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23676 src = change_address (srcmem, SImode, srcptr);
23677 dest = change_address (destmem, SImode, destptr);
23678 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23679 emit_label (label);
23680 LABEL_NUSES (label) = 1;
23682 if (max_size > 2)
23684 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23685 src = change_address (srcmem, HImode, srcptr);
23686 dest = change_address (destmem, HImode, destptr);
23687 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23688 emit_label (label);
23689 LABEL_NUSES (label) = 1;
23691 if (max_size > 1)
23693 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23694 src = change_address (srcmem, QImode, srcptr);
23695 dest = change_address (destmem, QImode, destptr);
23696 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23697 emit_label (label);
23698 LABEL_NUSES (label) = 1;
23701 else
23703 rtx offset = force_reg (Pmode, const0_rtx);
23704 rtx tmp;
23706 if (max_size > 4)
23708 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23709 src = change_address (srcmem, SImode, srcptr);
23710 dest = change_address (destmem, SImode, destptr);
23711 emit_move_insn (dest, src);
23712 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23713 true, OPTAB_LIB_WIDEN);
23714 if (tmp != offset)
23715 emit_move_insn (offset, tmp);
23716 emit_label (label);
23717 LABEL_NUSES (label) = 1;
23719 if (max_size > 2)
23721 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23722 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23723 src = change_address (srcmem, HImode, tmp);
23724 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23725 dest = change_address (destmem, HImode, tmp);
23726 emit_move_insn (dest, src);
23727 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23728 true, OPTAB_LIB_WIDEN);
23729 if (tmp != offset)
23730 emit_move_insn (offset, tmp);
23731 emit_label (label);
23732 LABEL_NUSES (label) = 1;
23734 if (max_size > 1)
23736 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23737 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23738 src = change_address (srcmem, QImode, tmp);
23739 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23740 dest = change_address (destmem, QImode, tmp);
23741 emit_move_insn (dest, src);
23742 emit_label (label);
23743 LABEL_NUSES (label) = 1;
23748 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23749 with the value PROMOTED_VAL.
23750 Return value is the updated DST. */
23752 static rtx
23753 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23754 HOST_WIDE_INT size_to_move)
23756 rtx dst = destmem, adjust;
23757 enum insn_code code;
23758 machine_mode move_mode;
23759 int piece_size, i;
23761 /* Find the widest mode in which we could perform moves.
23762 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23763 it until a move of that size is supported. */
23764 move_mode = GET_MODE (promoted_val);
23765 if (move_mode == VOIDmode)
23766 move_mode = QImode;
23767 if (size_to_move < GET_MODE_SIZE (move_mode))
23769 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23770 promoted_val = gen_lowpart (move_mode, promoted_val);
23772 piece_size = GET_MODE_SIZE (move_mode);
23773 code = optab_handler (mov_optab, move_mode);
23774 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23776 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23778 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23779 gcc_assert (size_to_move % piece_size == 0);
23780 adjust = GEN_INT (piece_size);
23781 for (i = 0; i < size_to_move; i += piece_size)
23783 if (piece_size <= GET_MODE_SIZE (word_mode))
23785 emit_insn (gen_strset (destptr, dst, promoted_val));
23786 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23787 piece_size);
23788 continue;
23791 emit_insn (GEN_FCN (code) (dst, promoted_val));
23793 emit_move_insn (destptr,
23794 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23796 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23797 piece_size);
23800 /* Update DST rtx. */
23801 return dst;
23803 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23804 static void
23805 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23806 rtx count, int max_size)
23808 count =
23809 expand_simple_binop (counter_mode (count), AND, count,
23810 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23811 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23812 gen_lowpart (QImode, value), count, QImode,
23813 1, max_size / 2, true);
23816 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
23817 static void
23818 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23819 rtx count, int max_size)
23821 rtx dest;
23823 if (CONST_INT_P (count))
23825 HOST_WIDE_INT countval = INTVAL (count);
23826 HOST_WIDE_INT epilogue_size = countval % max_size;
23827 int i;
23829 /* For now MAX_SIZE should be a power of 2. This assert could be
23830 relaxed, but it'll require a bit more complicated epilogue
23831 expanding. */
23832 gcc_assert ((max_size & (max_size - 1)) == 0);
23833 for (i = max_size; i >= 1; i >>= 1)
23835 if (epilogue_size & i)
23837 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23838 destmem = emit_memset (destmem, destptr, vec_value, i);
23839 else
23840 destmem = emit_memset (destmem, destptr, value, i);
23843 return;
23845 if (max_size > 32)
23847 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23848 return;
23850 if (max_size > 16)
23852 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23853 if (TARGET_64BIT)
23855 dest = change_address (destmem, DImode, destptr);
23856 emit_insn (gen_strset (destptr, dest, value));
23857 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23858 emit_insn (gen_strset (destptr, dest, value));
23860 else
23862 dest = change_address (destmem, SImode, destptr);
23863 emit_insn (gen_strset (destptr, dest, value));
23864 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23865 emit_insn (gen_strset (destptr, dest, value));
23866 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23867 emit_insn (gen_strset (destptr, dest, value));
23868 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23869 emit_insn (gen_strset (destptr, dest, value));
23871 emit_label (label);
23872 LABEL_NUSES (label) = 1;
23874 if (max_size > 8)
23876 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23877 if (TARGET_64BIT)
23879 dest = change_address (destmem, DImode, destptr);
23880 emit_insn (gen_strset (destptr, dest, value));
23882 else
23884 dest = change_address (destmem, SImode, destptr);
23885 emit_insn (gen_strset (destptr, dest, value));
23886 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23887 emit_insn (gen_strset (destptr, dest, value));
23889 emit_label (label);
23890 LABEL_NUSES (label) = 1;
23892 if (max_size > 4)
23894 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23895 dest = change_address (destmem, SImode, destptr);
23896 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23897 emit_label (label);
23898 LABEL_NUSES (label) = 1;
23900 if (max_size > 2)
23902 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23903 dest = change_address (destmem, HImode, destptr);
23904 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23905 emit_label (label);
23906 LABEL_NUSES (label) = 1;
23908 if (max_size > 1)
23910 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23911 dest = change_address (destmem, QImode, destptr);
23912 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23913 emit_label (label);
23914 LABEL_NUSES (label) = 1;
23918 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23919 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23920 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23921 ignored.
23922 Return value is updated DESTMEM. */
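   /* For example, with ALIGN 1 and DESIRED_ALIGNMENT 8 this emits three
      conditional blocks testing bits 1, 2 and 4 of DESTPTR, each copying
      (or setting) 1, 2 or 4 bytes when the bit is set and adjusting COUNT.  */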
23923 static rtx
23924 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23925 rtx destptr, rtx srcptr, rtx value,
23926 rtx vec_value, rtx count, int align,
23927 int desired_alignment, bool issetmem)
23929 int i;
23930 for (i = 1; i < desired_alignment; i <<= 1)
23932 if (align <= i)
23934 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23935 if (issetmem)
23937 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23938 destmem = emit_memset (destmem, destptr, vec_value, i);
23939 else
23940 destmem = emit_memset (destmem, destptr, value, i);
23942 else
23943 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23944 ix86_adjust_counter (count, i);
23945 emit_label (label);
23946 LABEL_NUSES (label) = 1;
23947 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23950 return destmem;
23953 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23954 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23955 and jump to DONE_LABEL. */
23956 static void
23957 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23958 rtx destptr, rtx srcptr,
23959 rtx value, rtx vec_value,
23960 rtx count, int size,
23961 rtx done_label, bool issetmem)
23963 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23964 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23965 rtx modesize;
23966 int n;
23968 /* If we do not have vector value to copy, we must reduce size. */
23969 if (issetmem)
23971 if (!vec_value)
23973 if (GET_MODE (value) == VOIDmode && size > 8)
23974 mode = Pmode;
23975 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23976 mode = GET_MODE (value);
23978 else
23979 mode = GET_MODE (vec_value), value = vec_value;
23981 else
23983 /* Choose appropriate vector mode. */
23984 if (size >= 32)
23985 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23986 else if (size >= 16)
23987 mode = TARGET_SSE ? V16QImode : DImode;
23988 srcmem = change_address (srcmem, mode, srcptr);
23990 destmem = change_address (destmem, mode, destptr);
23991 modesize = GEN_INT (GET_MODE_SIZE (mode));
23992 gcc_assert (GET_MODE_SIZE (mode) <= size);
23993 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23995 if (issetmem)
23996 emit_move_insn (destmem, gen_lowpart (mode, value));
23997 else
23999 emit_move_insn (destmem, srcmem);
24000 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24002 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24005 destmem = offset_address (destmem, count, 1);
24006 destmem = offset_address (destmem, GEN_INT (-2 * size),
24007 GET_MODE_SIZE (mode));
24008 if (!issetmem)
24010 srcmem = offset_address (srcmem, count, 1);
24011 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24012 GET_MODE_SIZE (mode));
24014 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24016 if (issetmem)
24017 emit_move_insn (destmem, gen_lowpart (mode, value));
24018 else
24020 emit_move_insn (destmem, srcmem);
24021 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24023 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24025 emit_jump_insn (gen_jump (done_label));
24026 emit_barrier ();
24028 emit_label (label);
24029 LABEL_NUSES (label) = 1;
24032 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24033 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24034 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24035 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24036 DONE_LABEL is a label after the whole copying sequence. The label is created
24037 on demand if *DONE_LABEL is NULL.
24038 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24039 bounds after the initial copies.
24041 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24042 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24043 we will dispatch to a library call for large blocks.
24045 In pseudocode we do:
24047 if (COUNT < SIZE)
24049 Assume that SIZE is 4. Bigger sizes are handled analogously
24050 if (COUNT & 4)
24052 copy 4 bytes from SRCPTR to DESTPTR
24053 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24054 goto done_label
24056 if (!COUNT)
24057 goto done_label;
24058 copy 1 byte from SRCPTR to DESTPTR
24059 if (COUNT & 2)
24061 copy 2 bytes from SRCPTR to DESTPTR
24062 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24065 else
24067 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24068 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24070 OLD_DESTPTR = DESTPTR;
24071 Align DESTPTR up to DESIRED_ALIGN
24072 SRCPTR += DESTPTR - OLD_DESTPTR
24073 COUNT -= DESTPTR - OLD_DESTPTR
24074 if (DYNAMIC_CHECK)
24075 Round COUNT down to multiple of SIZE
24076 << optional caller supplied zero size guard is here >>
24077 << optional caller supplied dynamic check is here >>
24078 << caller supplied main copy loop is here >>
24080 done_label:
24082 static void
24083 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24084 rtx *destptr, rtx *srcptr,
24085 machine_mode mode,
24086 rtx value, rtx vec_value,
24087 rtx *count,
24088 rtx_code_label **done_label,
24089 int size,
24090 int desired_align,
24091 int align,
24092 unsigned HOST_WIDE_INT *min_size,
24093 bool dynamic_check,
24094 bool issetmem)
24096 rtx_code_label *loop_label = NULL, *label;
24097 int n;
24098 rtx modesize;
24099 int prolog_size = 0;
24100 rtx mode_value;
24102 /* Choose the proper value to copy. */
24103 if (issetmem && VECTOR_MODE_P (mode))
24104 mode_value = vec_value;
24105 else
24106 mode_value = value;
24107 gcc_assert (GET_MODE_SIZE (mode) <= size);
24109 /* See if block is big or small, handle small blocks. */
24110 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24112 int size2 = size;
24113 loop_label = gen_label_rtx ();
24115 if (!*done_label)
24116 *done_label = gen_label_rtx ();
24118 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24119 1, loop_label);
24120 size2 >>= 1;
24122 /* Handle sizes > 3. */
24123 for (;size2 > 2; size2 >>= 1)
24124 expand_small_movmem_or_setmem (destmem, srcmem,
24125 *destptr, *srcptr,
24126 value, vec_value,
24127 *count,
24128 size2, *done_label, issetmem);
24129 /* Nothing to copy? Jump to DONE_LABEL if so */
24130 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24131 1, *done_label);
24133 /* Do a byte copy. */
24134 destmem = change_address (destmem, QImode, *destptr);
24135 if (issetmem)
24136 emit_move_insn (destmem, gen_lowpart (QImode, value));
24137 else
24139 srcmem = change_address (srcmem, QImode, *srcptr);
24140 emit_move_insn (destmem, srcmem);
24143 /* Handle sizes 2 and 3. */
24144 label = ix86_expand_aligntest (*count, 2, false);
24145 destmem = change_address (destmem, HImode, *destptr);
24146 destmem = offset_address (destmem, *count, 1);
24147 destmem = offset_address (destmem, GEN_INT (-2), 2);
24148 if (issetmem)
24149 emit_move_insn (destmem, gen_lowpart (HImode, value));
24150 else
24152 srcmem = change_address (srcmem, HImode, *srcptr);
24153 srcmem = offset_address (srcmem, *count, 1);
24154 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24155 emit_move_insn (destmem, srcmem);
24158 emit_label (label);
24159 LABEL_NUSES (label) = 1;
24160 emit_jump_insn (gen_jump (*done_label));
24161 emit_barrier ();
24163 else
24164 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24165 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24167 /* Start memcpy for COUNT >= SIZE. */
24168 if (loop_label)
24170 emit_label (loop_label);
24171 LABEL_NUSES (loop_label) = 1;
24174 /* Copy first desired_align bytes. */
24175 if (!issetmem)
24176 srcmem = change_address (srcmem, mode, *srcptr);
24177 destmem = change_address (destmem, mode, *destptr);
24178 modesize = GEN_INT (GET_MODE_SIZE (mode));
24179 for (n = 0; prolog_size < desired_align - align; n++)
24181 if (issetmem)
24182 emit_move_insn (destmem, mode_value);
24183 else
24185 emit_move_insn (destmem, srcmem);
24186 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24188 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24189 prolog_size += GET_MODE_SIZE (mode);
24193 /* Copy last SIZE bytes. */
24194 destmem = offset_address (destmem, *count, 1);
24195 destmem = offset_address (destmem,
24196 GEN_INT (-size - prolog_size),
24198 if (issetmem)
24199 emit_move_insn (destmem, mode_value);
24200 else
24202 srcmem = offset_address (srcmem, *count, 1);
24203 srcmem = offset_address (srcmem,
24204 GEN_INT (-size - prolog_size),
24206 emit_move_insn (destmem, srcmem);
24208 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24210 destmem = offset_address (destmem, modesize, 1);
24211 if (issetmem)
24212 emit_move_insn (destmem, mode_value);
24213 else
24215 srcmem = offset_address (srcmem, modesize, 1);
24216 emit_move_insn (destmem, srcmem);
24220 /* Align destination. */
24221 if (desired_align > 1 && desired_align > align)
24223 rtx saveddest = *destptr;
24225 gcc_assert (desired_align <= size);
24226 /* Align destptr up, place it to new register. */
24227 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24228 GEN_INT (prolog_size),
24229 NULL_RTX, 1, OPTAB_DIRECT);
24230 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24231 GEN_INT (-desired_align),
24232 *destptr, 1, OPTAB_DIRECT);
24233 /* See how many bytes we skipped. */
24234 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24235 *destptr,
24236 saveddest, 1, OPTAB_DIRECT);
24237 /* Adjust srcptr and count. */
24238 if (!issetmem)
24239 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24240 *srcptr, 1, OPTAB_DIRECT);
24241 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24242 saveddest, *count, 1, OPTAB_DIRECT);
24243 /* We copied at most size + prolog_size. */
24244 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24245 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24246 else
24247 *min_size = 0;
24249 /* Our loops always round down the block size, but for dispatch to the library
24250 we need the precise value. */
24251 if (dynamic_check)
24252 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24253 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24255 else
24257 gcc_assert (prolog_size == 0);
24258 /* Decrease count, so we won't end up copying last word twice. */
24259 if (!CONST_INT_P (*count))
24260 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24261 constm1_rtx, *count, 1, OPTAB_DIRECT);
24262 else
24263 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24264 if (*min_size)
24265 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24270 /* This function is like the previous one, except here we know how many bytes
24271 need to be copied. That allows us to update alignment not only of DST, which
24272 is returned, but also of SRC, which is passed as a pointer for that
24273 reason. */
24274 static rtx
24275 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24276 rtx srcreg, rtx value, rtx vec_value,
24277 int desired_align, int align_bytes,
24278 bool issetmem)
24280 rtx src = NULL;
24281 rtx orig_dst = dst;
24282 rtx orig_src = NULL;
24283 int piece_size = 1;
24284 int copied_bytes = 0;
24286 if (!issetmem)
24288 gcc_assert (srcp != NULL);
24289 src = *srcp;
24290 orig_src = src;
24293 for (piece_size = 1;
24294 piece_size <= desired_align && copied_bytes < align_bytes;
24295 piece_size <<= 1)
24297 if (align_bytes & piece_size)
24299 if (issetmem)
24301 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24302 dst = emit_memset (dst, destreg, vec_value, piece_size);
24303 else
24304 dst = emit_memset (dst, destreg, value, piece_size);
24306 else
24307 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24308 copied_bytes += piece_size;
24311 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24312 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24313 if (MEM_SIZE_KNOWN_P (orig_dst))
24314 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24316 if (!issetmem)
24318 int src_align_bytes = get_mem_align_offset (src, desired_align
24319 * BITS_PER_UNIT);
24320 if (src_align_bytes >= 0)
24321 src_align_bytes = desired_align - src_align_bytes;
24322 if (src_align_bytes >= 0)
24324 unsigned int src_align;
24325 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24327 if ((src_align_bytes & (src_align - 1))
24328 == (align_bytes & (src_align - 1)))
24329 break;
24331 if (src_align > (unsigned int) desired_align)
24332 src_align = desired_align;
24333 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24334 set_mem_align (src, src_align * BITS_PER_UNIT);
24336 if (MEM_SIZE_KNOWN_P (orig_src))
24337 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24338 *srcp = src;
24341 return dst;
24344 /* Return true if ALG can be used in current context.
24345 Assume we expand memset if MEMSET is true. */
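   /* For example, vector_loop is only usable when SSE or AVX is enabled, and
      the rep-prefix variants are rejected when the user has fixed ecx or edi
      (plus eax for memset or esi for memcpy) for their own use.  */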
24346 static bool
24347 alg_usable_p (enum stringop_alg alg, bool memset)
24349 if (alg == no_stringop)
24350 return false;
24351 if (alg == vector_loop)
24352 return TARGET_SSE || TARGET_AVX;
24353 /* Algorithms using the rep prefix want at least edi and ecx;
24354 additionally, memset wants eax and memcpy wants esi. Don't
24355 consider such algorithms if the user has appropriated those
24356 registers for their own purposes. */
24357 if (alg == rep_prefix_1_byte
24358 || alg == rep_prefix_4_byte
24359 || alg == rep_prefix_8_byte)
24360 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24361 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24362 return true;
24365 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
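   /* For example, when optimizing for size a zero memset with a known count that
      is a multiple of 4 picks rep_prefix_4_byte (or the plain loop when the rep
      registers are unavailable), while other size-optimized cases fall back to
      the byte-wise variants.  */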
24366 static enum stringop_alg
24367 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24368 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24369 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24371 const struct stringop_algs * algs;
24372 bool optimize_for_speed;
24373 int max = 0;
24374 const struct processor_costs *cost;
24375 int i;
24376 bool any_alg_usable_p = false;
24378 *noalign = false;
24379 *dynamic_check = -1;
24381 /* Even if the string operation call is cold, we still might spend a lot
24382 of time processing large blocks. */
24383 if (optimize_function_for_size_p (cfun)
24384 || (optimize_insn_for_size_p ()
24385 && (max_size < 256
24386 || (expected_size != -1 && expected_size < 256))))
24387 optimize_for_speed = false;
24388 else
24389 optimize_for_speed = true;
24391 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24392 if (memset)
24393 algs = &cost->memset[TARGET_64BIT != 0];
24394 else
24395 algs = &cost->memcpy[TARGET_64BIT != 0];
24397 /* See maximal size for user defined algorithm. */
24398 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24400 enum stringop_alg candidate = algs->size[i].alg;
24401 bool usable = alg_usable_p (candidate, memset);
24402 any_alg_usable_p |= usable;
24404 if (candidate != libcall && candidate && usable)
24405 max = algs->size[i].max;
24408 /* If the expected size is not known but the max size is small enough
24409 that the inline version is a win, set the expected size into
24410 the range. */
24411 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24412 && expected_size == -1)
24413 expected_size = min_size / 2 + max_size / 2;
24415 /* If the user specified the algorithm, honor it if possible. */
24416 if (ix86_stringop_alg != no_stringop
24417 && alg_usable_p (ix86_stringop_alg, memset))
24418 return ix86_stringop_alg;
24419 /* rep; movq or rep; movl is the smallest variant. */
24420 else if (!optimize_for_speed)
24422 *noalign = true;
24423 if (!count || (count & 3) || (memset && !zero_memset))
24424 return alg_usable_p (rep_prefix_1_byte, memset)
24425 ? rep_prefix_1_byte : loop_1_byte;
24426 else
24427 return alg_usable_p (rep_prefix_4_byte, memset)
24428 ? rep_prefix_4_byte : loop;
24430 /* Very tiny blocks are best handled via the loop, REP is expensive to
24431 setup. */
24432 else if (expected_size != -1 && expected_size < 4)
24433 return loop_1_byte;
24434 else if (expected_size != -1)
24436 enum stringop_alg alg = libcall;
24437 bool alg_noalign = false;
24438 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24440 /* We get here if the algorithms that were not libcall-based
24441 were rep-prefix based and we are unable to use rep prefixes
24442 based on global register usage. Break out of the loop and
24443 use the heuristic below. */
24444 if (algs->size[i].max == 0)
24445 break;
24446 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24448 enum stringop_alg candidate = algs->size[i].alg;
24450 if (candidate != libcall && alg_usable_p (candidate, memset))
24452 alg = candidate;
24453 alg_noalign = algs->size[i].noalign;
24455 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24456 last non-libcall inline algorithm. */
24457 if (TARGET_INLINE_ALL_STRINGOPS)
24459 /* When the current size is best to be copied by a libcall,
24460 but we are still forced to inline, run the heuristic below
24461 that will pick code for medium sized blocks. */
24462 if (alg != libcall)
24464 *noalign = alg_noalign;
24465 return alg;
24467 else if (!any_alg_usable_p)
24468 break;
24470 else if (alg_usable_p (candidate, memset))
24472 *noalign = algs->size[i].noalign;
24473 return candidate;
24478 /* When asked to inline the call anyway, try to pick a meaningful choice.
24479 We look for the maximal size of block that is faster to copy by hand and
24480 take blocks of at most that size, guessing that the average size will
24481 be roughly half of the block.
24483 If this turns out to be bad, we might simply specify the preferred
24484 choice in ix86_costs. */
24485 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24486 && (algs->unknown_size == libcall
24487 || !alg_usable_p (algs->unknown_size, memset)))
24489 enum stringop_alg alg;
24491 /* If there aren't any usable algorithms, then recursing on
24492 smaller sizes isn't going to find anything. Just return the
24493 simple byte-at-a-time copy loop. */
24494 if (!any_alg_usable_p)
24496 /* Pick something reasonable. */
24497 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24498 *dynamic_check = 128;
24499 return loop_1_byte;
24501 if (max <= 0)
24502 max = 4096;
24503 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24504 zero_memset, dynamic_check, noalign);
24505 gcc_assert (*dynamic_check == -1);
24506 gcc_assert (alg != libcall);
24507 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24508 *dynamic_check = max;
24509 return alg;
24511 return (alg_usable_p (algs->unknown_size, memset)
24512 ? algs->unknown_size : libcall);
24515 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24516 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24517 static int
24518 decide_alignment (int align,
24519 enum stringop_alg alg,
24520 int expected_size,
24521 machine_mode move_mode)
24523 int desired_align = 0;
24525 gcc_assert (alg != no_stringop);
24527 if (alg == libcall)
24528 return 0;
24529 if (move_mode == VOIDmode)
24530 return 0;
24532 desired_align = GET_MODE_SIZE (move_mode);
24533 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24534 copying a whole cacheline at once. */
24535 if (TARGET_PENTIUMPRO
24536 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24537 desired_align = 8;
24539 if (optimize_size)
24540 desired_align = 1;
24541 if (desired_align < align)
24542 desired_align = align;
24543 if (expected_size != -1 && expected_size < 4)
24544 desired_align = align;
24546 return desired_align;
24550 /* Helper function for memcpy. For QImode value 0xXY produce
24551 0xXYXYXYXY of the width specified by MODE. This is essentially
24552 a * 0x01010101, but we can do slightly better than
24553 synth_mult by unwinding the sequence by hand on CPUs with
24554 slow multiply. */
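/* As an illustrative example (assuming a QImode VAL of 0x5A): the constant
   path below simply builds 0x5A5A5A5A for SImode, or 0x5A5A5A5A5A5A5A5A for
   DImode.  A non-constant VAL is either multiplied by a promoted 0x01010101
   or built up by successive shift-and-or steps, whichever the cost
   comparison below prefers.  */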
24555 static rtx
24556 promote_duplicated_reg (machine_mode mode, rtx val)
24558 machine_mode valmode = GET_MODE (val);
24559 rtx tmp;
24560 int nops = mode == DImode ? 3 : 2;
24562 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24563 if (val == const0_rtx)
24564 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24565 if (CONST_INT_P (val))
24567 HOST_WIDE_INT v = INTVAL (val) & 255;
24569 v |= v << 8;
24570 v |= v << 16;
24571 if (mode == DImode)
24572 v |= (v << 16) << 16;
24573 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24576 if (valmode == VOIDmode)
24577 valmode = QImode;
24578 if (valmode != QImode)
24579 val = gen_lowpart (QImode, val);
24580 if (mode == QImode)
24581 return val;
24582 if (!TARGET_PARTIAL_REG_STALL)
24583 nops--;
24584 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24585 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24586 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24587 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24589 rtx reg = convert_modes (mode, QImode, val, true);
24590 tmp = promote_duplicated_reg (mode, const1_rtx);
24591 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24592 OPTAB_DIRECT);
24594 else
24596 rtx reg = convert_modes (mode, QImode, val, true);
24598 if (!TARGET_PARTIAL_REG_STALL)
24599 if (mode == SImode)
24600 emit_insn (gen_movsi_insv_1 (reg, reg));
24601 else
24602 emit_insn (gen_movdi_insv_1 (reg, reg));
24603 else
24605 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24606 NULL, 1, OPTAB_DIRECT);
24607 reg =
24608 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24610 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24611 NULL, 1, OPTAB_DIRECT);
24612 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24613 if (mode == SImode)
24614 return reg;
24615 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24616 NULL, 1, OPTAB_DIRECT);
24617 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24618 return reg;
24622 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24623 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24624 alignment from ALIGN to DESIRED_ALIGN. */
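/* For instance, a 64-bit memset whose main loop stores 8-byte chunks
   (SIZE_NEEDED of 8) gets a DImode promoted value here, while one that only
   ever needs 2-byte stores gets an HImode value.  */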
24625 static rtx
24626 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24627 int align)
24629 rtx promoted_val;
24631 if (TARGET_64BIT
24632 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24633 promoted_val = promote_duplicated_reg (DImode, val);
24634 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24635 promoted_val = promote_duplicated_reg (SImode, val);
24636 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24637 promoted_val = promote_duplicated_reg (HImode, val);
24638 else
24639 promoted_val = val;
24641 return promoted_val;
24644 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24645 operations when profitable. The code depends upon architecture, block size
24646 and alignment, but always has one of the following overall structures:
24648 Aligned move sequence:
24650 1) Prologue guard: Conditional that jumps up to epilogues for small
24651 blocks that can be handled by epilogue alone. This is faster
24652 but also needed for correctness, since the prologue assumes the block
24653 is larger than the desired alignment.
24655 Optional dynamic check for size and libcall for large
24656 blocks is emitted here too, with -minline-stringops-dynamically.
24658 2) Prologue: copy first few bytes in order to get destination
24659 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24660 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24661 copied. We emit either a jump tree on power of two sized
24662 blocks, or a byte loop.
24664 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24665 with specified algorithm.
24667 4) Epilogue: code copying tail of the block that is too small to be
24668 handled by main body (or up to size guarded by prologue guard).
24670 Misaligned move sequence
24672 1) misaligned move prologue/epilogue containing:
24673 a) Prologue handling small memory blocks and jumping to done_label
24674 (skipped if blocks are known to be large enough)
24675 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24676 needed by single possibly misaligned move
24677 (skipped if alignment is not needed)
24678 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24680 2) Zero size guard dispatching to done_label, if needed
24682 3) dispatch to library call, if needed,
24684 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24685 with specified algorithm. */
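/* As a rough illustration of the sizes involved: with the unrolled_loop
   algorithm on a 64-bit target the main body uses word_mode (DImode) moves
   with an unroll factor of 4, so SIZE_NEEDED is 32 and the epilogue handles
   the remaining tail of fewer than 32 bytes.  */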
24686 bool
24687 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24688 rtx align_exp, rtx expected_align_exp,
24689 rtx expected_size_exp, rtx min_size_exp,
24690 rtx max_size_exp, rtx probable_max_size_exp,
24691 bool issetmem)
24693 rtx destreg;
24694 rtx srcreg = NULL;
24695 rtx_code_label *label = NULL;
24696 rtx tmp;
24697 rtx_code_label *jump_around_label = NULL;
24698 HOST_WIDE_INT align = 1;
24699 unsigned HOST_WIDE_INT count = 0;
24700 HOST_WIDE_INT expected_size = -1;
24701 int size_needed = 0, epilogue_size_needed;
24702 int desired_align = 0, align_bytes = 0;
24703 enum stringop_alg alg;
24704 rtx promoted_val = NULL;
24705 rtx vec_promoted_val = NULL;
24706 bool force_loopy_epilogue = false;
24707 int dynamic_check;
24708 bool need_zero_guard = false;
24709 bool noalign;
24710 machine_mode move_mode = VOIDmode;
24711 int unroll_factor = 1;
24712 /* TODO: Once value ranges are available, fill in proper data. */
24713 unsigned HOST_WIDE_INT min_size = 0;
24714 unsigned HOST_WIDE_INT max_size = -1;
24715 unsigned HOST_WIDE_INT probable_max_size = -1;
24716 bool misaligned_prologue_used = false;
24718 if (CONST_INT_P (align_exp))
24719 align = INTVAL (align_exp);
24720 /* i386 can do misaligned access at a reasonably increased cost. */
24721 if (CONST_INT_P (expected_align_exp)
24722 && INTVAL (expected_align_exp) > align)
24723 align = INTVAL (expected_align_exp);
24724 /* ALIGN is the minimum of destination and source alignment, but we care here
24725 just about destination alignment. */
24726 else if (!issetmem
24727 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24728 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24730 if (CONST_INT_P (count_exp))
24732 min_size = max_size = probable_max_size = count = expected_size
24733 = INTVAL (count_exp);
24734 /* When COUNT is 0, there is nothing to do. */
24735 if (!count)
24736 return true;
24738 else
24740 if (min_size_exp)
24741 min_size = INTVAL (min_size_exp);
24742 if (max_size_exp)
24743 max_size = INTVAL (max_size_exp);
24744 if (probable_max_size_exp)
24745 probable_max_size = INTVAL (probable_max_size_exp);
24746 if (CONST_INT_P (expected_size_exp))
24747 expected_size = INTVAL (expected_size_exp);
24750 /* Make sure we don't need to care about overflow later on. */
24751 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24752 return false;
24754 /* Step 0: Decide on preferred algorithm, desired alignment and
24755 size of chunks to be copied by main loop. */
24756 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24757 issetmem,
24758 issetmem && val_exp == const0_rtx,
24759 &dynamic_check, &noalign);
24760 if (alg == libcall)
24761 return false;
24762 gcc_assert (alg != no_stringop);
24764 /* For now the vector version of memset is generated only for memory zeroing, as
24765 creating the promoted vector value is very cheap in this case. */
24766 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24767 alg = unrolled_loop;
24769 if (!count)
24770 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24771 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24772 if (!issetmem)
24773 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24775 unroll_factor = 1;
24776 move_mode = word_mode;
24777 switch (alg)
24779 case libcall:
24780 case no_stringop:
24781 case last_alg:
24782 gcc_unreachable ();
24783 case loop_1_byte:
24784 need_zero_guard = true;
24785 move_mode = QImode;
24786 break;
24787 case loop:
24788 need_zero_guard = true;
24789 break;
24790 case unrolled_loop:
24791 need_zero_guard = true;
24792 unroll_factor = (TARGET_64BIT ? 4 : 2);
24793 break;
24794 case vector_loop:
24795 need_zero_guard = true;
24796 unroll_factor = 4;
24797 /* Find the widest supported mode. */
24798 move_mode = word_mode;
24799 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24800 != CODE_FOR_nothing)
24801 move_mode = GET_MODE_WIDER_MODE (move_mode);
24803 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24804 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24805 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24807 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24808 move_mode = mode_for_vector (word_mode, nunits);
24809 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24810 move_mode = word_mode;
24812 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24813 break;
24814 case rep_prefix_8_byte:
24815 move_mode = DImode;
24816 break;
24817 case rep_prefix_4_byte:
24818 move_mode = SImode;
24819 break;
24820 case rep_prefix_1_byte:
24821 move_mode = QImode;
24822 break;
24824 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24825 epilogue_size_needed = size_needed;
24827 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24828 if (!TARGET_ALIGN_STRINGOPS || noalign)
24829 align = desired_align;
24831 /* Step 1: Prologue guard. */
24833 /* Alignment code needs count to be in register. */
24834 if (CONST_INT_P (count_exp) && desired_align > align)
24836 if (INTVAL (count_exp) > desired_align
24837 && INTVAL (count_exp) > size_needed)
24839 align_bytes
24840 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24841 if (align_bytes <= 0)
24842 align_bytes = 0;
24843 else
24844 align_bytes = desired_align - align_bytes;
24846 if (align_bytes == 0)
24847 count_exp = force_reg (counter_mode (count_exp), count_exp);
24849 gcc_assert (desired_align >= 1 && align >= 1);
24851 /* Misaligned move sequences handle both prologue and epilogue at once.
24852 Default code generation results in smaller code for large alignments
24853 and also avoids redundant work when sizes are known precisely. */
24854 misaligned_prologue_used
24855 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24856 && MAX (desired_align, epilogue_size_needed) <= 32
24857 && desired_align <= epilogue_size_needed
24858 && ((desired_align > align && !align_bytes)
24859 || (!count && epilogue_size_needed > 1)));
24861 /* Do the cheap promotion to allow better CSE across the
24862 main loop and epilogue (i.e. one load of the big constant in
24863 front of all code).
24864 For now the misaligned move sequences do not have a fast path
24865 without broadcasting. */
24866 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24868 if (alg == vector_loop)
24870 gcc_assert (val_exp == const0_rtx);
24871 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24872 promoted_val = promote_duplicated_reg_to_size (val_exp,
24873 GET_MODE_SIZE (word_mode),
24874 desired_align, align);
24876 else
24878 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24879 desired_align, align);
24882 /* Misaligned move sequences handle both prologues and epilogues at once.
24883 Default code generation results in smaller code for large alignments and
24884 also avoids redundant work when sizes are known precisely. */
24885 if (misaligned_prologue_used)
24887 /* The misaligned move prologue handles small blocks by itself. */
24888 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24889 (dst, src, &destreg, &srcreg,
24890 move_mode, promoted_val, vec_promoted_val,
24891 &count_exp,
24892 &jump_around_label,
24893 desired_align < align
24894 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24895 desired_align, align, &min_size, dynamic_check, issetmem);
24896 if (!issetmem)
24897 src = change_address (src, BLKmode, srcreg);
24898 dst = change_address (dst, BLKmode, destreg);
24899 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24900 epilogue_size_needed = 0;
24901 if (need_zero_guard && !min_size)
24903 /* It is possible that we copied enough so the main loop will not
24904 execute. */
24905 gcc_assert (size_needed > 1);
24906 if (jump_around_label == NULL_RTX)
24907 jump_around_label = gen_label_rtx ();
24908 emit_cmp_and_jump_insns (count_exp,
24909 GEN_INT (size_needed),
24910 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24911 if (expected_size == -1
24912 || expected_size < (desired_align - align) / 2 + size_needed)
24913 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24914 else
24915 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24918 /* Ensure that alignment prologue won't copy past end of block. */
24919 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24921 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24922 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24923 Make sure it is power of 2. */
24924 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
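/* For example, with SIZE_NEEDED of 32 and no extra alignment bytes the MAX
   above is 31 and the shift rounds it back up to the power of two 32.  */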
24926 /* To improve performance of small blocks, we jump around the VAL
24927 promoting mode. This means that if the promoted VAL is not constant,
24928 we might not use it in the epilogue and have to use the byte
24929 loop variant. */
24930 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24931 force_loopy_epilogue = true;
24932 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24933 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24935 /* If main algorithm works on QImode, no epilogue is needed.
24936 For small sizes just don't align anything. */
24937 if (size_needed == 1)
24938 desired_align = align;
24939 else
24940 goto epilogue;
24942 else if (!count
24943 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24945 label = gen_label_rtx ();
24946 emit_cmp_and_jump_insns (count_exp,
24947 GEN_INT (epilogue_size_needed),
24948 LTU, 0, counter_mode (count_exp), 1, label);
24949 if (expected_size == -1 || expected_size < epilogue_size_needed)
24950 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24951 else
24952 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24956 /* Emit code to decide on runtime whether library call or inline should be
24957 used. */
24958 if (dynamic_check != -1)
24960 if (!issetmem && CONST_INT_P (count_exp))
24962 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24964 emit_block_move_via_libcall (dst, src, count_exp, false);
24965 count_exp = const0_rtx;
24966 goto epilogue;
24969 else
24971 rtx_code_label *hot_label = gen_label_rtx ();
24972 if (jump_around_label == NULL_RTX)
24973 jump_around_label = gen_label_rtx ();
24974 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24975 LEU, 0, counter_mode (count_exp),
24976 1, hot_label);
24977 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24978 if (issetmem)
24979 set_storage_via_libcall (dst, count_exp, val_exp, false);
24980 else
24981 emit_block_move_via_libcall (dst, src, count_exp, false);
24982 emit_jump (jump_around_label);
24983 emit_label (hot_label);
24987 /* Step 2: Alignment prologue. */
24988 /* Do the expensive promotion once we branched off the small blocks. */
24989 if (issetmem && !promoted_val)
24990 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24991 desired_align, align);
24993 if (desired_align > align && !misaligned_prologue_used)
24995 if (align_bytes == 0)
24997 /* Except for the first move in the prologue, we no longer know
24998 the constant offset in the aliasing info. It doesn't seem worth
24999 the pain to maintain it for the first move, so throw away
25000 the info early. */
25001 dst = change_address (dst, BLKmode, destreg);
25002 if (!issetmem)
25003 src = change_address (src, BLKmode, srcreg);
25004 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25005 promoted_val, vec_promoted_val,
25006 count_exp, align, desired_align,
25007 issetmem);
25008 /* At most desired_align - align bytes are copied. */
25009 if (min_size < (unsigned)(desired_align - align))
25010 min_size = 0;
25011 else
25012 min_size -= desired_align - align;
25014 else
25016 /* If we know how many bytes need to be stored before dst is
25017 sufficiently aligned, maintain aliasing info accurately. */
25018 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25019 srcreg,
25020 promoted_val,
25021 vec_promoted_val,
25022 desired_align,
25023 align_bytes,
25024 issetmem);
25026 count_exp = plus_constant (counter_mode (count_exp),
25027 count_exp, -align_bytes);
25028 count -= align_bytes;
25029 min_size -= align_bytes;
25030 max_size -= align_bytes;
25032 if (need_zero_guard
25033 && !min_size
25034 && (count < (unsigned HOST_WIDE_INT) size_needed
25035 || (align_bytes == 0
25036 && count < ((unsigned HOST_WIDE_INT) size_needed
25037 + desired_align - align))))
25039 /* It is possible that we copied enough so the main loop will not
25040 execute. */
25041 gcc_assert (size_needed > 1);
25042 if (label == NULL_RTX)
25043 label = gen_label_rtx ();
25044 emit_cmp_and_jump_insns (count_exp,
25045 GEN_INT (size_needed),
25046 LTU, 0, counter_mode (count_exp), 1, label);
25047 if (expected_size == -1
25048 || expected_size < (desired_align - align) / 2 + size_needed)
25049 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25050 else
25051 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25054 if (label && size_needed == 1)
25056 emit_label (label);
25057 LABEL_NUSES (label) = 1;
25058 label = NULL;
25059 epilogue_size_needed = 1;
25060 if (issetmem)
25061 promoted_val = val_exp;
25063 else if (label == NULL_RTX && !misaligned_prologue_used)
25064 epilogue_size_needed = size_needed;
25066 /* Step 3: Main loop. */
25068 switch (alg)
25070 case libcall:
25071 case no_stringop:
25072 case last_alg:
25073 gcc_unreachable ();
25074 case loop_1_byte:
25075 case loop:
25076 case unrolled_loop:
25077 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25078 count_exp, move_mode, unroll_factor,
25079 expected_size, issetmem);
25080 break;
25081 case vector_loop:
25082 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25083 vec_promoted_val, count_exp, move_mode,
25084 unroll_factor, expected_size, issetmem);
25085 break;
25086 case rep_prefix_8_byte:
25087 case rep_prefix_4_byte:
25088 case rep_prefix_1_byte:
25089 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25090 val_exp, count_exp, move_mode, issetmem);
25091 break;
25093 /* Adjust properly the offset of src and dest memory for aliasing. */
25094 if (CONST_INT_P (count_exp))
25096 if (!issetmem)
25097 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25098 (count / size_needed) * size_needed);
25099 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25100 (count / size_needed) * size_needed);
25102 else
25104 if (!issetmem)
25105 src = change_address (src, BLKmode, srcreg);
25106 dst = change_address (dst, BLKmode, destreg);
25109 /* Step 4: Epilogue to copy the remaining bytes. */
25110 epilogue:
25111 if (label)
25113 /* When the main loop is done, COUNT_EXP might hold original count,
25114 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25115 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25116 bytes. Compensate if needed. */
25118 if (size_needed < epilogue_size_needed)
25120 tmp =
25121 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25122 GEN_INT (size_needed - 1), count_exp, 1,
25123 OPTAB_DIRECT);
25124 if (tmp != count_exp)
25125 emit_move_insn (count_exp, tmp);
25127 emit_label (label);
25128 LABEL_NUSES (label) = 1;
25131 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25133 if (force_loopy_epilogue)
25134 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25135 epilogue_size_needed);
25136 else
25138 if (issetmem)
25139 expand_setmem_epilogue (dst, destreg, promoted_val,
25140 vec_promoted_val, count_exp,
25141 epilogue_size_needed);
25142 else
25143 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25144 epilogue_size_needed);
25147 if (jump_around_label)
25148 emit_label (jump_around_label);
25149 return true;
25153 /* Expand the appropriate insns for doing strlen if not just doing
25154 repnz; scasb
25156 out = result, initialized with the start address
25157 align_rtx = alignment of the address.
25158 scratch = scratch register, initialized with the start address when
25159 not aligned, otherwise undefined
25161 This is just the body. It needs the initializations mentioned above and
25162 some address computing at the end. These things are done in i386.md. */
25164 static void
25165 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25167 int align;
25168 rtx tmp;
25169 rtx_code_label *align_2_label = NULL;
25170 rtx_code_label *align_3_label = NULL;
25171 rtx_code_label *align_4_label = gen_label_rtx ();
25172 rtx_code_label *end_0_label = gen_label_rtx ();
25173 rtx mem;
25174 rtx tmpreg = gen_reg_rtx (SImode);
25175 rtx scratch = gen_reg_rtx (SImode);
25176 rtx cmp;
25178 align = 0;
25179 if (CONST_INT_P (align_rtx))
25180 align = INTVAL (align_rtx);
25182 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25184 /* Is there a known alignment and is it less than 4? */
25185 if (align < 4)
25187 rtx scratch1 = gen_reg_rtx (Pmode);
25188 emit_move_insn (scratch1, out);
25189 /* Is there a known alignment and is it not 2? */
25190 if (align != 2)
25192 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25193 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25195 /* Leave just the 3 lower bits. */
25196 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25197 NULL_RTX, 0, OPTAB_WIDEN);
25199 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25200 Pmode, 1, align_4_label);
25201 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25202 Pmode, 1, align_2_label);
25203 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25204 Pmode, 1, align_3_label);
25206 else
25208 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25209 check if it is aligned to a 4-byte boundary. */
25211 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25212 NULL_RTX, 0, OPTAB_WIDEN);
25214 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25215 Pmode, 1, align_4_label);
25218 mem = change_address (src, QImode, out);
25220 /* Now compare the bytes. */
25222 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25223 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25224 QImode, 1, end_0_label);
25226 /* Increment the address. */
25227 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25229 /* Not needed with an alignment of 2 */
25230 if (align != 2)
25232 emit_label (align_2_label);
25234 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25235 end_0_label);
25237 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25239 emit_label (align_3_label);
25242 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25243 end_0_label);
25245 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25248 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25249 align this loop; it only makes the program larger and does not
25250 speed it up. */
25251 emit_label (align_4_label);
25253 mem = change_address (src, SImode, out);
25254 emit_move_insn (scratch, mem);
25255 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25257 /* This formula yields a nonzero result iff one of the bytes is zero.
25258 This saves three branches inside the loop and many cycles. */
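/* Worked example of (x - 0x01010101) & ~x & 0x80808080: for SCRATCH =
   0x12003456 (a zero byte in bits 16-23), 0x12003456 - 0x01010101 =
   0x10ff3355, ~0x12003456 = 0xedffcba9, their AND is 0x00ff0301, and masking
   with 0x80808080 leaves 0x00800000, nonzero precisely because of the zero
   byte; a word with no zero byte always yields 0 here.  */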
25260 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25261 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25262 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25263 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25264 gen_int_mode (0x80808080, SImode)));
25265 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25266 align_4_label);
25268 if (TARGET_CMOVE)
25270 rtx reg = gen_reg_rtx (SImode);
25271 rtx reg2 = gen_reg_rtx (Pmode);
25272 emit_move_insn (reg, tmpreg);
25273 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25275 /* If zero is not in the first two bytes, move two bytes forward. */
25276 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25277 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25278 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25279 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25280 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25281 reg,
25282 tmpreg)));
25283 /* Emit lea manually to avoid clobbering of flags. */
25284 emit_insn (gen_rtx_SET (SImode, reg2,
25285 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25287 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25288 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25289 emit_insn (gen_rtx_SET (VOIDmode, out,
25290 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25291 reg2,
25292 out)));
25294 else
25296 rtx_code_label *end_2_label = gen_label_rtx ();
25297 /* Is zero in the first two bytes? */
25299 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25300 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25301 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25302 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25303 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25304 pc_rtx);
25305 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25306 JUMP_LABEL (tmp) = end_2_label;
25308 /* Not in the first two. Move two bytes forward. */
25309 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25310 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25312 emit_label (end_2_label);
25316 /* Avoid branch in fixing the byte. */
25317 tmpreg = gen_lowpart (QImode, tmpreg);
25318 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25319 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25320 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25321 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25323 emit_label (end_0_label);
25326 /* Expand strlen. */
25328 bool
25329 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25331 rtx addr, scratch1, scratch2, scratch3, scratch4;
25333 /* The generic case of the strlen expander is long. Avoid its
25334 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
25336 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25337 && !TARGET_INLINE_ALL_STRINGOPS
25338 && !optimize_insn_for_size_p ()
25339 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25340 return false;
25342 addr = force_reg (Pmode, XEXP (src, 0));
25343 scratch1 = gen_reg_rtx (Pmode);
25345 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25346 && !optimize_insn_for_size_p ())
25348 /* Well it seems that some optimizer does not combine a call like
25349 foo(strlen(bar), strlen(bar));
25350 when the move and the subtraction are done here. It does calculate
25351 the length just once when these instructions are done inside of
25352 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25353 often used and I use one fewer register for the lifetime of
25354 output_strlen_unroll() this is better. */
25356 emit_move_insn (out, addr);
25358 ix86_expand_strlensi_unroll_1 (out, src, align);
25360 /* strlensi_unroll_1 returns the address of the zero at the end of
25361 the string, like memchr(), so compute the length by subtracting
25362 the start address. */
25363 emit_insn (ix86_gen_sub3 (out, out, addr));
25365 else
25367 rtx unspec;
25369 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25370 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25371 return false;
25373 scratch2 = gen_reg_rtx (Pmode);
25374 scratch3 = gen_reg_rtx (Pmode);
25375 scratch4 = force_reg (Pmode, constm1_rtx);
25377 emit_move_insn (scratch3, addr);
25378 eoschar = force_reg (QImode, eoschar);
25380 src = replace_equiv_address_nv (src, scratch3);
25382 /* If .md starts supporting :P, this can be done in .md. */
25383 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25384 scratch4), UNSPEC_SCAS);
25385 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25386 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25387 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25389 return true;
25392 /* For a given symbol (function) construct code to compute the address of its PLT
25393 entry in the large x86-64 PIC model. */
25394 static rtx
25395 construct_plt_address (rtx symbol)
25397 rtx tmp, unspec;
25399 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25400 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25401 gcc_assert (Pmode == DImode);
25403 tmp = gen_reg_rtx (Pmode);
25404 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25406 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25407 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25408 return tmp;
25412 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25413 rtx callarg2,
25414 rtx pop, bool sibcall)
25416 rtx vec[3];
25417 rtx use = NULL, call;
25418 unsigned int vec_len = 0;
25420 if (pop == const0_rtx)
25421 pop = NULL;
25422 gcc_assert (!TARGET_64BIT || !pop);
25424 if (TARGET_MACHO && !TARGET_64BIT)
25426 #if TARGET_MACHO
25427 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25428 fnaddr = machopic_indirect_call_target (fnaddr);
25429 #endif
25431 else
25433 /* Static functions and indirect calls don't need the pic register. */
25434 if (flag_pic
25435 && (!TARGET_64BIT
25436 || (ix86_cmodel == CM_LARGE_PIC
25437 && DEFAULT_ABI != MS_ABI))
25438 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25439 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25441 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25442 if (ix86_use_pseudo_pic_reg ())
25443 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25444 pic_offset_table_rtx);
25448 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25450 rtx al = gen_rtx_REG (QImode, AX_REG);
25451 emit_move_insn (al, callarg2);
25452 use_reg (&use, al);
25455 if (ix86_cmodel == CM_LARGE_PIC
25456 && !TARGET_PECOFF
25457 && MEM_P (fnaddr)
25458 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25459 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25460 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25461 else if (sibcall
25462 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25463 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25465 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25466 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25469 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25471 if (retval)
25473 /* We should add bounds as destination register in case
25474 pointer with bounds may be returned. */
25475 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25477 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25478 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25479 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25480 chkp_put_regs_to_expr_list (retval);
25483 call = gen_rtx_SET (VOIDmode, retval, call);
25485 vec[vec_len++] = call;
25487 if (pop)
25489 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25490 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25491 vec[vec_len++] = pop;
25494 if (TARGET_64BIT_MS_ABI
25495 && (!callarg2 || INTVAL (callarg2) != -2))
25497 int const cregs_size
25498 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25499 int i;
25501 for (i = 0; i < cregs_size; i++)
25503 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25504 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25506 clobber_reg (&use, gen_rtx_REG (mode, regno));
25510 if (vec_len > 1)
25511 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25512 call = emit_call_insn (call);
25513 if (use)
25514 CALL_INSN_FUNCTION_USAGE (call) = use;
25516 return call;
25519 /* Output the assembly for a call instruction. */
25521 const char *
25522 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25524 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25525 bool seh_nop_p = false;
25526 const char *xasm;
25528 if (SIBLING_CALL_P (insn))
25530 if (direct_p)
25531 xasm = "%!jmp\t%P0";
25532 /* SEH epilogue detection requires the indirect branch case
25533 to include REX.W. */
25534 else if (TARGET_SEH)
25535 xasm = "%!rex.W jmp %A0";
25536 else
25537 xasm = "%!jmp\t%A0";
25539 output_asm_insn (xasm, &call_op);
25540 return "";
25543 /* SEH unwinding can require an extra nop to be emitted in several
25544 circumstances. Determine if we have one of those. */
25545 if (TARGET_SEH)
25547 rtx_insn *i;
25549 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25551 /* If we get to another real insn, we don't need the nop. */
25552 if (INSN_P (i))
25553 break;
25555 /* If we get to the epilogue note, prevent a catch region from
25556 being adjacent to the standard epilogue sequence. If non-
25557 call-exceptions, we'll have done this during epilogue emission. */
25558 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25559 && !flag_non_call_exceptions
25560 && !can_throw_internal (insn))
25562 seh_nop_p = true;
25563 break;
25567 /* If we didn't find a real insn following the call, prevent the
25568 unwinder from looking into the next function. */
25569 if (i == NULL)
25570 seh_nop_p = true;
25573 if (direct_p)
25574 xasm = "%!call\t%P0";
25575 else
25576 xasm = "%!call\t%A0";
25578 output_asm_insn (xasm, &call_op);
25580 if (seh_nop_p)
25581 return "nop";
25583 return "";
25586 /* Clear stack slot assignments remembered from previous functions.
25587 This is called from INIT_EXPANDERS once before RTL is emitted for each
25588 function. */
25590 static struct machine_function *
25591 ix86_init_machine_status (void)
25593 struct machine_function *f;
25595 f = ggc_cleared_alloc<machine_function> ();
25596 f->use_fast_prologue_epilogue_nregs = -1;
25597 f->call_abi = ix86_abi;
25599 return f;
25602 /* Return a MEM corresponding to a stack slot with mode MODE.
25603 Allocate a new slot if necessary.
25605 The RTL for a function can have several slots available: N is
25606 which slot to use. */
25609 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25611 struct stack_local_entry *s;
25613 gcc_assert (n < MAX_386_STACK_LOCALS);
25615 for (s = ix86_stack_locals; s; s = s->next)
25616 if (s->mode == mode && s->n == n)
25617 return validize_mem (copy_rtx (s->rtl));
25619 s = ggc_alloc<stack_local_entry> ();
25620 s->n = n;
25621 s->mode = mode;
25622 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25624 s->next = ix86_stack_locals;
25625 ix86_stack_locals = s;
25626 return validize_mem (copy_rtx (s->rtl));
25629 static void
25630 ix86_instantiate_decls (void)
25632 struct stack_local_entry *s;
25634 for (s = ix86_stack_locals; s; s = s->next)
25635 if (s->rtl != NULL_RTX)
25636 instantiate_decl_rtl (s->rtl);
25639 /* Check whether x86 address PARTS is a pc-relative address. */
25641 static bool
25642 rip_relative_addr_p (struct ix86_address *parts)
25644 rtx base, index, disp;
25646 base = parts->base;
25647 index = parts->index;
25648 disp = parts->disp;
25650 if (disp && !base && !index)
25652 if (TARGET_64BIT)
25654 rtx symbol = disp;
25656 if (GET_CODE (disp) == CONST)
25657 symbol = XEXP (disp, 0);
25658 if (GET_CODE (symbol) == PLUS
25659 && CONST_INT_P (XEXP (symbol, 1)))
25660 symbol = XEXP (symbol, 0);
25662 if (GET_CODE (symbol) == LABEL_REF
25663 || (GET_CODE (symbol) == SYMBOL_REF
25664 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25665 || (GET_CODE (symbol) == UNSPEC
25666 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25667 || XINT (symbol, 1) == UNSPEC_PCREL
25668 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25669 return true;
25672 return false;
25675 /* Calculate the length of the memory address in the instruction encoding.
25676 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25677 or other prefixes. We never generate addr32 prefix for LEA insn. */
25680 memory_address_length (rtx addr, bool lea)
25682 struct ix86_address parts;
25683 rtx base, index, disp;
25684 int len;
25685 int ok;
25687 if (GET_CODE (addr) == PRE_DEC
25688 || GET_CODE (addr) == POST_INC
25689 || GET_CODE (addr) == PRE_MODIFY
25690 || GET_CODE (addr) == POST_MODIFY)
25691 return 0;
25693 ok = ix86_decompose_address (addr, &parts);
25694 gcc_assert (ok);
25696 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25698 /* If this is not LEA instruction, add the length of addr32 prefix. */
25699 if (TARGET_64BIT && !lea
25700 && (SImode_address_operand (addr, VOIDmode)
25701 || (parts.base && GET_MODE (parts.base) == SImode)
25702 || (parts.index && GET_MODE (parts.index) == SImode)))
25703 len++;
25705 base = parts.base;
25706 index = parts.index;
25707 disp = parts.disp;
25709 if (base && GET_CODE (base) == SUBREG)
25710 base = SUBREG_REG (base);
25711 if (index && GET_CODE (index) == SUBREG)
25712 index = SUBREG_REG (index);
25714 gcc_assert (base == NULL_RTX || REG_P (base));
25715 gcc_assert (index == NULL_RTX || REG_P (index));
25717 /* Rule of thumb:
25718 - esp as the base always wants an index,
25719 - ebp as the base always wants a displacement,
25720 - r12 as the base always wants an index,
25721 - r13 as the base always wants a displacement. */
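/* For example, plain (%eax) adds nothing here, whereas (%esp) needs a SIB
   byte and (%ebp) must be encoded as 0(%ebp) with a disp8, so the bases
   special-cased below each add one byte.  */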
25723 /* Register Indirect. */
25724 if (base && !index && !disp)
25726 /* esp (for its index) and ebp (for its displacement) need
25727 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25728 code. */
25729 if (base == arg_pointer_rtx
25730 || base == frame_pointer_rtx
25731 || REGNO (base) == SP_REG
25732 || REGNO (base) == BP_REG
25733 || REGNO (base) == R12_REG
25734 || REGNO (base) == R13_REG)
25735 len++;
25738 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25739 is not disp32, but disp32(%rip), so for disp32
25740 SIB byte is needed, unless print_operand_address
25741 optimizes it into disp32(%rip) or (%rip) is implied
25742 by UNSPEC. */
25743 else if (disp && !base && !index)
25745 len += 4;
25746 if (rip_relative_addr_p (&parts))
25747 len++;
25749 else
25751 /* Find the length of the displacement constant. */
25752 if (disp)
25754 if (base && satisfies_constraint_K (disp))
25755 len += 1;
25756 else
25757 len += 4;
25759 /* ebp always wants a displacement. Similarly r13. */
25760 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25761 len++;
25763 /* An index requires the two-byte modrm form.... */
25764 if (index
25765 /* ...like esp (or r12), which always wants an index. */
25766 || base == arg_pointer_rtx
25767 || base == frame_pointer_rtx
25768 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25769 len++;
25772 return len;
25775 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25776 is set, expect that the insn has an 8-bit immediate alternative. */
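/* A hypothetical example: with SHORTFORM set, a MODE_SI insn with immediate
   100 is counted as 1 byte (imm8), while an immediate of 1000 is counted as
   the full 4 bytes; MODE_DI immediates also count as 4, since they are
   encoded as sign-extended 32-bit values.  */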
25778 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25780 int len = 0;
25781 int i;
25782 extract_insn_cached (insn);
25783 for (i = recog_data.n_operands - 1; i >= 0; --i)
25784 if (CONSTANT_P (recog_data.operand[i]))
25786 enum attr_mode mode = get_attr_mode (insn);
25788 gcc_assert (!len);
25789 if (shortform && CONST_INT_P (recog_data.operand[i]))
25791 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25792 switch (mode)
25794 case MODE_QI:
25795 len = 1;
25796 continue;
25797 case MODE_HI:
25798 ival = trunc_int_for_mode (ival, HImode);
25799 break;
25800 case MODE_SI:
25801 ival = trunc_int_for_mode (ival, SImode);
25802 break;
25803 default:
25804 break;
25806 if (IN_RANGE (ival, -128, 127))
25808 len = 1;
25809 continue;
25812 switch (mode)
25814 case MODE_QI:
25815 len = 1;
25816 break;
25817 case MODE_HI:
25818 len = 2;
25819 break;
25820 case MODE_SI:
25821 len = 4;
25822 break;
25823 /* Immediates for DImode instructions are encoded
25824 as 32bit sign extended values. */
25825 case MODE_DI:
25826 len = 4;
25827 break;
25828 default:
25829 fatal_insn ("unknown insn mode", insn);
25832 return len;
25835 /* Compute default value for "length_address" attribute. */
25837 ix86_attr_length_address_default (rtx_insn *insn)
25839 int i;
25841 if (get_attr_type (insn) == TYPE_LEA)
25843 rtx set = PATTERN (insn), addr;
25845 if (GET_CODE (set) == PARALLEL)
25846 set = XVECEXP (set, 0, 0);
25848 gcc_assert (GET_CODE (set) == SET);
25850 addr = SET_SRC (set);
25852 return memory_address_length (addr, true);
25855 extract_insn_cached (insn);
25856 for (i = recog_data.n_operands - 1; i >= 0; --i)
25857 if (MEM_P (recog_data.operand[i]))
25859 constrain_operands_cached (insn, reload_completed);
25860 if (which_alternative != -1)
25862 const char *constraints = recog_data.constraints[i];
25863 int alt = which_alternative;
25865 while (*constraints == '=' || *constraints == '+')
25866 constraints++;
25867 while (alt-- > 0)
25868 while (*constraints++ != ',')
25870 /* Skip ignored operands. */
25871 if (*constraints == 'X')
25872 continue;
25874 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25876 return 0;
25879 /* Compute default value for "length_vex" attribute. It includes
25880 the 2- or 3-byte VEX prefix and 1 opcode byte. */
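/* For example, an insn using only xmm0-xmm7 operands and a 0f opcode gets
   the 2-byte VEX prefix, giving a value of 3; a DImode general-register
   operand (REX.W) or an extended register in a memory operand forces the
   3-byte prefix and a value of 4.  */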
25883 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25884 bool has_vex_w)
25886 int i;
25888 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit requires the
25889 3-byte VEX prefix. */
25890 if (!has_0f_opcode || has_vex_w)
25891 return 3 + 1;
25893 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25894 if (!TARGET_64BIT)
25895 return 2 + 1;
25897 extract_insn_cached (insn);
25899 for (i = recog_data.n_operands - 1; i >= 0; --i)
25900 if (REG_P (recog_data.operand[i]))
25902 /* REX.W bit uses 3 byte VEX prefix. */
25903 if (GET_MODE (recog_data.operand[i]) == DImode
25904 && GENERAL_REG_P (recog_data.operand[i]))
25905 return 3 + 1;
25907 else
25909 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25910 if (MEM_P (recog_data.operand[i])
25911 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25912 return 3 + 1;
25915 return 2 + 1;
25918 /* Return the maximum number of instructions a cpu can issue. */
25920 static int
25921 ix86_issue_rate (void)
25923 switch (ix86_tune)
25925 case PROCESSOR_PENTIUM:
25926 case PROCESSOR_BONNELL:
25927 case PROCESSOR_SILVERMONT:
25928 case PROCESSOR_INTEL:
25929 case PROCESSOR_K6:
25930 case PROCESSOR_BTVER2:
25931 case PROCESSOR_PENTIUM4:
25932 case PROCESSOR_NOCONA:
25933 return 2;
25935 case PROCESSOR_PENTIUMPRO:
25936 case PROCESSOR_ATHLON:
25937 case PROCESSOR_K8:
25938 case PROCESSOR_AMDFAM10:
25939 case PROCESSOR_GENERIC:
25940 case PROCESSOR_BTVER1:
25941 return 3;
25943 case PROCESSOR_BDVER1:
25944 case PROCESSOR_BDVER2:
25945 case PROCESSOR_BDVER3:
25946 case PROCESSOR_BDVER4:
25947 case PROCESSOR_CORE2:
25948 case PROCESSOR_NEHALEM:
25949 case PROCESSOR_SANDYBRIDGE:
25950 case PROCESSOR_HASWELL:
25951 return 4;
25953 default:
25954 return 1;
25958 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25959 by DEP_INSN and nothing else set by DEP_INSN. */
25961 static bool
25962 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25964 rtx set, set2;
25966 /* Simplify the test for uninteresting insns. */
25967 if (insn_type != TYPE_SETCC
25968 && insn_type != TYPE_ICMOV
25969 && insn_type != TYPE_FCMOV
25970 && insn_type != TYPE_IBR)
25971 return false;
25973 if ((set = single_set (dep_insn)) != 0)
25975 set = SET_DEST (set);
25976 set2 = NULL_RTX;
25978 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25979 && XVECLEN (PATTERN (dep_insn), 0) == 2
25980 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25981 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25983 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25984 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25986 else
25987 return false;
25989 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25990 return false;
25992 /* This test is true if the dependent insn reads the flags but
25993 not any other potentially set register. */
25994 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25995 return false;
25997 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25998 return false;
26000 return true;
26003 /* Return true iff USE_INSN has a memory address with operands set by
26004 SET_INSN. */
26006 bool
26007 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26009 int i;
26010 extract_insn_cached (use_insn);
26011 for (i = recog_data.n_operands - 1; i >= 0; --i)
26012 if (MEM_P (recog_data.operand[i]))
26014 rtx addr = XEXP (recog_data.operand[i], 0);
26015 return modified_in_p (addr, set_insn) != 0;
26017 return false;
26020 /* Helper function for exact_store_load_dependency.
26021 Return true if addr is found in insn. */
26022 static bool
26023 exact_dependency_1 (rtx addr, rtx insn)
26025 enum rtx_code code;
26026 const char *format_ptr;
26027 int i, j;
26029 code = GET_CODE (insn);
26030 switch (code)
26032 case MEM:
26033 if (rtx_equal_p (addr, insn))
26034 return true;
26035 break;
26036 case REG:
26037 CASE_CONST_ANY:
26038 case SYMBOL_REF:
26039 case CODE_LABEL:
26040 case PC:
26041 case CC0:
26042 case EXPR_LIST:
26043 return false;
26044 default:
26045 break;
26048 format_ptr = GET_RTX_FORMAT (code);
26049 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26051 switch (*format_ptr++)
26053 case 'e':
26054 if (exact_dependency_1 (addr, XEXP (insn, i)))
26055 return true;
26056 break;
26057 case 'E':
26058 for (j = 0; j < XVECLEN (insn, i); j++)
26059 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26060 return true;
26061 break;
26064 return false;
26067 /* Return true if there exists exact dependency for store & load, i.e.
26068 the same memory address is used in them. */
26069 static bool
26070 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26072 rtx set1, set2;
26074 set1 = single_set (store);
26075 if (!set1)
26076 return false;
26077 if (!MEM_P (SET_DEST (set1)))
26078 return false;
26079 set2 = single_set (load);
26080 if (!set2)
26081 return false;
26082 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26083 return true;
26084 return false;
26087 static int
26088 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26090 enum attr_type insn_type, dep_insn_type;
26091 enum attr_memory memory;
26092 rtx set, set2;
26093 int dep_insn_code_number;
26095 /* Anti and output dependencies have zero cost on all CPUs. */
26096 if (REG_NOTE_KIND (link) != 0)
26097 return 0;
26099 dep_insn_code_number = recog_memoized (dep_insn);
26101 /* If we can't recognize the insns, we can't really do anything. */
26102 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26103 return cost;
26105 insn_type = get_attr_type (insn);
26106 dep_insn_type = get_attr_type (dep_insn);
26108 switch (ix86_tune)
26110 case PROCESSOR_PENTIUM:
26111 /* Address Generation Interlock adds a cycle of latency. */
26112 if (insn_type == TYPE_LEA)
26114 rtx addr = PATTERN (insn);
26116 if (GET_CODE (addr) == PARALLEL)
26117 addr = XVECEXP (addr, 0, 0);
26119 gcc_assert (GET_CODE (addr) == SET);
26121 addr = SET_SRC (addr);
26122 if (modified_in_p (addr, dep_insn))
26123 cost += 1;
26125 else if (ix86_agi_dependent (dep_insn, insn))
26126 cost += 1;
26128 /* ??? Compares pair with jump/setcc. */
26129 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26130 cost = 0;
26132 /* Floating point stores require value to be ready one cycle earlier. */
26133 if (insn_type == TYPE_FMOV
26134 && get_attr_memory (insn) == MEMORY_STORE
26135 && !ix86_agi_dependent (dep_insn, insn))
26136 cost += 1;
26137 break;
26139 case PROCESSOR_PENTIUMPRO:
26140 /* INT->FP conversion is expensive. */
26141 if (get_attr_fp_int_src (dep_insn))
26142 cost += 5;
26144 /* There is one cycle extra latency between an FP op and a store. */
26145 if (insn_type == TYPE_FMOV
26146 && (set = single_set (dep_insn)) != NULL_RTX
26147 && (set2 = single_set (insn)) != NULL_RTX
26148 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26149 && MEM_P (SET_DEST (set2)))
26150 cost += 1;
26152 memory = get_attr_memory (insn);
26154 /* Show the ability of the reorder buffer to hide the latency of a load by
26155 executing it in parallel with the previous instruction when the
26156 previous instruction is not needed to compute the address. */
26157 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26158 && !ix86_agi_dependent (dep_insn, insn))
26160 /* Claim moves to take one cycle, as the core can issue one load
26161 at a time and the next load can start a cycle later. */
26162 if (dep_insn_type == TYPE_IMOV
26163 || dep_insn_type == TYPE_FMOV)
26164 cost = 1;
26165 else if (cost > 1)
26166 cost--;
26168 break;
26170 case PROCESSOR_K6:
26171 /* The esp dependency is resolved before
26172 the instruction is really finished. */
26173 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26174 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26175 return 1;
26177 /* INT->FP conversion is expensive. */
26178 if (get_attr_fp_int_src (dep_insn))
26179 cost += 5;
26181 memory = get_attr_memory (insn);
26183 /* Show the ability of the reorder buffer to hide the latency of a load by
26184 executing it in parallel with the previous instruction when the
26185 previous instruction is not needed to compute the address. */
26186 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26187 && !ix86_agi_dependent (dep_insn, insn))
26189 /* Claim moves to take one cycle, as the core can issue one load
26190 at a time and the next load can start a cycle later. */
26191 if (dep_insn_type == TYPE_IMOV
26192 || dep_insn_type == TYPE_FMOV)
26193 cost = 1;
26194 else if (cost > 2)
26195 cost -= 2;
26196 else
26197 cost = 1;
26199 break;
26201 case PROCESSOR_AMDFAM10:
26202 case PROCESSOR_BDVER1:
26203 case PROCESSOR_BDVER2:
26204 case PROCESSOR_BDVER3:
26205 case PROCESSOR_BDVER4:
26206 case PROCESSOR_BTVER1:
26207 case PROCESSOR_BTVER2:
26208 case PROCESSOR_GENERIC:
26209 /* The stack engine allows push&pop instructions to execute in parallel. */
26210 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26211 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26212 return 0;
26213 /* FALLTHRU */
26215 case PROCESSOR_ATHLON:
26216 case PROCESSOR_K8:
26217 memory = get_attr_memory (insn);
26219 /* Show the ability of the reorder buffer to hide the latency of a load by
26220 executing it in parallel with the previous instruction when the
26221 previous instruction is not needed to compute the address. */
26222 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26223 && !ix86_agi_dependent (dep_insn, insn))
26225 enum attr_unit unit = get_attr_unit (insn);
26226 int loadcost = 3;
26228 /* Because of the difference between the length of integer and
26229 floating unit pipeline preparation stages, the memory operands
26230 for floating point are cheaper.
26232 ??? For Athlon the difference is most probably 2. */
26233 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26234 loadcost = 3;
26235 else
26236 loadcost = TARGET_ATHLON ? 2 : 0;
26238 if (cost >= loadcost)
26239 cost -= loadcost;
26240 else
26241 cost = 0;
26243 break;
26245 case PROCESSOR_CORE2:
26246 case PROCESSOR_NEHALEM:
26247 case PROCESSOR_SANDYBRIDGE:
26248 case PROCESSOR_HASWELL:
26249 /* The stack engine allows push&pop instructions to execute in parallel. */
26250 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26251 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26252 return 0;
26254 memory = get_attr_memory (insn);
26256 /* Show the ability of the reorder buffer to hide the latency of a load by
26257 executing it in parallel with the previous instruction when the
26258 previous instruction is not needed to compute the address. */
26259 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26260 && !ix86_agi_dependent (dep_insn, insn))
26262 if (cost >= 4)
26263 cost -= 4;
26264 else
26265 cost = 0;
26267 break;
26269 case PROCESSOR_SILVERMONT:
26270 case PROCESSOR_INTEL:
26271 if (!reload_completed)
26272 return cost;
26274 /* Increase cost of integer loads. */
26275 memory = get_attr_memory (dep_insn);
26276 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26278 enum attr_unit unit = get_attr_unit (dep_insn);
26279 if (unit == UNIT_INTEGER && cost == 1)
26281 if (memory == MEMORY_LOAD)
26282 cost = 3;
26283 else
26285 /* Increase cost of ld/st for short int types only
26286 because of store forwarding issue. */
26287 rtx set = single_set (dep_insn);
26288 if (set && (GET_MODE (SET_DEST (set)) == QImode
26289 || GET_MODE (SET_DEST (set)) == HImode))
26291 /* Increase cost of store/load insn if exact
26292 dependence exists and it is load insn. */
26293 enum attr_memory insn_memory = get_attr_memory (insn);
26294 if (insn_memory == MEMORY_LOAD
26295 && exact_store_load_dependency (dep_insn, insn))
26296 cost = 3;
26302 default:
26303 break;
26306 return cost;
26309 /* How many alternative schedules to try. This should be as wide as the
26310 scheduling freedom in the DFA, but no wider. Making this value too
26311 large results in extra work for the scheduler. */
26313 static int
26314 ia32_multipass_dfa_lookahead (void)
26316 switch (ix86_tune)
26318 case PROCESSOR_PENTIUM:
26319 return 2;
26321 case PROCESSOR_PENTIUMPRO:
26322 case PROCESSOR_K6:
26323 return 1;
26325 case PROCESSOR_BDVER1:
26326 case PROCESSOR_BDVER2:
26327 case PROCESSOR_BDVER3:
26328 case PROCESSOR_BDVER4:
26329 /* We use lookahead value 4 for BD both before and after reload
26330 schedules. Plan is to have value 8 included for O3. */
26331 return 4;
26333 case PROCESSOR_CORE2:
26334 case PROCESSOR_NEHALEM:
26335 case PROCESSOR_SANDYBRIDGE:
26336 case PROCESSOR_HASWELL:
26337 case PROCESSOR_BONNELL:
26338 case PROCESSOR_SILVERMONT:
26339 case PROCESSOR_INTEL:
26340 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26341 as the number of instructions that can be executed in a cycle, i.e.,
26342 issue_rate. I wonder why tuning for many CPUs does not do this. */
26343 if (reload_completed)
26344 return ix86_issue_rate ();
26345 /* Don't use lookahead for pre-reload schedule to save compile time. */
26346 return 0;
26348 default:
26349 return 0;
26353 /* Return true if target platform supports macro-fusion. */
26355 static bool
26356 ix86_macro_fusion_p ()
26358 return TARGET_FUSE_CMP_AND_BRANCH;
26361 /* Check whether the current microarchitecture supports macro fusion
26362 for insn pair "CONDGEN + CONDJMP". Refer to
26363 "Intel Architectures Optimization Reference Manual". */
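/* For example, a cmp or test of two registers followed by jne can fuse,
   while a cmp of a memory operand against an immediate, a compare whose
   memory operand uses a RIP-relative address, or inc/dec followed by an
   unsigned jump such as ja cannot.  */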
26365 static bool
26366 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26368 rtx src, dest;
26369 enum rtx_code ccode;
26370 rtx compare_set = NULL_RTX, test_if, cond;
26371 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26373 if (!any_condjump_p (condjmp))
26374 return false;
26376 if (get_attr_type (condgen) != TYPE_TEST
26377 && get_attr_type (condgen) != TYPE_ICMP
26378 && get_attr_type (condgen) != TYPE_INCDEC
26379 && get_attr_type (condgen) != TYPE_ALU)
26380 return false;
26382 compare_set = single_set (condgen);
26383 if (compare_set == NULL_RTX
26384 && !TARGET_FUSE_ALU_AND_BRANCH)
26385 return false;
26387 if (compare_set == NULL_RTX)
26389 int i;
26390 rtx pat = PATTERN (condgen);
26391 for (i = 0; i < XVECLEN (pat, 0); i++)
26392 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26394 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26395 if (GET_CODE (set_src) == COMPARE)
26396 compare_set = XVECEXP (pat, 0, i);
26397 else
26398 alu_set = XVECEXP (pat, 0, i);
26401 if (compare_set == NULL_RTX)
26402 return false;
26403 src = SET_SRC (compare_set);
26404 if (GET_CODE (src) != COMPARE)
26405 return false;
26407 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26408 supported. */
26409 if ((MEM_P (XEXP (src, 0))
26410 && CONST_INT_P (XEXP (src, 1)))
26411 || (MEM_P (XEXP (src, 1))
26412 && CONST_INT_P (XEXP (src, 0))))
26413 return false;
26415 /* No fusion for RIP-relative address. */
26416 if (MEM_P (XEXP (src, 0)))
26417 addr = XEXP (XEXP (src, 0), 0);
26418 else if (MEM_P (XEXP (src, 1)))
26419 addr = XEXP (XEXP (src, 1), 0);
26421 if (addr) {
26422 ix86_address parts;
26423 int ok = ix86_decompose_address (addr, &parts);
26424 gcc_assert (ok);
26426 if (rip_relative_addr_p (&parts))
26427 return false;
26430 test_if = SET_SRC (pc_set (condjmp));
26431 cond = XEXP (test_if, 0);
26432 ccode = GET_CODE (cond);
26433 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26434 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26435 && (ccode == GE
26436 || ccode == GT
26437 || ccode == LE
26438 || ccode == LT))
26439 return false;
26441 /* Return true for TYPE_TEST and TYPE_ICMP. */
26442 if (get_attr_type (condgen) == TYPE_TEST
26443 || get_attr_type (condgen) == TYPE_ICMP)
26444 return true;
26446 /* The remaining case is macro-fusion for alu + jmp. */
26447 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26448 return false;
26450 /* No fusion for alu op with memory destination operand. */
26451 dest = SET_DEST (alu_set);
26452 if (MEM_P (dest))
26453 return false;
26455 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26456 supported. */
26457 if (get_attr_type (condgen) == TYPE_INCDEC
26458 && (ccode == GEU
26459 || ccode == GTU
26460 || ccode == LEU
26461 || ccode == LTU))
26462 return false;
26464 return true;
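/* For illustration, given the checks above (and assuming the relevant
   TARGET_FUSE_* tuning flags are enabled), a pair such as
       cmp %rsi, %rdi ; je .L1
   is considered fusible, while
       cmpl $1, (%rdi) ; je .L1
   is rejected as a MEM-IMM compare, and an inc/dec followed by an
   unsigned conditional jump (ja/jb/jae/jbe) is rejected as well.  */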
26467 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26468 execution. It is applied if
26469 (1) An IMUL instruction is at the top of the ready list;
26470 (2) The ready list contains the sole producer of an independent
26471 IMUL instruction.
26472 Return the index of the IMUL producer if found, and -1 otherwise. */
26473 static int
26474 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26476 rtx_insn *insn;
26477 rtx set, insn1, insn2;
26478 sd_iterator_def sd_it;
26479 dep_t dep;
26480 int index = -1;
26481 int i;
26483 if (!TARGET_BONNELL)
26484 return index;
26486 /* Check that an IMUL instruction is at the top of the ready list. */
26487 insn = ready[n_ready - 1];
26488 set = single_set (insn);
26489 if (!set)
26490 return index;
26491 if (!(GET_CODE (SET_SRC (set)) == MULT
26492 && GET_MODE (SET_SRC (set)) == SImode))
26493 return index;
26495 /* Search for producer of independent IMUL instruction. */
26496 for (i = n_ready - 2; i >= 0; i--)
26498 insn = ready[i];
26499 if (!NONDEBUG_INSN_P (insn))
26500 continue;
26501 /* Skip IMUL instruction. */
26502 insn2 = PATTERN (insn);
26503 if (GET_CODE (insn2) == PARALLEL)
26504 insn2 = XVECEXP (insn2, 0, 0);
26505 if (GET_CODE (insn2) == SET
26506 && GET_CODE (SET_SRC (insn2)) == MULT
26507 && GET_MODE (SET_SRC (insn2)) == SImode)
26508 continue;
26510 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26512 rtx con;
26513 con = DEP_CON (dep);
26514 if (!NONDEBUG_INSN_P (con))
26515 continue;
26516 insn1 = PATTERN (con);
26517 if (GET_CODE (insn1) == PARALLEL)
26518 insn1 = XVECEXP (insn1, 0, 0);
26520 if (GET_CODE (insn1) == SET
26521 && GET_CODE (SET_SRC (insn1)) == MULT
26522 && GET_MODE (SET_SRC (insn1)) == SImode)
26524 sd_iterator_def sd_it1;
26525 dep_t dep1;
26526 /* Check that the IMUL has no producer other than INSN. */
26527 index = i;
26528 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26530 rtx pro;
26531 pro = DEP_PRO (dep1);
26532 if (!NONDEBUG_INSN_P (pro))
26533 continue;
26534 if (pro != insn)
26535 index = -1;
26537 if (index >= 0)
26538 break;
26541 if (index >= 0)
26542 break;
26544 return index;
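/* Illustrative summary of the search above: with a ready list of the form
   { ..., producer-of-B, ..., imul A } where imul A is on top and some insn
   in the list is the sole producer of an independent SImode IMUL B, the
   index of that producer is returned so that the caller can hoist it to
   the top and B can issue back-to-back with A on Atom's pipelined IMUL.  */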
26547 /* Try to find the best candidate on the top of ready list if two insns
26548 have the same priority - the best candidate is the one whose producers
26549 were scheduled earlier. Applied for Silvermont only.
26550 Return true if the top 2 insns must be interchanged. */
26551 static bool
26552 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26554 rtx_insn *top = ready[n_ready - 1];
26555 rtx_insn *next = ready[n_ready - 2];
26556 rtx set;
26557 sd_iterator_def sd_it;
26558 dep_t dep;
26559 int clock1 = -1;
26560 int clock2 = -1;
26561 #define INSN_TICK(INSN) (HID (INSN)->tick)
26563 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26564 return false;
26566 if (!NONDEBUG_INSN_P (top))
26567 return false;
26568 if (!NONJUMP_INSN_P (top))
26569 return false;
26570 if (!NONDEBUG_INSN_P (next))
26571 return false;
26572 if (!NONJUMP_INSN_P (next))
26573 return false;
26574 set = single_set (top);
26575 if (!set)
26576 return false;
26577 set = single_set (next);
26578 if (!set)
26579 return false;
26581 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26583 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26584 return false;
26585 /* Determine the winner more precisely. */
26586 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26588 rtx pro;
26589 pro = DEP_PRO (dep);
26590 if (!NONDEBUG_INSN_P (pro))
26591 continue;
26592 if (INSN_TICK (pro) > clock1)
26593 clock1 = INSN_TICK (pro);
26595 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26597 rtx pro;
26598 pro = DEP_PRO (dep);
26599 if (!NONDEBUG_INSN_P (pro))
26600 continue;
26601 if (INSN_TICK (pro) > clock2)
26602 clock2 = INSN_TICK (pro);
26605 if (clock1 == clock2)
26607 /* Determine the winner - a load must win. */
26608 enum attr_memory memory1, memory2;
26609 memory1 = get_attr_memory (top);
26610 memory2 = get_attr_memory (next);
26611 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26612 return true;
26614 return (bool) (clock2 < clock1);
26616 return false;
26617 #undef INSN_TICK
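/* In short, the tie-breaking above swaps the top two equal-priority insns
   when the second one's producers completed earlier (smaller INSN_TICK),
   and on a further tie prefers the one that is a load, so memory latency
   is exposed to the scheduler as early as possible.  */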
26620 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26621 Return the issue rate. */
26622 static int
26623 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26624 int *pn_ready, int clock_var)
26626 int issue_rate = -1;
26627 int n_ready = *pn_ready;
26628 int i;
26629 rtx_insn *insn;
26630 int index = -1;
26632 /* Set up issue rate. */
26633 issue_rate = ix86_issue_rate ();
26635 /* Do reordering for BONNELL/SILVERMONT only. */
26636 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26637 return issue_rate;
26639 /* Nothing to do if ready list contains only 1 instruction. */
26640 if (n_ready <= 1)
26641 return issue_rate;
26643 /* Do reordering for the post-reload scheduler only. */
26644 if (!reload_completed)
26645 return issue_rate;
26647 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26649 if (sched_verbose > 1)
26650 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26651 INSN_UID (ready[index]));
26653 /* Put IMUL producer (ready[index]) at the top of ready list. */
26654 insn = ready[index];
26655 for (i = index; i < n_ready - 1; i++)
26656 ready[i] = ready[i + 1];
26657 ready[n_ready - 1] = insn;
26658 return issue_rate;
26660 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26662 if (sched_verbose > 1)
26663 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26664 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26665 /* Swap 2 top elements of ready list. */
26666 insn = ready[n_ready - 1];
26667 ready[n_ready - 1] = ready[n_ready - 2];
26668 ready[n_ready - 2] = insn;
26670 return issue_rate;
26673 static bool
26674 ix86_class_likely_spilled_p (reg_class_t);
26676 /* Return true if the lhs of INSN is a HW function argument register; set
26677 *IS_SPILLED to true if it is a likely-spilled HW register. */
26678 static bool
26679 insn_is_function_arg (rtx insn, bool* is_spilled)
26681 rtx dst;
26683 if (!NONDEBUG_INSN_P (insn))
26684 return false;
26685 /* Call instructions are not movable, ignore them. */
26686 if (CALL_P (insn))
26687 return false;
26688 insn = PATTERN (insn);
26689 if (GET_CODE (insn) == PARALLEL)
26690 insn = XVECEXP (insn, 0, 0);
26691 if (GET_CODE (insn) != SET)
26692 return false;
26693 dst = SET_DEST (insn);
26694 if (REG_P (dst) && HARD_REGISTER_P (dst)
26695 && ix86_function_arg_regno_p (REGNO (dst)))
26697 /* Is it a likely-spilled HW register? */
26698 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26699 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26700 *is_spilled = true;
26701 return true;
26703 return false;
26706 /* Add output dependencies for a chain of adjacent function arguments, but
26707 only if there is a move to a likely-spilled HW register. Return the first
26708 argument if at least one dependence was added, or NULL otherwise. */
26709 static rtx_insn *
26710 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26712 rtx_insn *insn;
26713 rtx_insn *last = call;
26714 rtx_insn *first_arg = NULL;
26715 bool is_spilled = false;
26717 head = PREV_INSN (head);
26719 /* Find the argument-passing instruction nearest to the call. */
26720 while (true)
26722 last = PREV_INSN (last);
26723 if (last == head)
26724 return NULL;
26725 if (!NONDEBUG_INSN_P (last))
26726 continue;
26727 if (insn_is_function_arg (last, &is_spilled))
26728 break;
26729 return NULL;
26732 first_arg = last;
26733 while (true)
26735 insn = PREV_INSN (last);
26736 if (!INSN_P (insn))
26737 break;
26738 if (insn == head)
26739 break;
26740 if (!NONDEBUG_INSN_P (insn))
26742 last = insn;
26743 continue;
26745 if (insn_is_function_arg (insn, &is_spilled))
26747 /* Add an output dependence between two function arguments if the chain
26748 of output arguments contains likely-spilled HW registers. */
26749 if (is_spilled)
26750 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26751 first_arg = last = insn;
26753 else
26754 break;
26756 if (!is_spilled)
26757 return NULL;
26758 return first_arg;
26761 /* Add output or anti dependency from insn to first_arg to restrict its code
26762 motion. */
26763 static void
26764 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26766 rtx set;
26767 rtx tmp;
26769 set = single_set (insn);
26770 if (!set)
26771 return;
26772 tmp = SET_DEST (set);
26773 if (REG_P (tmp))
26775 /* Add output dependency to the first function argument. */
26776 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26777 return;
26779 /* Add anti dependency. */
26780 add_dependence (first_arg, insn, REG_DEP_ANTI);
26783 /* Avoid cross-block motion of a function argument by adding a dependency
26784 from the first non-jump instruction in bb. */
26785 static void
26786 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26788 rtx_insn *insn = BB_END (bb);
26790 while (insn)
26792 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26794 rtx set = single_set (insn);
26795 if (set)
26797 avoid_func_arg_motion (arg, insn);
26798 return;
26801 if (insn == BB_HEAD (bb))
26802 return;
26803 insn = PREV_INSN (insn);
26807 /* Hook for pre-reload schedule - avoid motion of function arguments
26808 passed in likely spilled HW registers. */
26809 static void
26810 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26812 rtx_insn *insn;
26813 rtx_insn *first_arg = NULL;
26814 if (reload_completed)
26815 return;
26816 while (head != tail && DEBUG_INSN_P (head))
26817 head = NEXT_INSN (head);
26818 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26819 if (INSN_P (insn) && CALL_P (insn))
26821 first_arg = add_parameter_dependencies (insn, head);
26822 if (first_arg)
26824 /* Add a dependee for the first argument to predecessors, but only
26825 if the region contains more than one block. */
26826 basic_block bb = BLOCK_FOR_INSN (insn);
26827 int rgn = CONTAINING_RGN (bb->index);
26828 int nr_blks = RGN_NR_BLOCKS (rgn);
26829 /* Skip trivial regions and region head blocks that can have
26830 predecessors outside of region. */
26831 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26833 edge e;
26834 edge_iterator ei;
26836 /* Regions are SCCs with the exception of selective
26837 scheduling with pipelining of outer blocks enabled.
26838 So also check that immediate predecessors of a non-head
26839 block are in the same region. */
26840 FOR_EACH_EDGE (e, ei, bb->preds)
26842 /* Avoid creating loop-carried dependencies by using
26843 the topological ordering in the region. */
26844 if (rgn == CONTAINING_RGN (e->src->index)
26845 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26846 add_dependee_for_func_arg (first_arg, e->src);
26849 insn = first_arg;
26850 if (insn == head)
26851 break;
26854 else if (first_arg)
26855 avoid_func_arg_motion (first_arg, insn);
26858 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
26859 HW registers to the maximum, to schedule them as soon as possible. These are
26860 moves from function argument registers at the top of the function entry
26861 and moves from function return value registers after call. */
26862 static int
26863 ix86_adjust_priority (rtx_insn *insn, int priority)
26865 rtx set;
26867 if (reload_completed)
26868 return priority;
26870 if (!NONDEBUG_INSN_P (insn))
26871 return priority;
26873 set = single_set (insn);
26874 if (set)
26876 rtx tmp = SET_SRC (set);
26877 if (REG_P (tmp)
26878 && HARD_REGISTER_P (tmp)
26879 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26880 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26881 return current_sched_info->sched_max_insns_priority;
26884 return priority;
26887 /* Model decoder of Core 2/i7.
26888 The hooks below, for multipass scheduling (see haifa-sched.c:max_issue),
26889 track the instruction fetch block boundaries and make sure that long
26890 (9+ bytes) instructions are assigned to D0. */
26892 /* Maximum length of an insn that can be handled by
26893 a secondary decoder unit. '8' for Core 2/i7. */
26894 static int core2i7_secondary_decoder_max_insn_size;
26896 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26897 '16' for Core 2/i7. */
26898 static int core2i7_ifetch_block_size;
26900 /* Maximum number of instructions decoder can handle per cycle.
26901 '6' for Core 2/i7. */
26902 static int core2i7_ifetch_block_max_insns;
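/* A worked example of this model with the Core 2/i7 parameters set below
   (8-byte secondary decoder limit, 16-byte ifetch block, at most 6 insns
   per block): an insn longer than 8 bytes is only accepted as the first
   insn of a cycle (decoder D0), and once, say, 14 bytes of the current
   block have been consumed, a 3-byte insn no longer fits (14 + 3 > 16)
   and is masked out of ready_try for the rest of the cycle.  */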
26904 typedef struct ix86_first_cycle_multipass_data_ *
26905 ix86_first_cycle_multipass_data_t;
26906 typedef const struct ix86_first_cycle_multipass_data_ *
26907 const_ix86_first_cycle_multipass_data_t;
26909 /* A variable to store target state across calls to max_issue within
26910 one cycle. */
26911 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26912 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26914 /* Initialize DATA. */
26915 static void
26916 core2i7_first_cycle_multipass_init (void *_data)
26918 ix86_first_cycle_multipass_data_t data
26919 = (ix86_first_cycle_multipass_data_t) _data;
26921 data->ifetch_block_len = 0;
26922 data->ifetch_block_n_insns = 0;
26923 data->ready_try_change = NULL;
26924 data->ready_try_change_size = 0;
26927 /* Advancing the cycle; reset ifetch block counts. */
26928 static void
26929 core2i7_dfa_post_advance_cycle (void)
26931 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26933 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26935 data->ifetch_block_len = 0;
26936 data->ifetch_block_n_insns = 0;
26939 static int min_insn_size (rtx_insn *);
26941 /* Filter out insns from ready_try that the core will not be able to issue
26942 on the current cycle due to decoder restrictions. */
26943 static void
26944 core2i7_first_cycle_multipass_filter_ready_try
26945 (const_ix86_first_cycle_multipass_data_t data,
26946 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26948 while (n_ready--)
26950 rtx_insn *insn;
26951 int insn_size;
26953 if (ready_try[n_ready])
26954 continue;
26956 insn = get_ready_element (n_ready);
26957 insn_size = min_insn_size (insn);
26959 if (/* If this is too long an insn for a secondary decoder ... */
26960 (!first_cycle_insn_p
26961 && insn_size > core2i7_secondary_decoder_max_insn_size)
26962 /* ... or it would not fit into the ifetch block ... */
26963 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26964 /* ... or the decoder is full already ... */
26965 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26966 /* ... mask the insn out. */
26968 ready_try[n_ready] = 1;
26970 if (data->ready_try_change)
26971 bitmap_set_bit (data->ready_try_change, n_ready);
26976 /* Prepare for a new round of multipass lookahead scheduling. */
26977 static void
26978 core2i7_first_cycle_multipass_begin (void *_data,
26979 signed char *ready_try, int n_ready,
26980 bool first_cycle_insn_p)
26982 ix86_first_cycle_multipass_data_t data
26983 = (ix86_first_cycle_multipass_data_t) _data;
26984 const_ix86_first_cycle_multipass_data_t prev_data
26985 = ix86_first_cycle_multipass_data;
26987 /* Restore the state from the end of the previous round. */
26988 data->ifetch_block_len = prev_data->ifetch_block_len;
26989 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26991 /* Filter instructions that cannot be issued on current cycle due to
26992 decoder restrictions. */
26993 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26994 first_cycle_insn_p);
26997 /* INSN is being issued in current solution. Account for its impact on
26998 the decoder model. */
26999 static void
27000 core2i7_first_cycle_multipass_issue (void *_data,
27001 signed char *ready_try, int n_ready,
27002 rtx_insn *insn, const void *_prev_data)
27004 ix86_first_cycle_multipass_data_t data
27005 = (ix86_first_cycle_multipass_data_t) _data;
27006 const_ix86_first_cycle_multipass_data_t prev_data
27007 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27009 int insn_size = min_insn_size (insn);
27011 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27012 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27013 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27014 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27016 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27017 if (!data->ready_try_change)
27019 data->ready_try_change = sbitmap_alloc (n_ready);
27020 data->ready_try_change_size = n_ready;
27022 else if (data->ready_try_change_size < n_ready)
27024 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27025 n_ready, 0);
27026 data->ready_try_change_size = n_ready;
27028 bitmap_clear (data->ready_try_change);
27030 /* Filter out insns from ready_try that the core will not be able to issue
27031 on the current cycle due to decoder restrictions. */
27032 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27033 false);
27036 /* Revert the effect on ready_try. */
27037 static void
27038 core2i7_first_cycle_multipass_backtrack (const void *_data,
27039 signed char *ready_try,
27040 int n_ready ATTRIBUTE_UNUSED)
27042 const_ix86_first_cycle_multipass_data_t data
27043 = (const_ix86_first_cycle_multipass_data_t) _data;
27044 unsigned int i = 0;
27045 sbitmap_iterator sbi;
27047 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27048 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27050 ready_try[i] = 0;
27054 /* Save the result of multipass lookahead scheduling for the next round. */
27055 static void
27056 core2i7_first_cycle_multipass_end (const void *_data)
27058 const_ix86_first_cycle_multipass_data_t data
27059 = (const_ix86_first_cycle_multipass_data_t) _data;
27060 ix86_first_cycle_multipass_data_t next_data
27061 = ix86_first_cycle_multipass_data;
27063 if (data != NULL)
27065 next_data->ifetch_block_len = data->ifetch_block_len;
27066 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27070 /* Deallocate target data. */
27071 static void
27072 core2i7_first_cycle_multipass_fini (void *_data)
27074 ix86_first_cycle_multipass_data_t data
27075 = (ix86_first_cycle_multipass_data_t) _data;
27077 if (data->ready_try_change)
27079 sbitmap_free (data->ready_try_change);
27080 data->ready_try_change = NULL;
27081 data->ready_try_change_size = 0;
27085 /* Prepare for scheduling pass. */
27086 static void
27087 ix86_sched_init_global (FILE *, int, int)
27089 /* Install scheduling hooks for current CPU. Some of these hooks are used
27090 in time-critical parts of the scheduler, so we only set them up when
27091 they are actually used. */
27092 switch (ix86_tune)
27094 case PROCESSOR_CORE2:
27095 case PROCESSOR_NEHALEM:
27096 case PROCESSOR_SANDYBRIDGE:
27097 case PROCESSOR_HASWELL:
27098 /* Do not perform multipass scheduling for pre-reload schedule
27099 to save compile time. */
27100 if (reload_completed)
27102 targetm.sched.dfa_post_advance_cycle
27103 = core2i7_dfa_post_advance_cycle;
27104 targetm.sched.first_cycle_multipass_init
27105 = core2i7_first_cycle_multipass_init;
27106 targetm.sched.first_cycle_multipass_begin
27107 = core2i7_first_cycle_multipass_begin;
27108 targetm.sched.first_cycle_multipass_issue
27109 = core2i7_first_cycle_multipass_issue;
27110 targetm.sched.first_cycle_multipass_backtrack
27111 = core2i7_first_cycle_multipass_backtrack;
27112 targetm.sched.first_cycle_multipass_end
27113 = core2i7_first_cycle_multipass_end;
27114 targetm.sched.first_cycle_multipass_fini
27115 = core2i7_first_cycle_multipass_fini;
27117 /* Set decoder parameters. */
27118 core2i7_secondary_decoder_max_insn_size = 8;
27119 core2i7_ifetch_block_size = 16;
27120 core2i7_ifetch_block_max_insns = 6;
27121 break;
27123 /* ... Fall through ... */
27124 default:
27125 targetm.sched.dfa_post_advance_cycle = NULL;
27126 targetm.sched.first_cycle_multipass_init = NULL;
27127 targetm.sched.first_cycle_multipass_begin = NULL;
27128 targetm.sched.first_cycle_multipass_issue = NULL;
27129 targetm.sched.first_cycle_multipass_backtrack = NULL;
27130 targetm.sched.first_cycle_multipass_end = NULL;
27131 targetm.sched.first_cycle_multipass_fini = NULL;
27132 break;
27137 /* Compute the alignment given to a constant that is being placed in memory.
27138 EXP is the constant and ALIGN is the alignment that the object would
27139 ordinarily have.
27140 The value of this function is used instead of that alignment to align
27141 the object. */
27144 ix86_constant_alignment (tree exp, int align)
27146 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27147 || TREE_CODE (exp) == INTEGER_CST)
27149 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27150 return 64;
27151 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27152 return 128;
27154 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27155 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27156 return BITS_PER_WORD;
27158 return align;
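/* For example, under the rule above a DFmode (double) constant is raised
   to 64-bit alignment, a constant whose mode satisfies ALIGN_MODE_128 is
   raised to 128-bit alignment, and a string constant of 31 or more bytes
   is word-aligned unless optimizing for size.  */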
27161 /* Compute the alignment for a static variable.
27162 TYPE is the data type, and ALIGN is the alignment that
27163 the object would ordinarily have. The value of this function is used
27164 instead of that alignment to align the object. */
27167 ix86_data_alignment (tree type, int align, bool opt)
27169 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27170 for symbols from other compilation units or symbols that don't need
27171 to bind locally. In order to preserve some ABI compatibility with
27172 those compilers, ensure we don't decrease alignment from what we
27173 used to assume. */
27175 int max_align_compat
27176 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
27178 /* A data structure equal to or greater than the size of a cache line
27179 (64 bytes in the Pentium 4 and other recent Intel processors, including
27180 processors based on the Intel Core microarchitecture) should be aligned
27181 so that its base address is a multiple of the cache line size. */
27183 int max_align
27184 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27186 if (max_align < BITS_PER_WORD)
27187 max_align = BITS_PER_WORD;
27189 if (opt
27190 && AGGREGATE_TYPE_P (type)
27191 && TYPE_SIZE (type)
27192 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27194 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27195 && align < max_align_compat)
27196 align = max_align_compat;
27197 if (wi::geu_p (TYPE_SIZE (type), max_align)
27198 && align < max_align)
27199 align = max_align;
27202 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27203 to a 16-byte boundary. */
27204 if (TARGET_64BIT)
27206 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27207 && TYPE_SIZE (type)
27208 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27209 && wi::geu_p (TYPE_SIZE (type), 128)
27210 && align < 128)
27211 return 128;
27214 if (!opt)
27215 return align;
27217 if (TREE_CODE (type) == ARRAY_TYPE)
27219 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27220 return 64;
27221 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27222 return 128;
27224 else if (TREE_CODE (type) == COMPLEX_TYPE)
27227 if (TYPE_MODE (type) == DCmode && align < 64)
27228 return 64;
27229 if ((TYPE_MODE (type) == XCmode
27230 || TYPE_MODE (type) == TCmode) && align < 128)
27231 return 128;
27233 else if ((TREE_CODE (type) == RECORD_TYPE
27234 || TREE_CODE (type) == UNION_TYPE
27235 || TREE_CODE (type) == QUAL_UNION_TYPE)
27236 && TYPE_FIELDS (type))
27238 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27239 return 64;
27240 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27241 return 128;
27243 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27244 || TREE_CODE (type) == INTEGER_TYPE)
27246 if (TYPE_MODE (type) == DFmode && align < 64)
27247 return 64;
27248 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27249 return 128;
27252 return align;
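/* A worked example of the logic above, assuming a 64-byte prefetch_block
   and the default MAX_OFILE_ALIGNMENT: a 300-byte global struct compiled
   with optimization is raised first to max_align_compat (256 bits) and
   then to max_align (512 bits), since TYPE_SIZE exceeds both thresholds;
   independently, on x86-64 any array of at least 128 bits is given
   128-bit alignment as required by the ABI.  */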
27255 /* Compute the alignment for a local variable or a stack slot. EXP is
27256 the data type or decl itself, MODE is the widest mode available and
27257 ALIGN is the alignment that the object would ordinarily have. The
27258 value of this macro is used instead of that alignment to align the
27259 object. */
27261 unsigned int
27262 ix86_local_alignment (tree exp, machine_mode mode,
27263 unsigned int align)
27265 tree type, decl;
27267 if (exp && DECL_P (exp))
27269 type = TREE_TYPE (exp);
27270 decl = exp;
27272 else
27274 type = exp;
27275 decl = NULL;
27278 /* Don't do dynamic stack realignment for long long objects with
27279 -mpreferred-stack-boundary=2. */
27280 if (!TARGET_64BIT
27281 && align == 64
27282 && ix86_preferred_stack_boundary < 64
27283 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27284 && (!type || !TYPE_USER_ALIGN (type))
27285 && (!decl || !DECL_USER_ALIGN (decl)))
27286 align = 32;
27288 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27289 register in MODE. We will return the largest alignment of XF
27290 and DF. */
27291 if (!type)
27293 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27294 align = GET_MODE_ALIGNMENT (DFmode);
27295 return align;
27298 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27299 to a 16-byte boundary. The exact wording is:
27301 An array uses the same alignment as its elements, except that a local or
27302 global array variable of length at least 16 bytes or
27303 a C99 variable-length array variable always has alignment of at least 16 bytes.
27305 This was added to allow the use of aligned SSE instructions on arrays. The
27306 rule is meant for static storage (where the compiler cannot do the analysis
27307 by itself). We follow it for automatic variables only when convenient.
27308 We fully control everything in the function being compiled, and functions
27309 from other units cannot rely on the alignment.
27311 Exclude the va_list type. It is the common case of a local array where
27312 we cannot benefit from the alignment.
27314 TODO: Probably one should optimize for size only when the var is not escaping. */
27315 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27316 && TARGET_SSE)
27318 if (AGGREGATE_TYPE_P (type)
27319 && (va_list_type_node == NULL_TREE
27320 || (TYPE_MAIN_VARIANT (type)
27321 != TYPE_MAIN_VARIANT (va_list_type_node)))
27322 && TYPE_SIZE (type)
27323 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27324 && wi::geu_p (TYPE_SIZE (type), 16)
27325 && align < 128)
27326 return 128;
27328 if (TREE_CODE (type) == ARRAY_TYPE)
27330 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27331 return 64;
27332 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27333 return 128;
27335 else if (TREE_CODE (type) == COMPLEX_TYPE)
27337 if (TYPE_MODE (type) == DCmode && align < 64)
27338 return 64;
27339 if ((TYPE_MODE (type) == XCmode
27340 || TYPE_MODE (type) == TCmode) && align < 128)
27341 return 128;
27343 else if ((TREE_CODE (type) == RECORD_TYPE
27344 || TREE_CODE (type) == UNION_TYPE
27345 || TREE_CODE (type) == QUAL_UNION_TYPE)
27346 && TYPE_FIELDS (type))
27348 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27349 return 64;
27350 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27351 return 128;
27353 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27354 || TREE_CODE (type) == INTEGER_TYPE)
27357 if (TYPE_MODE (type) == DFmode && align < 64)
27358 return 64;
27359 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27360 return 128;
27362 return align;
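/* For instance, under the rules above a local "char buf[32]" in a 64-bit
   SSE function optimized for speed receives 128-bit stack alignment (it is
   an aggregate large enough and not a va_list), while a local long long
   with -mpreferred-stack-boundary=2 on ia32 is dropped back to 32-bit
   alignment to avoid dynamic stack realignment.  */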
27365 /* Compute the minimum required alignment for dynamic stack realignment
27366 purposes for a local variable, parameter or a stack slot. EXP is
27367 the data type or decl itself, MODE is its mode and ALIGN is the
27368 alignment that the object would ordinarily have. */
27370 unsigned int
27371 ix86_minimum_alignment (tree exp, machine_mode mode,
27372 unsigned int align)
27374 tree type, decl;
27376 if (exp && DECL_P (exp))
27378 type = TREE_TYPE (exp);
27379 decl = exp;
27381 else
27383 type = exp;
27384 decl = NULL;
27387 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27388 return align;
27390 /* Don't do dynamic stack realignment for long long objects with
27391 -mpreferred-stack-boundary=2. */
27392 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27393 && (!type || !TYPE_USER_ALIGN (type))
27394 && (!decl || !DECL_USER_ALIGN (decl)))
27395 return 32;
27397 return align;
27400 /* Find a location for the static chain incoming to a nested function.
27401 This is a register, unless all free registers are used by arguments. */
27403 static rtx
27404 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27406 unsigned regno;
27408 /* While this function won't be called by the middle-end when a static
27409 chain isn't needed, it's also used throughout the backend so it's
27410 easiest to keep this check centralized. */
27411 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27412 return NULL;
27414 if (TARGET_64BIT)
27416 /* We always use R10 in 64-bit mode. */
27417 regno = R10_REG;
27419 else
27421 const_tree fntype, fndecl;
27422 unsigned int ccvt;
27424 /* By default in 32-bit mode we use ECX to pass the static chain. */
27425 regno = CX_REG;
27427 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27429 fntype = TREE_TYPE (fndecl_or_type);
27430 fndecl = fndecl_or_type;
27432 else
27434 fntype = fndecl_or_type;
27435 fndecl = NULL;
27438 ccvt = ix86_get_callcvt (fntype);
27439 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27441 /* Fastcall functions use ecx/edx for arguments, which leaves
27442 us with EAX for the static chain.
27443 Thiscall functions use ecx for arguments, which also
27444 leaves us with EAX for the static chain. */
27445 regno = AX_REG;
27447 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27449 /* Thiscall functions use ecx for arguments, which leaves
27450 us with EAX and EDX for the static chain.
27451 For ABI compatibility, we use EAX. */
27452 regno = AX_REG;
27454 else if (ix86_function_regparm (fntype, fndecl) == 3)
27456 /* For regparm 3, we have no free call-clobbered registers in
27457 which to store the static chain. In order to implement this,
27458 we have the trampoline push the static chain to the stack.
27459 However, we can't push a value below the return address when
27460 we call the nested function directly, so we have to use an
27461 alternate entry point. For this we use ESI, and have the
27462 alternate entry point push ESI, so that things appear the
27463 same once we're executing the nested function. */
27464 if (incoming_p)
27466 if (fndecl == current_function_decl)
27467 ix86_static_chain_on_stack = true;
27468 return gen_frame_mem (SImode,
27469 plus_constant (Pmode,
27470 arg_pointer_rtx, -8));
27472 regno = SI_REG;
27476 return gen_rtx_REG (Pmode, regno);
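/* Summarizing the choices above: the static chain is passed in R10 for
   64-bit code, in ECX for plain 32-bit code, in EAX for fastcall/thiscall
   (whose argument registers conflict with ECX), and for regparm(3)
   functions it is pushed on the stack, with ESI used by the alternate
   entry point.  */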
27479 /* Emit RTL insns to initialize the variable parts of a trampoline.
27480 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27481 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27482 to be passed to the target function. */
27484 static void
27485 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27487 rtx mem, fnaddr;
27488 int opcode;
27489 int offset = 0;
27491 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27493 if (TARGET_64BIT)
27495 int size;
27497 /* Load the function address into r11. Try to load the address using
27498 the shorter movl instead of movabs. We may want to support
27499 movq for kernel mode, but the kernel does not use trampolines at
27500 the moment. FNADDR is a 32-bit address and may not be in
27501 DImode when ptr_mode == SImode. Always use movl in this
27502 case. */
27503 if (ptr_mode == SImode
27504 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27506 fnaddr = copy_addr_to_reg (fnaddr);
27508 mem = adjust_address (m_tramp, HImode, offset);
27509 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27511 mem = adjust_address (m_tramp, SImode, offset + 2);
27512 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27513 offset += 6;
27515 else
27517 mem = adjust_address (m_tramp, HImode, offset);
27518 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27520 mem = adjust_address (m_tramp, DImode, offset + 2);
27521 emit_move_insn (mem, fnaddr);
27522 offset += 10;
27525 /* Load static chain using movabs to r10. Use the shorter movl
27526 instead of movabs when ptr_mode == SImode. */
27527 if (ptr_mode == SImode)
27529 opcode = 0xba41;
27530 size = 6;
27532 else
27534 opcode = 0xba49;
27535 size = 10;
27538 mem = adjust_address (m_tramp, HImode, offset);
27539 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27541 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27542 emit_move_insn (mem, chain_value);
27543 offset += size;
27545 /* Jump to r11; the last (unused) byte is a nop, only there to
27546 pad the write out to a single 32-bit store. */
27547 mem = adjust_address (m_tramp, SImode, offset);
27548 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27549 offset += 4;
27551 else
27553 rtx disp, chain;
27555 /* Depending on the static chain location, either load a register
27556 with a constant, or push the constant to the stack. All of the
27557 instructions are the same size. */
27558 chain = ix86_static_chain (fndecl, true);
27559 if (REG_P (chain))
27561 switch (REGNO (chain))
27563 case AX_REG:
27564 opcode = 0xb8; break;
27565 case CX_REG:
27566 opcode = 0xb9; break;
27567 default:
27568 gcc_unreachable ();
27571 else
27572 opcode = 0x68;
27574 mem = adjust_address (m_tramp, QImode, offset);
27575 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27577 mem = adjust_address (m_tramp, SImode, offset + 1);
27578 emit_move_insn (mem, chain_value);
27579 offset += 5;
27581 mem = adjust_address (m_tramp, QImode, offset);
27582 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27584 mem = adjust_address (m_tramp, SImode, offset + 1);
27586 /* Compute offset from the end of the jmp to the target function.
27587 In the case in which the trampoline stores the static chain on
27588 the stack, we need to skip the first insn which pushes the
27589 (call-saved) register static chain; this push is 1 byte. */
27590 offset += 5;
27591 disp = expand_binop (SImode, sub_optab, fnaddr,
27592 plus_constant (Pmode, XEXP (m_tramp, 0),
27593 offset - (MEM_P (chain) ? 1 : 0)),
27594 NULL_RTX, 1, OPTAB_DIRECT);
27595 emit_move_insn (mem, disp);
27598 gcc_assert (offset <= TRAMPOLINE_SIZE);
27600 #ifdef HAVE_ENABLE_EXECUTE_STACK
27601 #ifdef CHECK_EXECUTE_STACK_ENABLED
27602 if (CHECK_EXECUTE_STACK_ENABLED)
27603 #endif
27604 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27605 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27606 #endif
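/* For reference, the 64-bit trampoline emitted above is, byte for byte
   (in the movabs case, when the target address does not fit in 32 bits):

       49 bb <8-byte fnaddr>   movabs $fnaddr, %r11
       49 ba <8-byte chain>    movabs $chain_value, %r10
       49 ff e3                jmpq   *%r11
       90                      nop (pads the write to a full 32-bit store)

   The 32-bit variant is a mov-immediate (or push) of the static chain
   followed by a relative jmp to the target function.  */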
27609 /* The following file contains several enumerations and data structures
27610 built from the definitions in i386-builtin-types.def. */
27612 #include "i386-builtin-types.inc"
27614 /* Table for the ix86 builtin non-function types. */
27615 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27617 /* Retrieve an element from the above table, building some of
27618 the types lazily. */
27620 static tree
27621 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27623 unsigned int index;
27624 tree type, itype;
27626 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27628 type = ix86_builtin_type_tab[(int) tcode];
27629 if (type != NULL)
27630 return type;
27632 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27633 if (tcode <= IX86_BT_LAST_VECT)
27635 machine_mode mode;
27637 index = tcode - IX86_BT_LAST_PRIM - 1;
27638 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27639 mode = ix86_builtin_type_vect_mode[index];
27641 type = build_vector_type_for_mode (itype, mode);
27643 else
27645 int quals;
27647 index = tcode - IX86_BT_LAST_VECT - 1;
27648 if (tcode <= IX86_BT_LAST_PTR)
27649 quals = TYPE_UNQUALIFIED;
27650 else
27651 quals = TYPE_QUAL_CONST;
27653 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27654 if (quals != TYPE_UNQUALIFIED)
27655 itype = build_qualified_type (itype, quals);
27657 type = build_pointer_type (itype);
27660 ix86_builtin_type_tab[(int) tcode] = type;
27661 return type;
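/* As an example of the lazy construction above, the first request for a
   vector type code builds it from its recorded element type and machine
   mode via build_vector_type_for_mode (e.g. a 4 x float vector), while
   pointer type codes past IX86_BT_LAST_VECT are built with
   build_pointer_type, adding a const qualifier for codes past
   IX86_BT_LAST_PTR.  */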
27664 /* Table for the ix86 builtin function types. */
27665 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27667 /* Retrieve an element from the above table, building some of
27668 the types lazily. */
27670 static tree
27671 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27673 tree type;
27675 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27677 type = ix86_builtin_func_type_tab[(int) tcode];
27678 if (type != NULL)
27679 return type;
27681 if (tcode <= IX86_BT_LAST_FUNC)
27683 unsigned start = ix86_builtin_func_start[(int) tcode];
27684 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27685 tree rtype, atype, args = void_list_node;
27686 unsigned i;
27688 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27689 for (i = after - 1; i > start; --i)
27691 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27692 args = tree_cons (NULL, atype, args);
27695 type = build_function_type (rtype, args);
27697 else
27699 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27700 enum ix86_builtin_func_type icode;
27702 icode = ix86_builtin_func_alias_base[index];
27703 type = ix86_get_builtin_func_type (icode);
27706 ix86_builtin_func_type_tab[(int) tcode] = type;
27707 return type;
27711 /* Codes for all the SSE/MMX builtins. */
27712 enum ix86_builtins
27714 IX86_BUILTIN_ADDPS,
27715 IX86_BUILTIN_ADDSS,
27716 IX86_BUILTIN_DIVPS,
27717 IX86_BUILTIN_DIVSS,
27718 IX86_BUILTIN_MULPS,
27719 IX86_BUILTIN_MULSS,
27720 IX86_BUILTIN_SUBPS,
27721 IX86_BUILTIN_SUBSS,
27723 IX86_BUILTIN_CMPEQPS,
27724 IX86_BUILTIN_CMPLTPS,
27725 IX86_BUILTIN_CMPLEPS,
27726 IX86_BUILTIN_CMPGTPS,
27727 IX86_BUILTIN_CMPGEPS,
27728 IX86_BUILTIN_CMPNEQPS,
27729 IX86_BUILTIN_CMPNLTPS,
27730 IX86_BUILTIN_CMPNLEPS,
27731 IX86_BUILTIN_CMPNGTPS,
27732 IX86_BUILTIN_CMPNGEPS,
27733 IX86_BUILTIN_CMPORDPS,
27734 IX86_BUILTIN_CMPUNORDPS,
27735 IX86_BUILTIN_CMPEQSS,
27736 IX86_BUILTIN_CMPLTSS,
27737 IX86_BUILTIN_CMPLESS,
27738 IX86_BUILTIN_CMPNEQSS,
27739 IX86_BUILTIN_CMPNLTSS,
27740 IX86_BUILTIN_CMPNLESS,
27741 IX86_BUILTIN_CMPORDSS,
27742 IX86_BUILTIN_CMPUNORDSS,
27744 IX86_BUILTIN_COMIEQSS,
27745 IX86_BUILTIN_COMILTSS,
27746 IX86_BUILTIN_COMILESS,
27747 IX86_BUILTIN_COMIGTSS,
27748 IX86_BUILTIN_COMIGESS,
27749 IX86_BUILTIN_COMINEQSS,
27750 IX86_BUILTIN_UCOMIEQSS,
27751 IX86_BUILTIN_UCOMILTSS,
27752 IX86_BUILTIN_UCOMILESS,
27753 IX86_BUILTIN_UCOMIGTSS,
27754 IX86_BUILTIN_UCOMIGESS,
27755 IX86_BUILTIN_UCOMINEQSS,
27757 IX86_BUILTIN_CVTPI2PS,
27758 IX86_BUILTIN_CVTPS2PI,
27759 IX86_BUILTIN_CVTSI2SS,
27760 IX86_BUILTIN_CVTSI642SS,
27761 IX86_BUILTIN_CVTSS2SI,
27762 IX86_BUILTIN_CVTSS2SI64,
27763 IX86_BUILTIN_CVTTPS2PI,
27764 IX86_BUILTIN_CVTTSS2SI,
27765 IX86_BUILTIN_CVTTSS2SI64,
27767 IX86_BUILTIN_MAXPS,
27768 IX86_BUILTIN_MAXSS,
27769 IX86_BUILTIN_MINPS,
27770 IX86_BUILTIN_MINSS,
27772 IX86_BUILTIN_LOADUPS,
27773 IX86_BUILTIN_STOREUPS,
27774 IX86_BUILTIN_MOVSS,
27776 IX86_BUILTIN_MOVHLPS,
27777 IX86_BUILTIN_MOVLHPS,
27778 IX86_BUILTIN_LOADHPS,
27779 IX86_BUILTIN_LOADLPS,
27780 IX86_BUILTIN_STOREHPS,
27781 IX86_BUILTIN_STORELPS,
27783 IX86_BUILTIN_MASKMOVQ,
27784 IX86_BUILTIN_MOVMSKPS,
27785 IX86_BUILTIN_PMOVMSKB,
27787 IX86_BUILTIN_MOVNTPS,
27788 IX86_BUILTIN_MOVNTQ,
27790 IX86_BUILTIN_LOADDQU,
27791 IX86_BUILTIN_STOREDQU,
27793 IX86_BUILTIN_PACKSSWB,
27794 IX86_BUILTIN_PACKSSDW,
27795 IX86_BUILTIN_PACKUSWB,
27797 IX86_BUILTIN_PADDB,
27798 IX86_BUILTIN_PADDW,
27799 IX86_BUILTIN_PADDD,
27800 IX86_BUILTIN_PADDQ,
27801 IX86_BUILTIN_PADDSB,
27802 IX86_BUILTIN_PADDSW,
27803 IX86_BUILTIN_PADDUSB,
27804 IX86_BUILTIN_PADDUSW,
27805 IX86_BUILTIN_PSUBB,
27806 IX86_BUILTIN_PSUBW,
27807 IX86_BUILTIN_PSUBD,
27808 IX86_BUILTIN_PSUBQ,
27809 IX86_BUILTIN_PSUBSB,
27810 IX86_BUILTIN_PSUBSW,
27811 IX86_BUILTIN_PSUBUSB,
27812 IX86_BUILTIN_PSUBUSW,
27814 IX86_BUILTIN_PAND,
27815 IX86_BUILTIN_PANDN,
27816 IX86_BUILTIN_POR,
27817 IX86_BUILTIN_PXOR,
27819 IX86_BUILTIN_PAVGB,
27820 IX86_BUILTIN_PAVGW,
27822 IX86_BUILTIN_PCMPEQB,
27823 IX86_BUILTIN_PCMPEQW,
27824 IX86_BUILTIN_PCMPEQD,
27825 IX86_BUILTIN_PCMPGTB,
27826 IX86_BUILTIN_PCMPGTW,
27827 IX86_BUILTIN_PCMPGTD,
27829 IX86_BUILTIN_PMADDWD,
27831 IX86_BUILTIN_PMAXSW,
27832 IX86_BUILTIN_PMAXUB,
27833 IX86_BUILTIN_PMINSW,
27834 IX86_BUILTIN_PMINUB,
27836 IX86_BUILTIN_PMULHUW,
27837 IX86_BUILTIN_PMULHW,
27838 IX86_BUILTIN_PMULLW,
27840 IX86_BUILTIN_PSADBW,
27841 IX86_BUILTIN_PSHUFW,
27843 IX86_BUILTIN_PSLLW,
27844 IX86_BUILTIN_PSLLD,
27845 IX86_BUILTIN_PSLLQ,
27846 IX86_BUILTIN_PSRAW,
27847 IX86_BUILTIN_PSRAD,
27848 IX86_BUILTIN_PSRLW,
27849 IX86_BUILTIN_PSRLD,
27850 IX86_BUILTIN_PSRLQ,
27851 IX86_BUILTIN_PSLLWI,
27852 IX86_BUILTIN_PSLLDI,
27853 IX86_BUILTIN_PSLLQI,
27854 IX86_BUILTIN_PSRAWI,
27855 IX86_BUILTIN_PSRADI,
27856 IX86_BUILTIN_PSRLWI,
27857 IX86_BUILTIN_PSRLDI,
27858 IX86_BUILTIN_PSRLQI,
27860 IX86_BUILTIN_PUNPCKHBW,
27861 IX86_BUILTIN_PUNPCKHWD,
27862 IX86_BUILTIN_PUNPCKHDQ,
27863 IX86_BUILTIN_PUNPCKLBW,
27864 IX86_BUILTIN_PUNPCKLWD,
27865 IX86_BUILTIN_PUNPCKLDQ,
27867 IX86_BUILTIN_SHUFPS,
27869 IX86_BUILTIN_RCPPS,
27870 IX86_BUILTIN_RCPSS,
27871 IX86_BUILTIN_RSQRTPS,
27872 IX86_BUILTIN_RSQRTPS_NR,
27873 IX86_BUILTIN_RSQRTSS,
27874 IX86_BUILTIN_RSQRTF,
27875 IX86_BUILTIN_SQRTPS,
27876 IX86_BUILTIN_SQRTPS_NR,
27877 IX86_BUILTIN_SQRTSS,
27879 IX86_BUILTIN_UNPCKHPS,
27880 IX86_BUILTIN_UNPCKLPS,
27882 IX86_BUILTIN_ANDPS,
27883 IX86_BUILTIN_ANDNPS,
27884 IX86_BUILTIN_ORPS,
27885 IX86_BUILTIN_XORPS,
27887 IX86_BUILTIN_EMMS,
27888 IX86_BUILTIN_LDMXCSR,
27889 IX86_BUILTIN_STMXCSR,
27890 IX86_BUILTIN_SFENCE,
27892 IX86_BUILTIN_FXSAVE,
27893 IX86_BUILTIN_FXRSTOR,
27894 IX86_BUILTIN_FXSAVE64,
27895 IX86_BUILTIN_FXRSTOR64,
27897 IX86_BUILTIN_XSAVE,
27898 IX86_BUILTIN_XRSTOR,
27899 IX86_BUILTIN_XSAVE64,
27900 IX86_BUILTIN_XRSTOR64,
27902 IX86_BUILTIN_XSAVEOPT,
27903 IX86_BUILTIN_XSAVEOPT64,
27905 IX86_BUILTIN_XSAVEC,
27906 IX86_BUILTIN_XSAVEC64,
27908 IX86_BUILTIN_XSAVES,
27909 IX86_BUILTIN_XRSTORS,
27910 IX86_BUILTIN_XSAVES64,
27911 IX86_BUILTIN_XRSTORS64,
27913 /* 3DNow! Original */
27914 IX86_BUILTIN_FEMMS,
27915 IX86_BUILTIN_PAVGUSB,
27916 IX86_BUILTIN_PF2ID,
27917 IX86_BUILTIN_PFACC,
27918 IX86_BUILTIN_PFADD,
27919 IX86_BUILTIN_PFCMPEQ,
27920 IX86_BUILTIN_PFCMPGE,
27921 IX86_BUILTIN_PFCMPGT,
27922 IX86_BUILTIN_PFMAX,
27923 IX86_BUILTIN_PFMIN,
27924 IX86_BUILTIN_PFMUL,
27925 IX86_BUILTIN_PFRCP,
27926 IX86_BUILTIN_PFRCPIT1,
27927 IX86_BUILTIN_PFRCPIT2,
27928 IX86_BUILTIN_PFRSQIT1,
27929 IX86_BUILTIN_PFRSQRT,
27930 IX86_BUILTIN_PFSUB,
27931 IX86_BUILTIN_PFSUBR,
27932 IX86_BUILTIN_PI2FD,
27933 IX86_BUILTIN_PMULHRW,
27935 /* 3DNow! Athlon Extensions */
27936 IX86_BUILTIN_PF2IW,
27937 IX86_BUILTIN_PFNACC,
27938 IX86_BUILTIN_PFPNACC,
27939 IX86_BUILTIN_PI2FW,
27940 IX86_BUILTIN_PSWAPDSI,
27941 IX86_BUILTIN_PSWAPDSF,
27943 /* SSE2 */
27944 IX86_BUILTIN_ADDPD,
27945 IX86_BUILTIN_ADDSD,
27946 IX86_BUILTIN_DIVPD,
27947 IX86_BUILTIN_DIVSD,
27948 IX86_BUILTIN_MULPD,
27949 IX86_BUILTIN_MULSD,
27950 IX86_BUILTIN_SUBPD,
27951 IX86_BUILTIN_SUBSD,
27953 IX86_BUILTIN_CMPEQPD,
27954 IX86_BUILTIN_CMPLTPD,
27955 IX86_BUILTIN_CMPLEPD,
27956 IX86_BUILTIN_CMPGTPD,
27957 IX86_BUILTIN_CMPGEPD,
27958 IX86_BUILTIN_CMPNEQPD,
27959 IX86_BUILTIN_CMPNLTPD,
27960 IX86_BUILTIN_CMPNLEPD,
27961 IX86_BUILTIN_CMPNGTPD,
27962 IX86_BUILTIN_CMPNGEPD,
27963 IX86_BUILTIN_CMPORDPD,
27964 IX86_BUILTIN_CMPUNORDPD,
27965 IX86_BUILTIN_CMPEQSD,
27966 IX86_BUILTIN_CMPLTSD,
27967 IX86_BUILTIN_CMPLESD,
27968 IX86_BUILTIN_CMPNEQSD,
27969 IX86_BUILTIN_CMPNLTSD,
27970 IX86_BUILTIN_CMPNLESD,
27971 IX86_BUILTIN_CMPORDSD,
27972 IX86_BUILTIN_CMPUNORDSD,
27974 IX86_BUILTIN_COMIEQSD,
27975 IX86_BUILTIN_COMILTSD,
27976 IX86_BUILTIN_COMILESD,
27977 IX86_BUILTIN_COMIGTSD,
27978 IX86_BUILTIN_COMIGESD,
27979 IX86_BUILTIN_COMINEQSD,
27980 IX86_BUILTIN_UCOMIEQSD,
27981 IX86_BUILTIN_UCOMILTSD,
27982 IX86_BUILTIN_UCOMILESD,
27983 IX86_BUILTIN_UCOMIGTSD,
27984 IX86_BUILTIN_UCOMIGESD,
27985 IX86_BUILTIN_UCOMINEQSD,
27987 IX86_BUILTIN_MAXPD,
27988 IX86_BUILTIN_MAXSD,
27989 IX86_BUILTIN_MINPD,
27990 IX86_BUILTIN_MINSD,
27992 IX86_BUILTIN_ANDPD,
27993 IX86_BUILTIN_ANDNPD,
27994 IX86_BUILTIN_ORPD,
27995 IX86_BUILTIN_XORPD,
27997 IX86_BUILTIN_SQRTPD,
27998 IX86_BUILTIN_SQRTSD,
28000 IX86_BUILTIN_UNPCKHPD,
28001 IX86_BUILTIN_UNPCKLPD,
28003 IX86_BUILTIN_SHUFPD,
28005 IX86_BUILTIN_LOADUPD,
28006 IX86_BUILTIN_STOREUPD,
28007 IX86_BUILTIN_MOVSD,
28009 IX86_BUILTIN_LOADHPD,
28010 IX86_BUILTIN_LOADLPD,
28012 IX86_BUILTIN_CVTDQ2PD,
28013 IX86_BUILTIN_CVTDQ2PS,
28015 IX86_BUILTIN_CVTPD2DQ,
28016 IX86_BUILTIN_CVTPD2PI,
28017 IX86_BUILTIN_CVTPD2PS,
28018 IX86_BUILTIN_CVTTPD2DQ,
28019 IX86_BUILTIN_CVTTPD2PI,
28021 IX86_BUILTIN_CVTPI2PD,
28022 IX86_BUILTIN_CVTSI2SD,
28023 IX86_BUILTIN_CVTSI642SD,
28025 IX86_BUILTIN_CVTSD2SI,
28026 IX86_BUILTIN_CVTSD2SI64,
28027 IX86_BUILTIN_CVTSD2SS,
28028 IX86_BUILTIN_CVTSS2SD,
28029 IX86_BUILTIN_CVTTSD2SI,
28030 IX86_BUILTIN_CVTTSD2SI64,
28032 IX86_BUILTIN_CVTPS2DQ,
28033 IX86_BUILTIN_CVTPS2PD,
28034 IX86_BUILTIN_CVTTPS2DQ,
28036 IX86_BUILTIN_MOVNTI,
28037 IX86_BUILTIN_MOVNTI64,
28038 IX86_BUILTIN_MOVNTPD,
28039 IX86_BUILTIN_MOVNTDQ,
28041 IX86_BUILTIN_MOVQ128,
28043 /* SSE2 MMX */
28044 IX86_BUILTIN_MASKMOVDQU,
28045 IX86_BUILTIN_MOVMSKPD,
28046 IX86_BUILTIN_PMOVMSKB128,
28048 IX86_BUILTIN_PACKSSWB128,
28049 IX86_BUILTIN_PACKSSDW128,
28050 IX86_BUILTIN_PACKUSWB128,
28052 IX86_BUILTIN_PADDB128,
28053 IX86_BUILTIN_PADDW128,
28054 IX86_BUILTIN_PADDD128,
28055 IX86_BUILTIN_PADDQ128,
28056 IX86_BUILTIN_PADDSB128,
28057 IX86_BUILTIN_PADDSW128,
28058 IX86_BUILTIN_PADDUSB128,
28059 IX86_BUILTIN_PADDUSW128,
28060 IX86_BUILTIN_PSUBB128,
28061 IX86_BUILTIN_PSUBW128,
28062 IX86_BUILTIN_PSUBD128,
28063 IX86_BUILTIN_PSUBQ128,
28064 IX86_BUILTIN_PSUBSB128,
28065 IX86_BUILTIN_PSUBSW128,
28066 IX86_BUILTIN_PSUBUSB128,
28067 IX86_BUILTIN_PSUBUSW128,
28069 IX86_BUILTIN_PAND128,
28070 IX86_BUILTIN_PANDN128,
28071 IX86_BUILTIN_POR128,
28072 IX86_BUILTIN_PXOR128,
28074 IX86_BUILTIN_PAVGB128,
28075 IX86_BUILTIN_PAVGW128,
28077 IX86_BUILTIN_PCMPEQB128,
28078 IX86_BUILTIN_PCMPEQW128,
28079 IX86_BUILTIN_PCMPEQD128,
28080 IX86_BUILTIN_PCMPGTB128,
28081 IX86_BUILTIN_PCMPGTW128,
28082 IX86_BUILTIN_PCMPGTD128,
28084 IX86_BUILTIN_PMADDWD128,
28086 IX86_BUILTIN_PMAXSW128,
28087 IX86_BUILTIN_PMAXUB128,
28088 IX86_BUILTIN_PMINSW128,
28089 IX86_BUILTIN_PMINUB128,
28091 IX86_BUILTIN_PMULUDQ,
28092 IX86_BUILTIN_PMULUDQ128,
28093 IX86_BUILTIN_PMULHUW128,
28094 IX86_BUILTIN_PMULHW128,
28095 IX86_BUILTIN_PMULLW128,
28097 IX86_BUILTIN_PSADBW128,
28098 IX86_BUILTIN_PSHUFHW,
28099 IX86_BUILTIN_PSHUFLW,
28100 IX86_BUILTIN_PSHUFD,
28102 IX86_BUILTIN_PSLLDQI128,
28103 IX86_BUILTIN_PSLLWI128,
28104 IX86_BUILTIN_PSLLDI128,
28105 IX86_BUILTIN_PSLLQI128,
28106 IX86_BUILTIN_PSRAWI128,
28107 IX86_BUILTIN_PSRADI128,
28108 IX86_BUILTIN_PSRLDQI128,
28109 IX86_BUILTIN_PSRLWI128,
28110 IX86_BUILTIN_PSRLDI128,
28111 IX86_BUILTIN_PSRLQI128,
28113 IX86_BUILTIN_PSLLDQ128,
28114 IX86_BUILTIN_PSLLW128,
28115 IX86_BUILTIN_PSLLD128,
28116 IX86_BUILTIN_PSLLQ128,
28117 IX86_BUILTIN_PSRAW128,
28118 IX86_BUILTIN_PSRAD128,
28119 IX86_BUILTIN_PSRLW128,
28120 IX86_BUILTIN_PSRLD128,
28121 IX86_BUILTIN_PSRLQ128,
28123 IX86_BUILTIN_PUNPCKHBW128,
28124 IX86_BUILTIN_PUNPCKHWD128,
28125 IX86_BUILTIN_PUNPCKHDQ128,
28126 IX86_BUILTIN_PUNPCKHQDQ128,
28127 IX86_BUILTIN_PUNPCKLBW128,
28128 IX86_BUILTIN_PUNPCKLWD128,
28129 IX86_BUILTIN_PUNPCKLDQ128,
28130 IX86_BUILTIN_PUNPCKLQDQ128,
28132 IX86_BUILTIN_CLFLUSH,
28133 IX86_BUILTIN_MFENCE,
28134 IX86_BUILTIN_LFENCE,
28135 IX86_BUILTIN_PAUSE,
28137 IX86_BUILTIN_FNSTENV,
28138 IX86_BUILTIN_FLDENV,
28139 IX86_BUILTIN_FNSTSW,
28140 IX86_BUILTIN_FNCLEX,
28142 IX86_BUILTIN_BSRSI,
28143 IX86_BUILTIN_BSRDI,
28144 IX86_BUILTIN_RDPMC,
28145 IX86_BUILTIN_RDTSC,
28146 IX86_BUILTIN_RDTSCP,
28147 IX86_BUILTIN_ROLQI,
28148 IX86_BUILTIN_ROLHI,
28149 IX86_BUILTIN_RORQI,
28150 IX86_BUILTIN_RORHI,
28152 /* SSE3. */
28153 IX86_BUILTIN_ADDSUBPS,
28154 IX86_BUILTIN_HADDPS,
28155 IX86_BUILTIN_HSUBPS,
28156 IX86_BUILTIN_MOVSHDUP,
28157 IX86_BUILTIN_MOVSLDUP,
28158 IX86_BUILTIN_ADDSUBPD,
28159 IX86_BUILTIN_HADDPD,
28160 IX86_BUILTIN_HSUBPD,
28161 IX86_BUILTIN_LDDQU,
28163 IX86_BUILTIN_MONITOR,
28164 IX86_BUILTIN_MWAIT,
28166 /* SSSE3. */
28167 IX86_BUILTIN_PHADDW,
28168 IX86_BUILTIN_PHADDD,
28169 IX86_BUILTIN_PHADDSW,
28170 IX86_BUILTIN_PHSUBW,
28171 IX86_BUILTIN_PHSUBD,
28172 IX86_BUILTIN_PHSUBSW,
28173 IX86_BUILTIN_PMADDUBSW,
28174 IX86_BUILTIN_PMULHRSW,
28175 IX86_BUILTIN_PSHUFB,
28176 IX86_BUILTIN_PSIGNB,
28177 IX86_BUILTIN_PSIGNW,
28178 IX86_BUILTIN_PSIGND,
28179 IX86_BUILTIN_PALIGNR,
28180 IX86_BUILTIN_PABSB,
28181 IX86_BUILTIN_PABSW,
28182 IX86_BUILTIN_PABSD,
28184 IX86_BUILTIN_PHADDW128,
28185 IX86_BUILTIN_PHADDD128,
28186 IX86_BUILTIN_PHADDSW128,
28187 IX86_BUILTIN_PHSUBW128,
28188 IX86_BUILTIN_PHSUBD128,
28189 IX86_BUILTIN_PHSUBSW128,
28190 IX86_BUILTIN_PMADDUBSW128,
28191 IX86_BUILTIN_PMULHRSW128,
28192 IX86_BUILTIN_PSHUFB128,
28193 IX86_BUILTIN_PSIGNB128,
28194 IX86_BUILTIN_PSIGNW128,
28195 IX86_BUILTIN_PSIGND128,
28196 IX86_BUILTIN_PALIGNR128,
28197 IX86_BUILTIN_PABSB128,
28198 IX86_BUILTIN_PABSW128,
28199 IX86_BUILTIN_PABSD128,
28201 /* AMDFAM10 - SSE4A New Instructions. */
28202 IX86_BUILTIN_MOVNTSD,
28203 IX86_BUILTIN_MOVNTSS,
28204 IX86_BUILTIN_EXTRQI,
28205 IX86_BUILTIN_EXTRQ,
28206 IX86_BUILTIN_INSERTQI,
28207 IX86_BUILTIN_INSERTQ,
28209 /* SSE4.1. */
28210 IX86_BUILTIN_BLENDPD,
28211 IX86_BUILTIN_BLENDPS,
28212 IX86_BUILTIN_BLENDVPD,
28213 IX86_BUILTIN_BLENDVPS,
28214 IX86_BUILTIN_PBLENDVB128,
28215 IX86_BUILTIN_PBLENDW128,
28217 IX86_BUILTIN_DPPD,
28218 IX86_BUILTIN_DPPS,
28220 IX86_BUILTIN_INSERTPS128,
28222 IX86_BUILTIN_MOVNTDQA,
28223 IX86_BUILTIN_MPSADBW128,
28224 IX86_BUILTIN_PACKUSDW128,
28225 IX86_BUILTIN_PCMPEQQ,
28226 IX86_BUILTIN_PHMINPOSUW128,
28228 IX86_BUILTIN_PMAXSB128,
28229 IX86_BUILTIN_PMAXSD128,
28230 IX86_BUILTIN_PMAXUD128,
28231 IX86_BUILTIN_PMAXUW128,
28233 IX86_BUILTIN_PMINSB128,
28234 IX86_BUILTIN_PMINSD128,
28235 IX86_BUILTIN_PMINUD128,
28236 IX86_BUILTIN_PMINUW128,
28238 IX86_BUILTIN_PMOVSXBW128,
28239 IX86_BUILTIN_PMOVSXBD128,
28240 IX86_BUILTIN_PMOVSXBQ128,
28241 IX86_BUILTIN_PMOVSXWD128,
28242 IX86_BUILTIN_PMOVSXWQ128,
28243 IX86_BUILTIN_PMOVSXDQ128,
28245 IX86_BUILTIN_PMOVZXBW128,
28246 IX86_BUILTIN_PMOVZXBD128,
28247 IX86_BUILTIN_PMOVZXBQ128,
28248 IX86_BUILTIN_PMOVZXWD128,
28249 IX86_BUILTIN_PMOVZXWQ128,
28250 IX86_BUILTIN_PMOVZXDQ128,
28252 IX86_BUILTIN_PMULDQ128,
28253 IX86_BUILTIN_PMULLD128,
28255 IX86_BUILTIN_ROUNDSD,
28256 IX86_BUILTIN_ROUNDSS,
28258 IX86_BUILTIN_ROUNDPD,
28259 IX86_BUILTIN_ROUNDPS,
28261 IX86_BUILTIN_FLOORPD,
28262 IX86_BUILTIN_CEILPD,
28263 IX86_BUILTIN_TRUNCPD,
28264 IX86_BUILTIN_RINTPD,
28265 IX86_BUILTIN_ROUNDPD_AZ,
28267 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28268 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28269 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28271 IX86_BUILTIN_FLOORPS,
28272 IX86_BUILTIN_CEILPS,
28273 IX86_BUILTIN_TRUNCPS,
28274 IX86_BUILTIN_RINTPS,
28275 IX86_BUILTIN_ROUNDPS_AZ,
28277 IX86_BUILTIN_FLOORPS_SFIX,
28278 IX86_BUILTIN_CEILPS_SFIX,
28279 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28281 IX86_BUILTIN_PTESTZ,
28282 IX86_BUILTIN_PTESTC,
28283 IX86_BUILTIN_PTESTNZC,
28285 IX86_BUILTIN_VEC_INIT_V2SI,
28286 IX86_BUILTIN_VEC_INIT_V4HI,
28287 IX86_BUILTIN_VEC_INIT_V8QI,
28288 IX86_BUILTIN_VEC_EXT_V2DF,
28289 IX86_BUILTIN_VEC_EXT_V2DI,
28290 IX86_BUILTIN_VEC_EXT_V4SF,
28291 IX86_BUILTIN_VEC_EXT_V4SI,
28292 IX86_BUILTIN_VEC_EXT_V8HI,
28293 IX86_BUILTIN_VEC_EXT_V2SI,
28294 IX86_BUILTIN_VEC_EXT_V4HI,
28295 IX86_BUILTIN_VEC_EXT_V16QI,
28296 IX86_BUILTIN_VEC_SET_V2DI,
28297 IX86_BUILTIN_VEC_SET_V4SF,
28298 IX86_BUILTIN_VEC_SET_V4SI,
28299 IX86_BUILTIN_VEC_SET_V8HI,
28300 IX86_BUILTIN_VEC_SET_V4HI,
28301 IX86_BUILTIN_VEC_SET_V16QI,
28303 IX86_BUILTIN_VEC_PACK_SFIX,
28304 IX86_BUILTIN_VEC_PACK_SFIX256,
28306 /* SSE4.2. */
28307 IX86_BUILTIN_CRC32QI,
28308 IX86_BUILTIN_CRC32HI,
28309 IX86_BUILTIN_CRC32SI,
28310 IX86_BUILTIN_CRC32DI,
28312 IX86_BUILTIN_PCMPESTRI128,
28313 IX86_BUILTIN_PCMPESTRM128,
28314 IX86_BUILTIN_PCMPESTRA128,
28315 IX86_BUILTIN_PCMPESTRC128,
28316 IX86_BUILTIN_PCMPESTRO128,
28317 IX86_BUILTIN_PCMPESTRS128,
28318 IX86_BUILTIN_PCMPESTRZ128,
28319 IX86_BUILTIN_PCMPISTRI128,
28320 IX86_BUILTIN_PCMPISTRM128,
28321 IX86_BUILTIN_PCMPISTRA128,
28322 IX86_BUILTIN_PCMPISTRC128,
28323 IX86_BUILTIN_PCMPISTRO128,
28324 IX86_BUILTIN_PCMPISTRS128,
28325 IX86_BUILTIN_PCMPISTRZ128,
28327 IX86_BUILTIN_PCMPGTQ,
28329 /* AES instructions */
28330 IX86_BUILTIN_AESENC128,
28331 IX86_BUILTIN_AESENCLAST128,
28332 IX86_BUILTIN_AESDEC128,
28333 IX86_BUILTIN_AESDECLAST128,
28334 IX86_BUILTIN_AESIMC128,
28335 IX86_BUILTIN_AESKEYGENASSIST128,
28337 /* PCLMUL instruction */
28338 IX86_BUILTIN_PCLMULQDQ128,
28340 /* AVX */
28341 IX86_BUILTIN_ADDPD256,
28342 IX86_BUILTIN_ADDPS256,
28343 IX86_BUILTIN_ADDSUBPD256,
28344 IX86_BUILTIN_ADDSUBPS256,
28345 IX86_BUILTIN_ANDPD256,
28346 IX86_BUILTIN_ANDPS256,
28347 IX86_BUILTIN_ANDNPD256,
28348 IX86_BUILTIN_ANDNPS256,
28349 IX86_BUILTIN_BLENDPD256,
28350 IX86_BUILTIN_BLENDPS256,
28351 IX86_BUILTIN_BLENDVPD256,
28352 IX86_BUILTIN_BLENDVPS256,
28353 IX86_BUILTIN_DIVPD256,
28354 IX86_BUILTIN_DIVPS256,
28355 IX86_BUILTIN_DPPS256,
28356 IX86_BUILTIN_HADDPD256,
28357 IX86_BUILTIN_HADDPS256,
28358 IX86_BUILTIN_HSUBPD256,
28359 IX86_BUILTIN_HSUBPS256,
28360 IX86_BUILTIN_MAXPD256,
28361 IX86_BUILTIN_MAXPS256,
28362 IX86_BUILTIN_MINPD256,
28363 IX86_BUILTIN_MINPS256,
28364 IX86_BUILTIN_MULPD256,
28365 IX86_BUILTIN_MULPS256,
28366 IX86_BUILTIN_ORPD256,
28367 IX86_BUILTIN_ORPS256,
28368 IX86_BUILTIN_SHUFPD256,
28369 IX86_BUILTIN_SHUFPS256,
28370 IX86_BUILTIN_SUBPD256,
28371 IX86_BUILTIN_SUBPS256,
28372 IX86_BUILTIN_XORPD256,
28373 IX86_BUILTIN_XORPS256,
28374 IX86_BUILTIN_CMPSD,
28375 IX86_BUILTIN_CMPSS,
28376 IX86_BUILTIN_CMPPD,
28377 IX86_BUILTIN_CMPPS,
28378 IX86_BUILTIN_CMPPD256,
28379 IX86_BUILTIN_CMPPS256,
28380 IX86_BUILTIN_CVTDQ2PD256,
28381 IX86_BUILTIN_CVTDQ2PS256,
28382 IX86_BUILTIN_CVTPD2PS256,
28383 IX86_BUILTIN_CVTPS2DQ256,
28384 IX86_BUILTIN_CVTPS2PD256,
28385 IX86_BUILTIN_CVTTPD2DQ256,
28386 IX86_BUILTIN_CVTPD2DQ256,
28387 IX86_BUILTIN_CVTTPS2DQ256,
28388 IX86_BUILTIN_EXTRACTF128PD256,
28389 IX86_BUILTIN_EXTRACTF128PS256,
28390 IX86_BUILTIN_EXTRACTF128SI256,
28391 IX86_BUILTIN_VZEROALL,
28392 IX86_BUILTIN_VZEROUPPER,
28393 IX86_BUILTIN_VPERMILVARPD,
28394 IX86_BUILTIN_VPERMILVARPS,
28395 IX86_BUILTIN_VPERMILVARPD256,
28396 IX86_BUILTIN_VPERMILVARPS256,
28397 IX86_BUILTIN_VPERMILPD,
28398 IX86_BUILTIN_VPERMILPS,
28399 IX86_BUILTIN_VPERMILPD256,
28400 IX86_BUILTIN_VPERMILPS256,
28401 IX86_BUILTIN_VPERMIL2PD,
28402 IX86_BUILTIN_VPERMIL2PS,
28403 IX86_BUILTIN_VPERMIL2PD256,
28404 IX86_BUILTIN_VPERMIL2PS256,
28405 IX86_BUILTIN_VPERM2F128PD256,
28406 IX86_BUILTIN_VPERM2F128PS256,
28407 IX86_BUILTIN_VPERM2F128SI256,
28408 IX86_BUILTIN_VBROADCASTSS,
28409 IX86_BUILTIN_VBROADCASTSD256,
28410 IX86_BUILTIN_VBROADCASTSS256,
28411 IX86_BUILTIN_VBROADCASTPD256,
28412 IX86_BUILTIN_VBROADCASTPS256,
28413 IX86_BUILTIN_VINSERTF128PD256,
28414 IX86_BUILTIN_VINSERTF128PS256,
28415 IX86_BUILTIN_VINSERTF128SI256,
28416 IX86_BUILTIN_LOADUPD256,
28417 IX86_BUILTIN_LOADUPS256,
28418 IX86_BUILTIN_STOREUPD256,
28419 IX86_BUILTIN_STOREUPS256,
28420 IX86_BUILTIN_LDDQU256,
28421 IX86_BUILTIN_MOVNTDQ256,
28422 IX86_BUILTIN_MOVNTPD256,
28423 IX86_BUILTIN_MOVNTPS256,
28424 IX86_BUILTIN_LOADDQU256,
28425 IX86_BUILTIN_STOREDQU256,
28426 IX86_BUILTIN_MASKLOADPD,
28427 IX86_BUILTIN_MASKLOADPS,
28428 IX86_BUILTIN_MASKSTOREPD,
28429 IX86_BUILTIN_MASKSTOREPS,
28430 IX86_BUILTIN_MASKLOADPD256,
28431 IX86_BUILTIN_MASKLOADPS256,
28432 IX86_BUILTIN_MASKSTOREPD256,
28433 IX86_BUILTIN_MASKSTOREPS256,
28434 IX86_BUILTIN_MOVSHDUP256,
28435 IX86_BUILTIN_MOVSLDUP256,
28436 IX86_BUILTIN_MOVDDUP256,
28438 IX86_BUILTIN_SQRTPD256,
28439 IX86_BUILTIN_SQRTPS256,
28440 IX86_BUILTIN_SQRTPS_NR256,
28441 IX86_BUILTIN_RSQRTPS256,
28442 IX86_BUILTIN_RSQRTPS_NR256,
28444 IX86_BUILTIN_RCPPS256,
28446 IX86_BUILTIN_ROUNDPD256,
28447 IX86_BUILTIN_ROUNDPS256,
28449 IX86_BUILTIN_FLOORPD256,
28450 IX86_BUILTIN_CEILPD256,
28451 IX86_BUILTIN_TRUNCPD256,
28452 IX86_BUILTIN_RINTPD256,
28453 IX86_BUILTIN_ROUNDPD_AZ256,
28455 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28456 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28457 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28459 IX86_BUILTIN_FLOORPS256,
28460 IX86_BUILTIN_CEILPS256,
28461 IX86_BUILTIN_TRUNCPS256,
28462 IX86_BUILTIN_RINTPS256,
28463 IX86_BUILTIN_ROUNDPS_AZ256,
28465 IX86_BUILTIN_FLOORPS_SFIX256,
28466 IX86_BUILTIN_CEILPS_SFIX256,
28467 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28469 IX86_BUILTIN_UNPCKHPD256,
28470 IX86_BUILTIN_UNPCKLPD256,
28471 IX86_BUILTIN_UNPCKHPS256,
28472 IX86_BUILTIN_UNPCKLPS256,
28474 IX86_BUILTIN_SI256_SI,
28475 IX86_BUILTIN_PS256_PS,
28476 IX86_BUILTIN_PD256_PD,
28477 IX86_BUILTIN_SI_SI256,
28478 IX86_BUILTIN_PS_PS256,
28479 IX86_BUILTIN_PD_PD256,
28481 IX86_BUILTIN_VTESTZPD,
28482 IX86_BUILTIN_VTESTCPD,
28483 IX86_BUILTIN_VTESTNZCPD,
28484 IX86_BUILTIN_VTESTZPS,
28485 IX86_BUILTIN_VTESTCPS,
28486 IX86_BUILTIN_VTESTNZCPS,
28487 IX86_BUILTIN_VTESTZPD256,
28488 IX86_BUILTIN_VTESTCPD256,
28489 IX86_BUILTIN_VTESTNZCPD256,
28490 IX86_BUILTIN_VTESTZPS256,
28491 IX86_BUILTIN_VTESTCPS256,
28492 IX86_BUILTIN_VTESTNZCPS256,
28493 IX86_BUILTIN_PTESTZ256,
28494 IX86_BUILTIN_PTESTC256,
28495 IX86_BUILTIN_PTESTNZC256,
28497 IX86_BUILTIN_MOVMSKPD256,
28498 IX86_BUILTIN_MOVMSKPS256,
28500 /* AVX2 */
28501 IX86_BUILTIN_MPSADBW256,
28502 IX86_BUILTIN_PABSB256,
28503 IX86_BUILTIN_PABSW256,
28504 IX86_BUILTIN_PABSD256,
28505 IX86_BUILTIN_PACKSSDW256,
28506 IX86_BUILTIN_PACKSSWB256,
28507 IX86_BUILTIN_PACKUSDW256,
28508 IX86_BUILTIN_PACKUSWB256,
28509 IX86_BUILTIN_PADDB256,
28510 IX86_BUILTIN_PADDW256,
28511 IX86_BUILTIN_PADDD256,
28512 IX86_BUILTIN_PADDQ256,
28513 IX86_BUILTIN_PADDSB256,
28514 IX86_BUILTIN_PADDSW256,
28515 IX86_BUILTIN_PADDUSB256,
28516 IX86_BUILTIN_PADDUSW256,
28517 IX86_BUILTIN_PALIGNR256,
28518 IX86_BUILTIN_AND256I,
28519 IX86_BUILTIN_ANDNOT256I,
28520 IX86_BUILTIN_PAVGB256,
28521 IX86_BUILTIN_PAVGW256,
28522 IX86_BUILTIN_PBLENDVB256,
28523 IX86_BUILTIN_PBLENDVW256,
28524 IX86_BUILTIN_PCMPEQB256,
28525 IX86_BUILTIN_PCMPEQW256,
28526 IX86_BUILTIN_PCMPEQD256,
28527 IX86_BUILTIN_PCMPEQQ256,
28528 IX86_BUILTIN_PCMPGTB256,
28529 IX86_BUILTIN_PCMPGTW256,
28530 IX86_BUILTIN_PCMPGTD256,
28531 IX86_BUILTIN_PCMPGTQ256,
28532 IX86_BUILTIN_PHADDW256,
28533 IX86_BUILTIN_PHADDD256,
28534 IX86_BUILTIN_PHADDSW256,
28535 IX86_BUILTIN_PHSUBW256,
28536 IX86_BUILTIN_PHSUBD256,
28537 IX86_BUILTIN_PHSUBSW256,
28538 IX86_BUILTIN_PMADDUBSW256,
28539 IX86_BUILTIN_PMADDWD256,
28540 IX86_BUILTIN_PMAXSB256,
28541 IX86_BUILTIN_PMAXSW256,
28542 IX86_BUILTIN_PMAXSD256,
28543 IX86_BUILTIN_PMAXUB256,
28544 IX86_BUILTIN_PMAXUW256,
28545 IX86_BUILTIN_PMAXUD256,
28546 IX86_BUILTIN_PMINSB256,
28547 IX86_BUILTIN_PMINSW256,
28548 IX86_BUILTIN_PMINSD256,
28549 IX86_BUILTIN_PMINUB256,
28550 IX86_BUILTIN_PMINUW256,
28551 IX86_BUILTIN_PMINUD256,
28552 IX86_BUILTIN_PMOVMSKB256,
28553 IX86_BUILTIN_PMOVSXBW256,
28554 IX86_BUILTIN_PMOVSXBD256,
28555 IX86_BUILTIN_PMOVSXBQ256,
28556 IX86_BUILTIN_PMOVSXWD256,
28557 IX86_BUILTIN_PMOVSXWQ256,
28558 IX86_BUILTIN_PMOVSXDQ256,
28559 IX86_BUILTIN_PMOVZXBW256,
28560 IX86_BUILTIN_PMOVZXBD256,
28561 IX86_BUILTIN_PMOVZXBQ256,
28562 IX86_BUILTIN_PMOVZXWD256,
28563 IX86_BUILTIN_PMOVZXWQ256,
28564 IX86_BUILTIN_PMOVZXDQ256,
28565 IX86_BUILTIN_PMULDQ256,
28566 IX86_BUILTIN_PMULHRSW256,
28567 IX86_BUILTIN_PMULHUW256,
28568 IX86_BUILTIN_PMULHW256,
28569 IX86_BUILTIN_PMULLW256,
28570 IX86_BUILTIN_PMULLD256,
28571 IX86_BUILTIN_PMULUDQ256,
28572 IX86_BUILTIN_POR256,
28573 IX86_BUILTIN_PSADBW256,
28574 IX86_BUILTIN_PSHUFB256,
28575 IX86_BUILTIN_PSHUFD256,
28576 IX86_BUILTIN_PSHUFHW256,
28577 IX86_BUILTIN_PSHUFLW256,
28578 IX86_BUILTIN_PSIGNB256,
28579 IX86_BUILTIN_PSIGNW256,
28580 IX86_BUILTIN_PSIGND256,
28581 IX86_BUILTIN_PSLLDQI256,
28582 IX86_BUILTIN_PSLLWI256,
28583 IX86_BUILTIN_PSLLW256,
28584 IX86_BUILTIN_PSLLDI256,
28585 IX86_BUILTIN_PSLLD256,
28586 IX86_BUILTIN_PSLLQI256,
28587 IX86_BUILTIN_PSLLQ256,
28588 IX86_BUILTIN_PSRAWI256,
28589 IX86_BUILTIN_PSRAW256,
28590 IX86_BUILTIN_PSRADI256,
28591 IX86_BUILTIN_PSRAD256,
28592 IX86_BUILTIN_PSRLDQI256,
28593 IX86_BUILTIN_PSRLWI256,
28594 IX86_BUILTIN_PSRLW256,
28595 IX86_BUILTIN_PSRLDI256,
28596 IX86_BUILTIN_PSRLD256,
28597 IX86_BUILTIN_PSRLQI256,
28598 IX86_BUILTIN_PSRLQ256,
28599 IX86_BUILTIN_PSUBB256,
28600 IX86_BUILTIN_PSUBW256,
28601 IX86_BUILTIN_PSUBD256,
28602 IX86_BUILTIN_PSUBQ256,
28603 IX86_BUILTIN_PSUBSB256,
28604 IX86_BUILTIN_PSUBSW256,
28605 IX86_BUILTIN_PSUBUSB256,
28606 IX86_BUILTIN_PSUBUSW256,
28607 IX86_BUILTIN_PUNPCKHBW256,
28608 IX86_BUILTIN_PUNPCKHWD256,
28609 IX86_BUILTIN_PUNPCKHDQ256,
28610 IX86_BUILTIN_PUNPCKHQDQ256,
28611 IX86_BUILTIN_PUNPCKLBW256,
28612 IX86_BUILTIN_PUNPCKLWD256,
28613 IX86_BUILTIN_PUNPCKLDQ256,
28614 IX86_BUILTIN_PUNPCKLQDQ256,
28615 IX86_BUILTIN_PXOR256,
28616 IX86_BUILTIN_MOVNTDQA256,
28617 IX86_BUILTIN_VBROADCASTSS_PS,
28618 IX86_BUILTIN_VBROADCASTSS_PS256,
28619 IX86_BUILTIN_VBROADCASTSD_PD256,
28620 IX86_BUILTIN_VBROADCASTSI256,
28621 IX86_BUILTIN_PBLENDD256,
28622 IX86_BUILTIN_PBLENDD128,
28623 IX86_BUILTIN_PBROADCASTB256,
28624 IX86_BUILTIN_PBROADCASTW256,
28625 IX86_BUILTIN_PBROADCASTD256,
28626 IX86_BUILTIN_PBROADCASTQ256,
28627 IX86_BUILTIN_PBROADCASTB128,
28628 IX86_BUILTIN_PBROADCASTW128,
28629 IX86_BUILTIN_PBROADCASTD128,
28630 IX86_BUILTIN_PBROADCASTQ128,
28631 IX86_BUILTIN_VPERMVARSI256,
28632 IX86_BUILTIN_VPERMDF256,
28633 IX86_BUILTIN_VPERMVARSF256,
28634 IX86_BUILTIN_VPERMDI256,
28635 IX86_BUILTIN_VPERMTI256,
28636 IX86_BUILTIN_VEXTRACT128I256,
28637 IX86_BUILTIN_VINSERT128I256,
28638 IX86_BUILTIN_MASKLOADD,
28639 IX86_BUILTIN_MASKLOADQ,
28640 IX86_BUILTIN_MASKLOADD256,
28641 IX86_BUILTIN_MASKLOADQ256,
28642 IX86_BUILTIN_MASKSTORED,
28643 IX86_BUILTIN_MASKSTOREQ,
28644 IX86_BUILTIN_MASKSTORED256,
28645 IX86_BUILTIN_MASKSTOREQ256,
28646 IX86_BUILTIN_PSLLVV4DI,
28647 IX86_BUILTIN_PSLLVV2DI,
28648 IX86_BUILTIN_PSLLVV8SI,
28649 IX86_BUILTIN_PSLLVV4SI,
28650 IX86_BUILTIN_PSRAVV8SI,
28651 IX86_BUILTIN_PSRAVV4SI,
28652 IX86_BUILTIN_PSRLVV4DI,
28653 IX86_BUILTIN_PSRLVV2DI,
28654 IX86_BUILTIN_PSRLVV8SI,
28655 IX86_BUILTIN_PSRLVV4SI,
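/* Gather builtins: SIV means SImode (32-bit) indices, DIV means DImode
(64-bit) indices; the trailing mode is the element type and count of
the vector being loaded.  */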
28657 IX86_BUILTIN_GATHERSIV2DF,
28658 IX86_BUILTIN_GATHERSIV4DF,
28659 IX86_BUILTIN_GATHERDIV2DF,
28660 IX86_BUILTIN_GATHERDIV4DF,
28661 IX86_BUILTIN_GATHERSIV4SF,
28662 IX86_BUILTIN_GATHERSIV8SF,
28663 IX86_BUILTIN_GATHERDIV4SF,
28664 IX86_BUILTIN_GATHERDIV8SF,
28665 IX86_BUILTIN_GATHERSIV2DI,
28666 IX86_BUILTIN_GATHERSIV4DI,
28667 IX86_BUILTIN_GATHERDIV2DI,
28668 IX86_BUILTIN_GATHERDIV4DI,
28669 IX86_BUILTIN_GATHERSIV4SI,
28670 IX86_BUILTIN_GATHERSIV8SI,
28671 IX86_BUILTIN_GATHERDIV4SI,
28672 IX86_BUILTIN_GATHERDIV8SI,
28674 /* AVX512F */
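/* In the AVX-512 names below, _MASK is a merge-masking variant, _MASKZ a
zero-masking variant, and _MASK3 (FMA forms) a variant whose third
operand supplies the values for masked-off elements.  */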
28675 IX86_BUILTIN_SI512_SI256,
28676 IX86_BUILTIN_PD512_PD256,
28677 IX86_BUILTIN_PS512_PS256,
28678 IX86_BUILTIN_SI512_SI,
28679 IX86_BUILTIN_PD512_PD,
28680 IX86_BUILTIN_PS512_PS,
28681 IX86_BUILTIN_ADDPD512,
28682 IX86_BUILTIN_ADDPS512,
28683 IX86_BUILTIN_ADDSD_ROUND,
28684 IX86_BUILTIN_ADDSS_ROUND,
28685 IX86_BUILTIN_ALIGND512,
28686 IX86_BUILTIN_ALIGNQ512,
28687 IX86_BUILTIN_BLENDMD512,
28688 IX86_BUILTIN_BLENDMPD512,
28689 IX86_BUILTIN_BLENDMPS512,
28690 IX86_BUILTIN_BLENDMQ512,
28691 IX86_BUILTIN_BROADCASTF32X4_512,
28692 IX86_BUILTIN_BROADCASTF64X4_512,
28693 IX86_BUILTIN_BROADCASTI32X4_512,
28694 IX86_BUILTIN_BROADCASTI64X4_512,
28695 IX86_BUILTIN_BROADCASTSD512,
28696 IX86_BUILTIN_BROADCASTSS512,
28697 IX86_BUILTIN_CMPD512,
28698 IX86_BUILTIN_CMPPD512,
28699 IX86_BUILTIN_CMPPS512,
28700 IX86_BUILTIN_CMPQ512,
28701 IX86_BUILTIN_CMPSD_MASK,
28702 IX86_BUILTIN_CMPSS_MASK,
28703 IX86_BUILTIN_COMIDF,
28704 IX86_BUILTIN_COMISF,
28705 IX86_BUILTIN_COMPRESSPD512,
28706 IX86_BUILTIN_COMPRESSPDSTORE512,
28707 IX86_BUILTIN_COMPRESSPS512,
28708 IX86_BUILTIN_COMPRESSPSSTORE512,
28709 IX86_BUILTIN_CVTDQ2PD512,
28710 IX86_BUILTIN_CVTDQ2PS512,
28711 IX86_BUILTIN_CVTPD2DQ512,
28712 IX86_BUILTIN_CVTPD2PS512,
28713 IX86_BUILTIN_CVTPD2UDQ512,
28714 IX86_BUILTIN_CVTPH2PS512,
28715 IX86_BUILTIN_CVTPS2DQ512,
28716 IX86_BUILTIN_CVTPS2PD512,
28717 IX86_BUILTIN_CVTPS2PH512,
28718 IX86_BUILTIN_CVTPS2UDQ512,
28719 IX86_BUILTIN_CVTSD2SS_ROUND,
28720 IX86_BUILTIN_CVTSI2SD64,
28721 IX86_BUILTIN_CVTSI2SS32,
28722 IX86_BUILTIN_CVTSI2SS64,
28723 IX86_BUILTIN_CVTSS2SD_ROUND,
28724 IX86_BUILTIN_CVTTPD2DQ512,
28725 IX86_BUILTIN_CVTTPD2UDQ512,
28726 IX86_BUILTIN_CVTTPS2DQ512,
28727 IX86_BUILTIN_CVTTPS2UDQ512,
28728 IX86_BUILTIN_CVTUDQ2PD512,
28729 IX86_BUILTIN_CVTUDQ2PS512,
28730 IX86_BUILTIN_CVTUSI2SD32,
28731 IX86_BUILTIN_CVTUSI2SD64,
28732 IX86_BUILTIN_CVTUSI2SS32,
28733 IX86_BUILTIN_CVTUSI2SS64,
28734 IX86_BUILTIN_DIVPD512,
28735 IX86_BUILTIN_DIVPS512,
28736 IX86_BUILTIN_DIVSD_ROUND,
28737 IX86_BUILTIN_DIVSS_ROUND,
28738 IX86_BUILTIN_EXPANDPD512,
28739 IX86_BUILTIN_EXPANDPD512Z,
28740 IX86_BUILTIN_EXPANDPDLOAD512,
28741 IX86_BUILTIN_EXPANDPDLOAD512Z,
28742 IX86_BUILTIN_EXPANDPS512,
28743 IX86_BUILTIN_EXPANDPS512Z,
28744 IX86_BUILTIN_EXPANDPSLOAD512,
28745 IX86_BUILTIN_EXPANDPSLOAD512Z,
28746 IX86_BUILTIN_EXTRACTF32X4,
28747 IX86_BUILTIN_EXTRACTF64X4,
28748 IX86_BUILTIN_EXTRACTI32X4,
28749 IX86_BUILTIN_EXTRACTI64X4,
28750 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28751 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28752 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28753 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28754 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28755 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28756 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28757 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28758 IX86_BUILTIN_GETEXPPD512,
28759 IX86_BUILTIN_GETEXPPS512,
28760 IX86_BUILTIN_GETEXPSD128,
28761 IX86_BUILTIN_GETEXPSS128,
28762 IX86_BUILTIN_GETMANTPD512,
28763 IX86_BUILTIN_GETMANTPS512,
28764 IX86_BUILTIN_GETMANTSD128,
28765 IX86_BUILTIN_GETMANTSS128,
28766 IX86_BUILTIN_INSERTF32X4,
28767 IX86_BUILTIN_INSERTF64X4,
28768 IX86_BUILTIN_INSERTI32X4,
28769 IX86_BUILTIN_INSERTI64X4,
28770 IX86_BUILTIN_LOADAPD512,
28771 IX86_BUILTIN_LOADAPS512,
28772 IX86_BUILTIN_LOADDQUDI512,
28773 IX86_BUILTIN_LOADDQUSI512,
28774 IX86_BUILTIN_LOADUPD512,
28775 IX86_BUILTIN_LOADUPS512,
28776 IX86_BUILTIN_MAXPD512,
28777 IX86_BUILTIN_MAXPS512,
28778 IX86_BUILTIN_MAXSD_ROUND,
28779 IX86_BUILTIN_MAXSS_ROUND,
28780 IX86_BUILTIN_MINPD512,
28781 IX86_BUILTIN_MINPS512,
28782 IX86_BUILTIN_MINSD_ROUND,
28783 IX86_BUILTIN_MINSS_ROUND,
28784 IX86_BUILTIN_MOVAPD512,
28785 IX86_BUILTIN_MOVAPS512,
28786 IX86_BUILTIN_MOVDDUP512,
28787 IX86_BUILTIN_MOVDQA32LOAD512,
28788 IX86_BUILTIN_MOVDQA32STORE512,
28789 IX86_BUILTIN_MOVDQA32_512,
28790 IX86_BUILTIN_MOVDQA64LOAD512,
28791 IX86_BUILTIN_MOVDQA64STORE512,
28792 IX86_BUILTIN_MOVDQA64_512,
28793 IX86_BUILTIN_MOVNTDQ512,
28794 IX86_BUILTIN_MOVNTDQA512,
28795 IX86_BUILTIN_MOVNTPD512,
28796 IX86_BUILTIN_MOVNTPS512,
28797 IX86_BUILTIN_MOVSHDUP512,
28798 IX86_BUILTIN_MOVSLDUP512,
28799 IX86_BUILTIN_MULPD512,
28800 IX86_BUILTIN_MULPS512,
28801 IX86_BUILTIN_MULSD_ROUND,
28802 IX86_BUILTIN_MULSS_ROUND,
28803 IX86_BUILTIN_PABSD512,
28804 IX86_BUILTIN_PABSQ512,
28805 IX86_BUILTIN_PADDD512,
28806 IX86_BUILTIN_PADDQ512,
28807 IX86_BUILTIN_PANDD512,
28808 IX86_BUILTIN_PANDND512,
28809 IX86_BUILTIN_PANDNQ512,
28810 IX86_BUILTIN_PANDQ512,
28811 IX86_BUILTIN_PBROADCASTD512,
28812 IX86_BUILTIN_PBROADCASTD512_GPR,
28813 IX86_BUILTIN_PBROADCASTMB512,
28814 IX86_BUILTIN_PBROADCASTMW512,
28815 IX86_BUILTIN_PBROADCASTQ512,
28816 IX86_BUILTIN_PBROADCASTQ512_GPR,
28817 IX86_BUILTIN_PBROADCASTQ512_MEM,
28818 IX86_BUILTIN_PCMPEQD512_MASK,
28819 IX86_BUILTIN_PCMPEQQ512_MASK,
28820 IX86_BUILTIN_PCMPGTD512_MASK,
28821 IX86_BUILTIN_PCMPGTQ512_MASK,
28822 IX86_BUILTIN_PCOMPRESSD512,
28823 IX86_BUILTIN_PCOMPRESSDSTORE512,
28824 IX86_BUILTIN_PCOMPRESSQ512,
28825 IX86_BUILTIN_PCOMPRESSQSTORE512,
28826 IX86_BUILTIN_PEXPANDD512,
28827 IX86_BUILTIN_PEXPANDD512Z,
28828 IX86_BUILTIN_PEXPANDDLOAD512,
28829 IX86_BUILTIN_PEXPANDDLOAD512Z,
28830 IX86_BUILTIN_PEXPANDQ512,
28831 IX86_BUILTIN_PEXPANDQ512Z,
28832 IX86_BUILTIN_PEXPANDQLOAD512,
28833 IX86_BUILTIN_PEXPANDQLOAD512Z,
28834 IX86_BUILTIN_PMAXSD512,
28835 IX86_BUILTIN_PMAXSQ512,
28836 IX86_BUILTIN_PMAXUD512,
28837 IX86_BUILTIN_PMAXUQ512,
28838 IX86_BUILTIN_PMINSD512,
28839 IX86_BUILTIN_PMINSQ512,
28840 IX86_BUILTIN_PMINUD512,
28841 IX86_BUILTIN_PMINUQ512,
28842 IX86_BUILTIN_PMOVDB512,
28843 IX86_BUILTIN_PMOVDB512_MEM,
28844 IX86_BUILTIN_PMOVDW512,
28845 IX86_BUILTIN_PMOVDW512_MEM,
28846 IX86_BUILTIN_PMOVQB512,
28847 IX86_BUILTIN_PMOVQB512_MEM,
28848 IX86_BUILTIN_PMOVQD512,
28849 IX86_BUILTIN_PMOVQD512_MEM,
28850 IX86_BUILTIN_PMOVQW512,
28851 IX86_BUILTIN_PMOVQW512_MEM,
28852 IX86_BUILTIN_PMOVSDB512,
28853 IX86_BUILTIN_PMOVSDB512_MEM,
28854 IX86_BUILTIN_PMOVSDW512,
28855 IX86_BUILTIN_PMOVSDW512_MEM,
28856 IX86_BUILTIN_PMOVSQB512,
28857 IX86_BUILTIN_PMOVSQB512_MEM,
28858 IX86_BUILTIN_PMOVSQD512,
28859 IX86_BUILTIN_PMOVSQD512_MEM,
28860 IX86_BUILTIN_PMOVSQW512,
28861 IX86_BUILTIN_PMOVSQW512_MEM,
28862 IX86_BUILTIN_PMOVSXBD512,
28863 IX86_BUILTIN_PMOVSXBQ512,
28864 IX86_BUILTIN_PMOVSXDQ512,
28865 IX86_BUILTIN_PMOVSXWD512,
28866 IX86_BUILTIN_PMOVSXWQ512,
28867 IX86_BUILTIN_PMOVUSDB512,
28868 IX86_BUILTIN_PMOVUSDB512_MEM,
28869 IX86_BUILTIN_PMOVUSDW512,
28870 IX86_BUILTIN_PMOVUSDW512_MEM,
28871 IX86_BUILTIN_PMOVUSQB512,
28872 IX86_BUILTIN_PMOVUSQB512_MEM,
28873 IX86_BUILTIN_PMOVUSQD512,
28874 IX86_BUILTIN_PMOVUSQD512_MEM,
28875 IX86_BUILTIN_PMOVUSQW512,
28876 IX86_BUILTIN_PMOVUSQW512_MEM,
28877 IX86_BUILTIN_PMOVZXBD512,
28878 IX86_BUILTIN_PMOVZXBQ512,
28879 IX86_BUILTIN_PMOVZXDQ512,
28880 IX86_BUILTIN_PMOVZXWD512,
28881 IX86_BUILTIN_PMOVZXWQ512,
28882 IX86_BUILTIN_PMULDQ512,
28883 IX86_BUILTIN_PMULLD512,
28884 IX86_BUILTIN_PMULUDQ512,
28885 IX86_BUILTIN_PORD512,
28886 IX86_BUILTIN_PORQ512,
28887 IX86_BUILTIN_PROLD512,
28888 IX86_BUILTIN_PROLQ512,
28889 IX86_BUILTIN_PROLVD512,
28890 IX86_BUILTIN_PROLVQ512,
28891 IX86_BUILTIN_PRORD512,
28892 IX86_BUILTIN_PRORQ512,
28893 IX86_BUILTIN_PRORVD512,
28894 IX86_BUILTIN_PRORVQ512,
28895 IX86_BUILTIN_PSHUFD512,
28896 IX86_BUILTIN_PSLLD512,
28897 IX86_BUILTIN_PSLLDI512,
28898 IX86_BUILTIN_PSLLQ512,
28899 IX86_BUILTIN_PSLLQI512,
28900 IX86_BUILTIN_PSLLVV16SI,
28901 IX86_BUILTIN_PSLLVV8DI,
28902 IX86_BUILTIN_PSRAD512,
28903 IX86_BUILTIN_PSRADI512,
28904 IX86_BUILTIN_PSRAQ512,
28905 IX86_BUILTIN_PSRAQI512,
28906 IX86_BUILTIN_PSRAVV16SI,
28907 IX86_BUILTIN_PSRAVV8DI,
28908 IX86_BUILTIN_PSRLD512,
28909 IX86_BUILTIN_PSRLDI512,
28910 IX86_BUILTIN_PSRLQ512,
28911 IX86_BUILTIN_PSRLQI512,
28912 IX86_BUILTIN_PSRLVV16SI,
28913 IX86_BUILTIN_PSRLVV8DI,
28914 IX86_BUILTIN_PSUBD512,
28915 IX86_BUILTIN_PSUBQ512,
28916 IX86_BUILTIN_PTESTMD512,
28917 IX86_BUILTIN_PTESTMQ512,
28918 IX86_BUILTIN_PTESTNMD512,
28919 IX86_BUILTIN_PTESTNMQ512,
28920 IX86_BUILTIN_PUNPCKHDQ512,
28921 IX86_BUILTIN_PUNPCKHQDQ512,
28922 IX86_BUILTIN_PUNPCKLDQ512,
28923 IX86_BUILTIN_PUNPCKLQDQ512,
28924 IX86_BUILTIN_PXORD512,
28925 IX86_BUILTIN_PXORQ512,
28926 IX86_BUILTIN_RCP14PD512,
28927 IX86_BUILTIN_RCP14PS512,
28928 IX86_BUILTIN_RCP14SD,
28929 IX86_BUILTIN_RCP14SS,
28930 IX86_BUILTIN_RNDSCALEPD,
28931 IX86_BUILTIN_RNDSCALEPS,
28932 IX86_BUILTIN_RNDSCALESD,
28933 IX86_BUILTIN_RNDSCALESS,
28934 IX86_BUILTIN_RSQRT14PD512,
28935 IX86_BUILTIN_RSQRT14PS512,
28936 IX86_BUILTIN_RSQRT14SD,
28937 IX86_BUILTIN_RSQRT14SS,
28938 IX86_BUILTIN_SCALEFPD512,
28939 IX86_BUILTIN_SCALEFPS512,
28940 IX86_BUILTIN_SCALEFSD,
28941 IX86_BUILTIN_SCALEFSS,
28942 IX86_BUILTIN_SHUFPD512,
28943 IX86_BUILTIN_SHUFPS512,
28944 IX86_BUILTIN_SHUF_F32x4,
28945 IX86_BUILTIN_SHUF_F64x2,
28946 IX86_BUILTIN_SHUF_I32x4,
28947 IX86_BUILTIN_SHUF_I64x2,
28948 IX86_BUILTIN_SQRTPD512,
28949 IX86_BUILTIN_SQRTPD512_MASK,
28950 IX86_BUILTIN_SQRTPS512_MASK,
28951 IX86_BUILTIN_SQRTPS_NR512,
28952 IX86_BUILTIN_SQRTSD_ROUND,
28953 IX86_BUILTIN_SQRTSS_ROUND,
28954 IX86_BUILTIN_STOREAPD512,
28955 IX86_BUILTIN_STOREAPS512,
28956 IX86_BUILTIN_STOREDQUDI512,
28957 IX86_BUILTIN_STOREDQUSI512,
28958 IX86_BUILTIN_STOREUPD512,
28959 IX86_BUILTIN_STOREUPS512,
28960 IX86_BUILTIN_SUBPD512,
28961 IX86_BUILTIN_SUBPS512,
28962 IX86_BUILTIN_SUBSD_ROUND,
28963 IX86_BUILTIN_SUBSS_ROUND,
28964 IX86_BUILTIN_UCMPD512,
28965 IX86_BUILTIN_UCMPQ512,
28966 IX86_BUILTIN_UNPCKHPD512,
28967 IX86_BUILTIN_UNPCKHPS512,
28968 IX86_BUILTIN_UNPCKLPD512,
28969 IX86_BUILTIN_UNPCKLPS512,
28970 IX86_BUILTIN_VCVTSD2SI32,
28971 IX86_BUILTIN_VCVTSD2SI64,
28972 IX86_BUILTIN_VCVTSD2USI32,
28973 IX86_BUILTIN_VCVTSD2USI64,
28974 IX86_BUILTIN_VCVTSS2SI32,
28975 IX86_BUILTIN_VCVTSS2SI64,
28976 IX86_BUILTIN_VCVTSS2USI32,
28977 IX86_BUILTIN_VCVTSS2USI64,
28978 IX86_BUILTIN_VCVTTSD2SI32,
28979 IX86_BUILTIN_VCVTTSD2SI64,
28980 IX86_BUILTIN_VCVTTSD2USI32,
28981 IX86_BUILTIN_VCVTTSD2USI64,
28982 IX86_BUILTIN_VCVTTSS2SI32,
28983 IX86_BUILTIN_VCVTTSS2SI64,
28984 IX86_BUILTIN_VCVTTSS2USI32,
28985 IX86_BUILTIN_VCVTTSS2USI64,
28986 IX86_BUILTIN_VFMADDPD512_MASK,
28987 IX86_BUILTIN_VFMADDPD512_MASK3,
28988 IX86_BUILTIN_VFMADDPD512_MASKZ,
28989 IX86_BUILTIN_VFMADDPS512_MASK,
28990 IX86_BUILTIN_VFMADDPS512_MASK3,
28991 IX86_BUILTIN_VFMADDPS512_MASKZ,
28992 IX86_BUILTIN_VFMADDSD3_ROUND,
28993 IX86_BUILTIN_VFMADDSS3_ROUND,
28994 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28995 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28996 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28997 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28998 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28999 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29000 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29001 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29002 IX86_BUILTIN_VFMSUBPD512_MASK3,
29003 IX86_BUILTIN_VFMSUBPS512_MASK3,
29004 IX86_BUILTIN_VFMSUBSD3_MASK3,
29005 IX86_BUILTIN_VFMSUBSS3_MASK3,
29006 IX86_BUILTIN_VFNMADDPD512_MASK,
29007 IX86_BUILTIN_VFNMADDPS512_MASK,
29008 IX86_BUILTIN_VFNMSUBPD512_MASK,
29009 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29010 IX86_BUILTIN_VFNMSUBPS512_MASK,
29011 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29012 IX86_BUILTIN_VPCLZCNTD512,
29013 IX86_BUILTIN_VPCLZCNTQ512,
29014 IX86_BUILTIN_VPCONFLICTD512,
29015 IX86_BUILTIN_VPCONFLICTQ512,
29016 IX86_BUILTIN_VPERMDF512,
29017 IX86_BUILTIN_VPERMDI512,
29018 IX86_BUILTIN_VPERMI2VARD512,
29019 IX86_BUILTIN_VPERMI2VARPD512,
29020 IX86_BUILTIN_VPERMI2VARPS512,
29021 IX86_BUILTIN_VPERMI2VARQ512,
29022 IX86_BUILTIN_VPERMILPD512,
29023 IX86_BUILTIN_VPERMILPS512,
29024 IX86_BUILTIN_VPERMILVARPD512,
29025 IX86_BUILTIN_VPERMILVARPS512,
29026 IX86_BUILTIN_VPERMT2VARD512,
29027 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29028 IX86_BUILTIN_VPERMT2VARPD512,
29029 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29030 IX86_BUILTIN_VPERMT2VARPS512,
29031 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29032 IX86_BUILTIN_VPERMT2VARQ512,
29033 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29034 IX86_BUILTIN_VPERMVARDF512,
29035 IX86_BUILTIN_VPERMVARDI512,
29036 IX86_BUILTIN_VPERMVARSF512,
29037 IX86_BUILTIN_VPERMVARSI512,
29038 IX86_BUILTIN_VTERNLOGD512_MASK,
29039 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29040 IX86_BUILTIN_VTERNLOGQ512_MASK,
29041 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29043 /* Mask arithmetic operations */
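/* These operate on the 16-bit opmask (k) registers, i.e. __mmask16
values.  */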
29044 IX86_BUILTIN_KAND16,
29045 IX86_BUILTIN_KANDN16,
29046 IX86_BUILTIN_KNOT16,
29047 IX86_BUILTIN_KOR16,
29048 IX86_BUILTIN_KORTESTC16,
29049 IX86_BUILTIN_KORTESTZ16,
29050 IX86_BUILTIN_KUNPCKBW,
29051 IX86_BUILTIN_KXNOR16,
29052 IX86_BUILTIN_KXOR16,
29053 IX86_BUILTIN_KMOV16,
29055 /* AVX512VL. */
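/* AVX-512VL provides 128-bit and 256-bit forms of the AVX-512
operations, including masked variants of existing patterns.  */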
29056 IX86_BUILTIN_PMOVUSQD256_MEM,
29057 IX86_BUILTIN_PMOVUSQD128_MEM,
29058 IX86_BUILTIN_PMOVSQD256_MEM,
29059 IX86_BUILTIN_PMOVSQD128_MEM,
29060 IX86_BUILTIN_PMOVQD256_MEM,
29061 IX86_BUILTIN_PMOVQD128_MEM,
29062 IX86_BUILTIN_PMOVUSQW256_MEM,
29063 IX86_BUILTIN_PMOVUSQW128_MEM,
29064 IX86_BUILTIN_PMOVSQW256_MEM,
29065 IX86_BUILTIN_PMOVSQW128_MEM,
29066 IX86_BUILTIN_PMOVQW256_MEM,
29067 IX86_BUILTIN_PMOVQW128_MEM,
29068 IX86_BUILTIN_PMOVUSQB256_MEM,
29069 IX86_BUILTIN_PMOVUSQB128_MEM,
29070 IX86_BUILTIN_PMOVSQB256_MEM,
29071 IX86_BUILTIN_PMOVSQB128_MEM,
29072 IX86_BUILTIN_PMOVQB256_MEM,
29073 IX86_BUILTIN_PMOVQB128_MEM,
29074 IX86_BUILTIN_PMOVUSDW256_MEM,
29075 IX86_BUILTIN_PMOVUSDW128_MEM,
29076 IX86_BUILTIN_PMOVSDW256_MEM,
29077 IX86_BUILTIN_PMOVSDW128_MEM,
29078 IX86_BUILTIN_PMOVDW256_MEM,
29079 IX86_BUILTIN_PMOVDW128_MEM,
29080 IX86_BUILTIN_PMOVUSDB256_MEM,
29081 IX86_BUILTIN_PMOVUSDB128_MEM,
29082 IX86_BUILTIN_PMOVSDB256_MEM,
29083 IX86_BUILTIN_PMOVSDB128_MEM,
29084 IX86_BUILTIN_PMOVDB256_MEM,
29085 IX86_BUILTIN_PMOVDB128_MEM,
29086 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29087 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29088 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29089 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29090 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29091 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29092 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29093 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29094 IX86_BUILTIN_LOADAPD256_MASK,
29095 IX86_BUILTIN_LOADAPD128_MASK,
29096 IX86_BUILTIN_LOADAPS256_MASK,
29097 IX86_BUILTIN_LOADAPS128_MASK,
29098 IX86_BUILTIN_STOREAPD256_MASK,
29099 IX86_BUILTIN_STOREAPD128_MASK,
29100 IX86_BUILTIN_STOREAPS256_MASK,
29101 IX86_BUILTIN_STOREAPS128_MASK,
29102 IX86_BUILTIN_LOADUPD256_MASK,
29103 IX86_BUILTIN_LOADUPD128_MASK,
29104 IX86_BUILTIN_LOADUPS256_MASK,
29105 IX86_BUILTIN_LOADUPS128_MASK,
29106 IX86_BUILTIN_STOREUPD256_MASK,
29107 IX86_BUILTIN_STOREUPD128_MASK,
29108 IX86_BUILTIN_STOREUPS256_MASK,
29109 IX86_BUILTIN_STOREUPS128_MASK,
29110 IX86_BUILTIN_LOADDQUDI256_MASK,
29111 IX86_BUILTIN_LOADDQUDI128_MASK,
29112 IX86_BUILTIN_LOADDQUSI256_MASK,
29113 IX86_BUILTIN_LOADDQUSI128_MASK,
29114 IX86_BUILTIN_LOADDQUHI256_MASK,
29115 IX86_BUILTIN_LOADDQUHI128_MASK,
29116 IX86_BUILTIN_LOADDQUQI256_MASK,
29117 IX86_BUILTIN_LOADDQUQI128_MASK,
29118 IX86_BUILTIN_STOREDQUDI256_MASK,
29119 IX86_BUILTIN_STOREDQUDI128_MASK,
29120 IX86_BUILTIN_STOREDQUSI256_MASK,
29121 IX86_BUILTIN_STOREDQUSI128_MASK,
29122 IX86_BUILTIN_STOREDQUHI256_MASK,
29123 IX86_BUILTIN_STOREDQUHI128_MASK,
29124 IX86_BUILTIN_STOREDQUQI256_MASK,
29125 IX86_BUILTIN_STOREDQUQI128_MASK,
29126 IX86_BUILTIN_COMPRESSPDSTORE256,
29127 IX86_BUILTIN_COMPRESSPDSTORE128,
29128 IX86_BUILTIN_COMPRESSPSSTORE256,
29129 IX86_BUILTIN_COMPRESSPSSTORE128,
29130 IX86_BUILTIN_PCOMPRESSQSTORE256,
29131 IX86_BUILTIN_PCOMPRESSQSTORE128,
29132 IX86_BUILTIN_PCOMPRESSDSTORE256,
29133 IX86_BUILTIN_PCOMPRESSDSTORE128,
29134 IX86_BUILTIN_EXPANDPDLOAD256,
29135 IX86_BUILTIN_EXPANDPDLOAD128,
29136 IX86_BUILTIN_EXPANDPSLOAD256,
29137 IX86_BUILTIN_EXPANDPSLOAD128,
29138 IX86_BUILTIN_PEXPANDQLOAD256,
29139 IX86_BUILTIN_PEXPANDQLOAD128,
29140 IX86_BUILTIN_PEXPANDDLOAD256,
29141 IX86_BUILTIN_PEXPANDDLOAD128,
29142 IX86_BUILTIN_EXPANDPDLOAD256Z,
29143 IX86_BUILTIN_EXPANDPDLOAD128Z,
29144 IX86_BUILTIN_EXPANDPSLOAD256Z,
29145 IX86_BUILTIN_EXPANDPSLOAD128Z,
29146 IX86_BUILTIN_PEXPANDQLOAD256Z,
29147 IX86_BUILTIN_PEXPANDQLOAD128Z,
29148 IX86_BUILTIN_PEXPANDDLOAD256Z,
29149 IX86_BUILTIN_PEXPANDDLOAD128Z,
29150 IX86_BUILTIN_PALIGNR256_MASK,
29151 IX86_BUILTIN_PALIGNR128_MASK,
29152 IX86_BUILTIN_MOVDQA64_256_MASK,
29153 IX86_BUILTIN_MOVDQA64_128_MASK,
29154 IX86_BUILTIN_MOVDQA32_256_MASK,
29155 IX86_BUILTIN_MOVDQA32_128_MASK,
29156 IX86_BUILTIN_MOVAPD256_MASK,
29157 IX86_BUILTIN_MOVAPD128_MASK,
29158 IX86_BUILTIN_MOVAPS256_MASK,
29159 IX86_BUILTIN_MOVAPS128_MASK,
29160 IX86_BUILTIN_MOVDQUHI256_MASK,
29161 IX86_BUILTIN_MOVDQUHI128_MASK,
29162 IX86_BUILTIN_MOVDQUQI256_MASK,
29163 IX86_BUILTIN_MOVDQUQI128_MASK,
29164 IX86_BUILTIN_MINPS128_MASK,
29165 IX86_BUILTIN_MAXPS128_MASK,
29166 IX86_BUILTIN_MINPD128_MASK,
29167 IX86_BUILTIN_MAXPD128_MASK,
29168 IX86_BUILTIN_MAXPD256_MASK,
29169 IX86_BUILTIN_MAXPS256_MASK,
29170 IX86_BUILTIN_MINPD256_MASK,
29171 IX86_BUILTIN_MINPS256_MASK,
29172 IX86_BUILTIN_MULPS128_MASK,
29173 IX86_BUILTIN_DIVPS128_MASK,
29174 IX86_BUILTIN_MULPD128_MASK,
29175 IX86_BUILTIN_DIVPD128_MASK,
29176 IX86_BUILTIN_DIVPD256_MASK,
29177 IX86_BUILTIN_DIVPS256_MASK,
29178 IX86_BUILTIN_MULPD256_MASK,
29179 IX86_BUILTIN_MULPS256_MASK,
29180 IX86_BUILTIN_ADDPD128_MASK,
29181 IX86_BUILTIN_ADDPD256_MASK,
29182 IX86_BUILTIN_ADDPS128_MASK,
29183 IX86_BUILTIN_ADDPS256_MASK,
29184 IX86_BUILTIN_SUBPD128_MASK,
29185 IX86_BUILTIN_SUBPD256_MASK,
29186 IX86_BUILTIN_SUBPS128_MASK,
29187 IX86_BUILTIN_SUBPS256_MASK,
29188 IX86_BUILTIN_XORPD256_MASK,
29189 IX86_BUILTIN_XORPD128_MASK,
29190 IX86_BUILTIN_XORPS256_MASK,
29191 IX86_BUILTIN_XORPS128_MASK,
29192 IX86_BUILTIN_ORPD256_MASK,
29193 IX86_BUILTIN_ORPD128_MASK,
29194 IX86_BUILTIN_ORPS256_MASK,
29195 IX86_BUILTIN_ORPS128_MASK,
29196 IX86_BUILTIN_BROADCASTF32x2_256,
29197 IX86_BUILTIN_BROADCASTI32x2_256,
29198 IX86_BUILTIN_BROADCASTI32x2_128,
29199 IX86_BUILTIN_BROADCASTF64X2_256,
29200 IX86_BUILTIN_BROADCASTI64X2_256,
29201 IX86_BUILTIN_BROADCASTF32X4_256,
29202 IX86_BUILTIN_BROADCASTI32X4_256,
29203 IX86_BUILTIN_EXTRACTF32X4_256,
29204 IX86_BUILTIN_EXTRACTI32X4_256,
29205 IX86_BUILTIN_DBPSADBW256,
29206 IX86_BUILTIN_DBPSADBW128,
29207 IX86_BUILTIN_CVTTPD2QQ256,
29208 IX86_BUILTIN_CVTTPD2QQ128,
29209 IX86_BUILTIN_CVTTPD2UQQ256,
29210 IX86_BUILTIN_CVTTPD2UQQ128,
29211 IX86_BUILTIN_CVTPD2QQ256,
29212 IX86_BUILTIN_CVTPD2QQ128,
29213 IX86_BUILTIN_CVTPD2UQQ256,
29214 IX86_BUILTIN_CVTPD2UQQ128,
29215 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29216 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29217 IX86_BUILTIN_CVTTPS2QQ256,
29218 IX86_BUILTIN_CVTTPS2QQ128,
29219 IX86_BUILTIN_CVTTPS2UQQ256,
29220 IX86_BUILTIN_CVTTPS2UQQ128,
29221 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29222 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29223 IX86_BUILTIN_CVTTPS2UDQ256,
29224 IX86_BUILTIN_CVTTPS2UDQ128,
29225 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29226 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29227 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29228 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29229 IX86_BUILTIN_CVTPD2DQ256_MASK,
29230 IX86_BUILTIN_CVTPD2DQ128_MASK,
29231 IX86_BUILTIN_CVTDQ2PD256_MASK,
29232 IX86_BUILTIN_CVTDQ2PD128_MASK,
29233 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29234 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29235 IX86_BUILTIN_CVTDQ2PS256_MASK,
29236 IX86_BUILTIN_CVTDQ2PS128_MASK,
29237 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29238 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29239 IX86_BUILTIN_CVTPS2PD256_MASK,
29240 IX86_BUILTIN_CVTPS2PD128_MASK,
29241 IX86_BUILTIN_PBROADCASTB256_MASK,
29242 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29243 IX86_BUILTIN_PBROADCASTB128_MASK,
29244 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29245 IX86_BUILTIN_PBROADCASTW256_MASK,
29246 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29247 IX86_BUILTIN_PBROADCASTW128_MASK,
29248 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29249 IX86_BUILTIN_PBROADCASTD256_MASK,
29250 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29251 IX86_BUILTIN_PBROADCASTD128_MASK,
29252 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29253 IX86_BUILTIN_PBROADCASTQ256_MASK,
29254 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29255 IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
29256 IX86_BUILTIN_PBROADCASTQ128_MASK,
29257 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29258 IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
29259 IX86_BUILTIN_BROADCASTSS256,
29260 IX86_BUILTIN_BROADCASTSS128,
29261 IX86_BUILTIN_BROADCASTSD256,
29262 IX86_BUILTIN_EXTRACTF64X2_256,
29263 IX86_BUILTIN_EXTRACTI64X2_256,
29264 IX86_BUILTIN_INSERTF32X4_256,
29265 IX86_BUILTIN_INSERTI32X4_256,
29266 IX86_BUILTIN_PMOVSXBW256_MASK,
29267 IX86_BUILTIN_PMOVSXBW128_MASK,
29268 IX86_BUILTIN_PMOVSXBD256_MASK,
29269 IX86_BUILTIN_PMOVSXBD128_MASK,
29270 IX86_BUILTIN_PMOVSXBQ256_MASK,
29271 IX86_BUILTIN_PMOVSXBQ128_MASK,
29272 IX86_BUILTIN_PMOVSXWD256_MASK,
29273 IX86_BUILTIN_PMOVSXWD128_MASK,
29274 IX86_BUILTIN_PMOVSXWQ256_MASK,
29275 IX86_BUILTIN_PMOVSXWQ128_MASK,
29276 IX86_BUILTIN_PMOVSXDQ256_MASK,
29277 IX86_BUILTIN_PMOVSXDQ128_MASK,
29278 IX86_BUILTIN_PMOVZXBW256_MASK,
29279 IX86_BUILTIN_PMOVZXBW128_MASK,
29280 IX86_BUILTIN_PMOVZXBD256_MASK,
29281 IX86_BUILTIN_PMOVZXBD128_MASK,
29282 IX86_BUILTIN_PMOVZXBQ256_MASK,
29283 IX86_BUILTIN_PMOVZXBQ128_MASK,
29284 IX86_BUILTIN_PMOVZXWD256_MASK,
29285 IX86_BUILTIN_PMOVZXWD128_MASK,
29286 IX86_BUILTIN_PMOVZXWQ256_MASK,
29287 IX86_BUILTIN_PMOVZXWQ128_MASK,
29288 IX86_BUILTIN_PMOVZXDQ256_MASK,
29289 IX86_BUILTIN_PMOVZXDQ128_MASK,
29290 IX86_BUILTIN_REDUCEPD256_MASK,
29291 IX86_BUILTIN_REDUCEPD128_MASK,
29292 IX86_BUILTIN_REDUCEPS256_MASK,
29293 IX86_BUILTIN_REDUCEPS128_MASK,
29294 IX86_BUILTIN_REDUCESD_MASK,
29295 IX86_BUILTIN_REDUCESS_MASK,
29296 IX86_BUILTIN_VPERMVARHI256_MASK,
29297 IX86_BUILTIN_VPERMVARHI128_MASK,
29298 IX86_BUILTIN_VPERMT2VARHI256,
29299 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29300 IX86_BUILTIN_VPERMT2VARHI128,
29301 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29302 IX86_BUILTIN_VPERMI2VARHI256,
29303 IX86_BUILTIN_VPERMI2VARHI128,
29304 IX86_BUILTIN_RCP14PD256,
29305 IX86_BUILTIN_RCP14PD128,
29306 IX86_BUILTIN_RCP14PS256,
29307 IX86_BUILTIN_RCP14PS128,
29308 IX86_BUILTIN_RSQRT14PD256_MASK,
29309 IX86_BUILTIN_RSQRT14PD128_MASK,
29310 IX86_BUILTIN_RSQRT14PS256_MASK,
29311 IX86_BUILTIN_RSQRT14PS128_MASK,
29312 IX86_BUILTIN_SQRTPD256_MASK,
29313 IX86_BUILTIN_SQRTPD128_MASK,
29314 IX86_BUILTIN_SQRTPS256_MASK,
29315 IX86_BUILTIN_SQRTPS128_MASK,
29316 IX86_BUILTIN_PADDB128_MASK,
29317 IX86_BUILTIN_PADDW128_MASK,
29318 IX86_BUILTIN_PADDD128_MASK,
29319 IX86_BUILTIN_PADDQ128_MASK,
29320 IX86_BUILTIN_PSUBB128_MASK,
29321 IX86_BUILTIN_PSUBW128_MASK,
29322 IX86_BUILTIN_PSUBD128_MASK,
29323 IX86_BUILTIN_PSUBQ128_MASK,
29324 IX86_BUILTIN_PADDSB128_MASK,
29325 IX86_BUILTIN_PADDSW128_MASK,
29326 IX86_BUILTIN_PSUBSB128_MASK,
29327 IX86_BUILTIN_PSUBSW128_MASK,
29328 IX86_BUILTIN_PADDUSB128_MASK,
29329 IX86_BUILTIN_PADDUSW128_MASK,
29330 IX86_BUILTIN_PSUBUSB128_MASK,
29331 IX86_BUILTIN_PSUBUSW128_MASK,
29332 IX86_BUILTIN_PADDB256_MASK,
29333 IX86_BUILTIN_PADDW256_MASK,
29334 IX86_BUILTIN_PADDD256_MASK,
29335 IX86_BUILTIN_PADDQ256_MASK,
29336 IX86_BUILTIN_PADDSB256_MASK,
29337 IX86_BUILTIN_PADDSW256_MASK,
29338 IX86_BUILTIN_PADDUSB256_MASK,
29339 IX86_BUILTIN_PADDUSW256_MASK,
29340 IX86_BUILTIN_PSUBB256_MASK,
29341 IX86_BUILTIN_PSUBW256_MASK,
29342 IX86_BUILTIN_PSUBD256_MASK,
29343 IX86_BUILTIN_PSUBQ256_MASK,
29344 IX86_BUILTIN_PSUBSB256_MASK,
29345 IX86_BUILTIN_PSUBSW256_MASK,
29346 IX86_BUILTIN_PSUBUSB256_MASK,
29347 IX86_BUILTIN_PSUBUSW256_MASK,
29348 IX86_BUILTIN_SHUF_F64x2_256,
29349 IX86_BUILTIN_SHUF_I64x2_256,
29350 IX86_BUILTIN_SHUF_I32x4_256,
29351 IX86_BUILTIN_SHUF_F32x4_256,
29352 IX86_BUILTIN_PMOVWB128,
29353 IX86_BUILTIN_PMOVWB256,
29354 IX86_BUILTIN_PMOVSWB128,
29355 IX86_BUILTIN_PMOVSWB256,
29356 IX86_BUILTIN_PMOVUSWB128,
29357 IX86_BUILTIN_PMOVUSWB256,
29358 IX86_BUILTIN_PMOVDB128,
29359 IX86_BUILTIN_PMOVDB256,
29360 IX86_BUILTIN_PMOVSDB128,
29361 IX86_BUILTIN_PMOVSDB256,
29362 IX86_BUILTIN_PMOVUSDB128,
29363 IX86_BUILTIN_PMOVUSDB256,
29364 IX86_BUILTIN_PMOVDW128,
29365 IX86_BUILTIN_PMOVDW256,
29366 IX86_BUILTIN_PMOVSDW128,
29367 IX86_BUILTIN_PMOVSDW256,
29368 IX86_BUILTIN_PMOVUSDW128,
29369 IX86_BUILTIN_PMOVUSDW256,
29370 IX86_BUILTIN_PMOVQB128,
29371 IX86_BUILTIN_PMOVQB256,
29372 IX86_BUILTIN_PMOVSQB128,
29373 IX86_BUILTIN_PMOVSQB256,
29374 IX86_BUILTIN_PMOVUSQB128,
29375 IX86_BUILTIN_PMOVUSQB256,
29376 IX86_BUILTIN_PMOVQW128,
29377 IX86_BUILTIN_PMOVQW256,
29378 IX86_BUILTIN_PMOVSQW128,
29379 IX86_BUILTIN_PMOVSQW256,
29380 IX86_BUILTIN_PMOVUSQW128,
29381 IX86_BUILTIN_PMOVUSQW256,
29382 IX86_BUILTIN_PMOVQD128,
29383 IX86_BUILTIN_PMOVQD256,
29384 IX86_BUILTIN_PMOVSQD128,
29385 IX86_BUILTIN_PMOVSQD256,
29386 IX86_BUILTIN_PMOVUSQD128,
29387 IX86_BUILTIN_PMOVUSQD256,
29388 IX86_BUILTIN_RANGEPD256,
29389 IX86_BUILTIN_RANGEPD128,
29390 IX86_BUILTIN_RANGEPS256,
29391 IX86_BUILTIN_RANGEPS128,
29392 IX86_BUILTIN_GETEXPPS256,
29393 IX86_BUILTIN_GETEXPPD256,
29394 IX86_BUILTIN_GETEXPPS128,
29395 IX86_BUILTIN_GETEXPPD128,
29396 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29397 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29398 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29399 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29400 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29401 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29402 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29403 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29404 IX86_BUILTIN_PABSQ256,
29405 IX86_BUILTIN_PABSQ128,
29406 IX86_BUILTIN_PABSD256_MASK,
29407 IX86_BUILTIN_PABSD128_MASK,
29408 IX86_BUILTIN_PMULHRSW256_MASK,
29409 IX86_BUILTIN_PMULHRSW128_MASK,
29410 IX86_BUILTIN_PMULHUW128_MASK,
29411 IX86_BUILTIN_PMULHUW256_MASK,
29412 IX86_BUILTIN_PMULHW256_MASK,
29413 IX86_BUILTIN_PMULHW128_MASK,
29414 IX86_BUILTIN_PMULLW256_MASK,
29415 IX86_BUILTIN_PMULLW128_MASK,
29416 IX86_BUILTIN_PMULLQ256,
29417 IX86_BUILTIN_PMULLQ128,
29418 IX86_BUILTIN_ANDPD256_MASK,
29419 IX86_BUILTIN_ANDPD128_MASK,
29420 IX86_BUILTIN_ANDPS256_MASK,
29421 IX86_BUILTIN_ANDPS128_MASK,
29422 IX86_BUILTIN_ANDNPD256_MASK,
29423 IX86_BUILTIN_ANDNPD128_MASK,
29424 IX86_BUILTIN_ANDNPS256_MASK,
29425 IX86_BUILTIN_ANDNPS128_MASK,
29426 IX86_BUILTIN_PSLLWI128_MASK,
29427 IX86_BUILTIN_PSLLDI128_MASK,
29428 IX86_BUILTIN_PSLLQI128_MASK,
29429 IX86_BUILTIN_PSLLW128_MASK,
29430 IX86_BUILTIN_PSLLD128_MASK,
29431 IX86_BUILTIN_PSLLQ128_MASK,
29432 IX86_BUILTIN_PSLLWI256_MASK,
29433 IX86_BUILTIN_PSLLW256_MASK,
29434 IX86_BUILTIN_PSLLDI256_MASK,
29435 IX86_BUILTIN_PSLLD256_MASK,
29436 IX86_BUILTIN_PSLLQI256_MASK,
29437 IX86_BUILTIN_PSLLQ256_MASK,
29438 IX86_BUILTIN_PSRADI128_MASK,
29439 IX86_BUILTIN_PSRAD128_MASK,
29440 IX86_BUILTIN_PSRADI256_MASK,
29441 IX86_BUILTIN_PSRAD256_MASK,
29442 IX86_BUILTIN_PSRAQI128_MASK,
29443 IX86_BUILTIN_PSRAQ128_MASK,
29444 IX86_BUILTIN_PSRAQI256_MASK,
29445 IX86_BUILTIN_PSRAQ256_MASK,
29446 IX86_BUILTIN_PANDD256,
29447 IX86_BUILTIN_PANDD128,
29448 IX86_BUILTIN_PSRLDI128_MASK,
29449 IX86_BUILTIN_PSRLD128_MASK,
29450 IX86_BUILTIN_PSRLDI256_MASK,
29451 IX86_BUILTIN_PSRLD256_MASK,
29452 IX86_BUILTIN_PSRLQI128_MASK,
29453 IX86_BUILTIN_PSRLQ128_MASK,
29454 IX86_BUILTIN_PSRLQI256_MASK,
29455 IX86_BUILTIN_PSRLQ256_MASK,
29456 IX86_BUILTIN_PANDQ256,
29457 IX86_BUILTIN_PANDQ128,
29458 IX86_BUILTIN_PANDND256,
29459 IX86_BUILTIN_PANDND128,
29460 IX86_BUILTIN_PANDNQ256,
29461 IX86_BUILTIN_PANDNQ128,
29462 IX86_BUILTIN_PORD256,
29463 IX86_BUILTIN_PORD128,
29464 IX86_BUILTIN_PORQ256,
29465 IX86_BUILTIN_PORQ128,
29466 IX86_BUILTIN_PXORD256,
29467 IX86_BUILTIN_PXORD128,
29468 IX86_BUILTIN_PXORQ256,
29469 IX86_BUILTIN_PXORQ128,
29470 IX86_BUILTIN_PACKSSWB256_MASK,
29471 IX86_BUILTIN_PACKSSWB128_MASK,
29472 IX86_BUILTIN_PACKUSWB256_MASK,
29473 IX86_BUILTIN_PACKUSWB128_MASK,
29474 IX86_BUILTIN_RNDSCALEPS256,
29475 IX86_BUILTIN_RNDSCALEPD256,
29476 IX86_BUILTIN_RNDSCALEPS128,
29477 IX86_BUILTIN_RNDSCALEPD128,
29478 IX86_BUILTIN_VTERNLOGQ256_MASK,
29479 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29480 IX86_BUILTIN_VTERNLOGD256_MASK,
29481 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29482 IX86_BUILTIN_VTERNLOGQ128_MASK,
29483 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29484 IX86_BUILTIN_VTERNLOGD128_MASK,
29485 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29486 IX86_BUILTIN_SCALEFPD256,
29487 IX86_BUILTIN_SCALEFPS256,
29488 IX86_BUILTIN_SCALEFPD128,
29489 IX86_BUILTIN_SCALEFPS128,
29490 IX86_BUILTIN_VFMADDPD256_MASK,
29491 IX86_BUILTIN_VFMADDPD256_MASK3,
29492 IX86_BUILTIN_VFMADDPD256_MASKZ,
29493 IX86_BUILTIN_VFMADDPD128_MASK,
29494 IX86_BUILTIN_VFMADDPD128_MASK3,
29495 IX86_BUILTIN_VFMADDPD128_MASKZ,
29496 IX86_BUILTIN_VFMADDPS256_MASK,
29497 IX86_BUILTIN_VFMADDPS256_MASK3,
29498 IX86_BUILTIN_VFMADDPS256_MASKZ,
29499 IX86_BUILTIN_VFMADDPS128_MASK,
29500 IX86_BUILTIN_VFMADDPS128_MASK3,
29501 IX86_BUILTIN_VFMADDPS128_MASKZ,
29502 IX86_BUILTIN_VFMSUBPD256_MASK3,
29503 IX86_BUILTIN_VFMSUBPD128_MASK3,
29504 IX86_BUILTIN_VFMSUBPS256_MASK3,
29505 IX86_BUILTIN_VFMSUBPS128_MASK3,
29506 IX86_BUILTIN_VFNMADDPD256_MASK,
29507 IX86_BUILTIN_VFNMADDPD128_MASK,
29508 IX86_BUILTIN_VFNMADDPS256_MASK,
29509 IX86_BUILTIN_VFNMADDPS128_MASK,
29510 IX86_BUILTIN_VFNMSUBPD256_MASK,
29511 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29512 IX86_BUILTIN_VFNMSUBPD128_MASK,
29513 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29514 IX86_BUILTIN_VFNMSUBPS256_MASK,
29515 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29516 IX86_BUILTIN_VFNMSUBPS128_MASK,
29517 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29518 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29519 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29520 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29521 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29522 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29523 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29524 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29525 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29526 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29527 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29528 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29529 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29530 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29531 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29532 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29533 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29534 IX86_BUILTIN_INSERTF64X2_256,
29535 IX86_BUILTIN_INSERTI64X2_256,
29536 IX86_BUILTIN_PSRAVV16HI,
29537 IX86_BUILTIN_PSRAVV8HI,
29538 IX86_BUILTIN_PMADDUBSW256_MASK,
29539 IX86_BUILTIN_PMADDUBSW128_MASK,
29540 IX86_BUILTIN_PMADDWD256_MASK,
29541 IX86_BUILTIN_PMADDWD128_MASK,
29542 IX86_BUILTIN_PSRLVV16HI,
29543 IX86_BUILTIN_PSRLVV8HI,
29544 IX86_BUILTIN_CVTPS2DQ256_MASK,
29545 IX86_BUILTIN_CVTPS2DQ128_MASK,
29546 IX86_BUILTIN_CVTPS2UDQ256,
29547 IX86_BUILTIN_CVTPS2UDQ128,
29548 IX86_BUILTIN_CVTPS2QQ256,
29549 IX86_BUILTIN_CVTPS2QQ128,
29550 IX86_BUILTIN_CVTPS2UQQ256,
29551 IX86_BUILTIN_CVTPS2UQQ128,
29552 IX86_BUILTIN_GETMANTPS256,
29553 IX86_BUILTIN_GETMANTPS128,
29554 IX86_BUILTIN_GETMANTPD256,
29555 IX86_BUILTIN_GETMANTPD128,
29556 IX86_BUILTIN_MOVDDUP256_MASK,
29557 IX86_BUILTIN_MOVDDUP128_MASK,
29558 IX86_BUILTIN_MOVSHDUP256_MASK,
29559 IX86_BUILTIN_MOVSHDUP128_MASK,
29560 IX86_BUILTIN_MOVSLDUP256_MASK,
29561 IX86_BUILTIN_MOVSLDUP128_MASK,
29562 IX86_BUILTIN_CVTQQ2PS256,
29563 IX86_BUILTIN_CVTQQ2PS128,
29564 IX86_BUILTIN_CVTUQQ2PS256,
29565 IX86_BUILTIN_CVTUQQ2PS128,
29566 IX86_BUILTIN_CVTQQ2PD256,
29567 IX86_BUILTIN_CVTQQ2PD128,
29568 IX86_BUILTIN_CVTUQQ2PD256,
29569 IX86_BUILTIN_CVTUQQ2PD128,
29570 IX86_BUILTIN_VPERMT2VARQ256,
29571 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29572 IX86_BUILTIN_VPERMT2VARD256,
29573 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29574 IX86_BUILTIN_VPERMI2VARQ256,
29575 IX86_BUILTIN_VPERMI2VARD256,
29576 IX86_BUILTIN_VPERMT2VARPD256,
29577 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29578 IX86_BUILTIN_VPERMT2VARPS256,
29579 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29580 IX86_BUILTIN_VPERMI2VARPD256,
29581 IX86_BUILTIN_VPERMI2VARPS256,
29582 IX86_BUILTIN_VPERMT2VARQ128,
29583 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29584 IX86_BUILTIN_VPERMT2VARD128,
29585 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29586 IX86_BUILTIN_VPERMI2VARQ128,
29587 IX86_BUILTIN_VPERMI2VARD128,
29588 IX86_BUILTIN_VPERMT2VARPD128,
29589 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29590 IX86_BUILTIN_VPERMT2VARPS128,
29591 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29592 IX86_BUILTIN_VPERMI2VARPD128,
29593 IX86_BUILTIN_VPERMI2VARPS128,
29594 IX86_BUILTIN_PSHUFB256_MASK,
29595 IX86_BUILTIN_PSHUFB128_MASK,
29596 IX86_BUILTIN_PSHUFHW256_MASK,
29597 IX86_BUILTIN_PSHUFHW128_MASK,
29598 IX86_BUILTIN_PSHUFLW256_MASK,
29599 IX86_BUILTIN_PSHUFLW128_MASK,
29600 IX86_BUILTIN_PSHUFD256_MASK,
29601 IX86_BUILTIN_PSHUFD128_MASK,
29602 IX86_BUILTIN_SHUFPD256_MASK,
29603 IX86_BUILTIN_SHUFPD128_MASK,
29604 IX86_BUILTIN_SHUFPS256_MASK,
29605 IX86_BUILTIN_SHUFPS128_MASK,
29606 IX86_BUILTIN_PROLVQ256,
29607 IX86_BUILTIN_PROLVQ128,
29608 IX86_BUILTIN_PROLQ256,
29609 IX86_BUILTIN_PROLQ128,
29610 IX86_BUILTIN_PRORVQ256,
29611 IX86_BUILTIN_PRORVQ128,
29612 IX86_BUILTIN_PRORQ256,
29613 IX86_BUILTIN_PRORQ128,
29614 IX86_BUILTIN_PSRAVQ128,
29615 IX86_BUILTIN_PSRAVQ256,
29616 IX86_BUILTIN_PSLLVV4DI_MASK,
29617 IX86_BUILTIN_PSLLVV2DI_MASK,
29618 IX86_BUILTIN_PSLLVV8SI_MASK,
29619 IX86_BUILTIN_PSLLVV4SI_MASK,
29620 IX86_BUILTIN_PSRAVV8SI_MASK,
29621 IX86_BUILTIN_PSRAVV4SI_MASK,
29622 IX86_BUILTIN_PSRLVV4DI_MASK,
29623 IX86_BUILTIN_PSRLVV2DI_MASK,
29624 IX86_BUILTIN_PSRLVV8SI_MASK,
29625 IX86_BUILTIN_PSRLVV4SI_MASK,
29626 IX86_BUILTIN_PSRAWI256_MASK,
29627 IX86_BUILTIN_PSRAW256_MASK,
29628 IX86_BUILTIN_PSRAWI128_MASK,
29629 IX86_BUILTIN_PSRAW128_MASK,
29630 IX86_BUILTIN_PSRLWI256_MASK,
29631 IX86_BUILTIN_PSRLW256_MASK,
29632 IX86_BUILTIN_PSRLWI128_MASK,
29633 IX86_BUILTIN_PSRLW128_MASK,
29634 IX86_BUILTIN_PRORVD256,
29635 IX86_BUILTIN_PROLVD256,
29636 IX86_BUILTIN_PRORD256,
29637 IX86_BUILTIN_PROLD256,
29638 IX86_BUILTIN_PRORVD128,
29639 IX86_BUILTIN_PROLVD128,
29640 IX86_BUILTIN_PRORD128,
29641 IX86_BUILTIN_PROLD128,
29642 IX86_BUILTIN_FPCLASSPD256,
29643 IX86_BUILTIN_FPCLASSPD128,
29644 IX86_BUILTIN_FPCLASSSD,
29645 IX86_BUILTIN_FPCLASSPS256,
29646 IX86_BUILTIN_FPCLASSPS128,
29647 IX86_BUILTIN_FPCLASSSS,
29648 IX86_BUILTIN_CVTB2MASK128,
29649 IX86_BUILTIN_CVTB2MASK256,
29650 IX86_BUILTIN_CVTW2MASK128,
29651 IX86_BUILTIN_CVTW2MASK256,
29652 IX86_BUILTIN_CVTD2MASK128,
29653 IX86_BUILTIN_CVTD2MASK256,
29654 IX86_BUILTIN_CVTQ2MASK128,
29655 IX86_BUILTIN_CVTQ2MASK256,
29656 IX86_BUILTIN_CVTMASK2B128,
29657 IX86_BUILTIN_CVTMASK2B256,
29658 IX86_BUILTIN_CVTMASK2W128,
29659 IX86_BUILTIN_CVTMASK2W256,
29660 IX86_BUILTIN_CVTMASK2D128,
29661 IX86_BUILTIN_CVTMASK2D256,
29662 IX86_BUILTIN_CVTMASK2Q128,
29663 IX86_BUILTIN_CVTMASK2Q256,
29664 IX86_BUILTIN_PCMPEQB128_MASK,
29665 IX86_BUILTIN_PCMPEQB256_MASK,
29666 IX86_BUILTIN_PCMPEQW128_MASK,
29667 IX86_BUILTIN_PCMPEQW256_MASK,
29668 IX86_BUILTIN_PCMPEQD128_MASK,
29669 IX86_BUILTIN_PCMPEQD256_MASK,
29670 IX86_BUILTIN_PCMPEQQ128_MASK,
29671 IX86_BUILTIN_PCMPEQQ256_MASK,
29672 IX86_BUILTIN_PCMPGTB128_MASK,
29673 IX86_BUILTIN_PCMPGTB256_MASK,
29674 IX86_BUILTIN_PCMPGTW128_MASK,
29675 IX86_BUILTIN_PCMPGTW256_MASK,
29676 IX86_BUILTIN_PCMPGTD128_MASK,
29677 IX86_BUILTIN_PCMPGTD256_MASK,
29678 IX86_BUILTIN_PCMPGTQ128_MASK,
29679 IX86_BUILTIN_PCMPGTQ256_MASK,
29680 IX86_BUILTIN_PTESTMB128,
29681 IX86_BUILTIN_PTESTMB256,
29682 IX86_BUILTIN_PTESTMW128,
29683 IX86_BUILTIN_PTESTMW256,
29684 IX86_BUILTIN_PTESTMD128,
29685 IX86_BUILTIN_PTESTMD256,
29686 IX86_BUILTIN_PTESTMQ128,
29687 IX86_BUILTIN_PTESTMQ256,
29688 IX86_BUILTIN_PTESTNMB128,
29689 IX86_BUILTIN_PTESTNMB256,
29690 IX86_BUILTIN_PTESTNMW128,
29691 IX86_BUILTIN_PTESTNMW256,
29692 IX86_BUILTIN_PTESTNMD128,
29693 IX86_BUILTIN_PTESTNMD256,
29694 IX86_BUILTIN_PTESTNMQ128,
29695 IX86_BUILTIN_PTESTNMQ256,
29696 IX86_BUILTIN_PBROADCASTMB128,
29697 IX86_BUILTIN_PBROADCASTMB256,
29698 IX86_BUILTIN_PBROADCASTMW128,
29699 IX86_BUILTIN_PBROADCASTMW256,
29700 IX86_BUILTIN_COMPRESSPD256,
29701 IX86_BUILTIN_COMPRESSPD128,
29702 IX86_BUILTIN_COMPRESSPS256,
29703 IX86_BUILTIN_COMPRESSPS128,
29704 IX86_BUILTIN_PCOMPRESSQ256,
29705 IX86_BUILTIN_PCOMPRESSQ128,
29706 IX86_BUILTIN_PCOMPRESSD256,
29707 IX86_BUILTIN_PCOMPRESSD128,
29708 IX86_BUILTIN_EXPANDPD256,
29709 IX86_BUILTIN_EXPANDPD128,
29710 IX86_BUILTIN_EXPANDPS256,
29711 IX86_BUILTIN_EXPANDPS128,
29712 IX86_BUILTIN_PEXPANDQ256,
29713 IX86_BUILTIN_PEXPANDQ128,
29714 IX86_BUILTIN_PEXPANDD256,
29715 IX86_BUILTIN_PEXPANDD128,
29716 IX86_BUILTIN_EXPANDPD256Z,
29717 IX86_BUILTIN_EXPANDPD128Z,
29718 IX86_BUILTIN_EXPANDPS256Z,
29719 IX86_BUILTIN_EXPANDPS128Z,
29720 IX86_BUILTIN_PEXPANDQ256Z,
29721 IX86_BUILTIN_PEXPANDQ128Z,
29722 IX86_BUILTIN_PEXPANDD256Z,
29723 IX86_BUILTIN_PEXPANDD128Z,
29724 IX86_BUILTIN_PMAXSD256_MASK,
29725 IX86_BUILTIN_PMINSD256_MASK,
29726 IX86_BUILTIN_PMAXUD256_MASK,
29727 IX86_BUILTIN_PMINUD256_MASK,
29728 IX86_BUILTIN_PMAXSD128_MASK,
29729 IX86_BUILTIN_PMINSD128_MASK,
29730 IX86_BUILTIN_PMAXUD128_MASK,
29731 IX86_BUILTIN_PMINUD128_MASK,
29732 IX86_BUILTIN_PMAXSQ256_MASK,
29733 IX86_BUILTIN_PMINSQ256_MASK,
29734 IX86_BUILTIN_PMAXUQ256_MASK,
29735 IX86_BUILTIN_PMINUQ256_MASK,
29736 IX86_BUILTIN_PMAXSQ128_MASK,
29737 IX86_BUILTIN_PMINSQ128_MASK,
29738 IX86_BUILTIN_PMAXUQ128_MASK,
29739 IX86_BUILTIN_PMINUQ128_MASK,
29740 IX86_BUILTIN_PMINSB256_MASK,
29741 IX86_BUILTIN_PMINUB256_MASK,
29742 IX86_BUILTIN_PMAXSB256_MASK,
29743 IX86_BUILTIN_PMAXUB256_MASK,
29744 IX86_BUILTIN_PMINSB128_MASK,
29745 IX86_BUILTIN_PMINUB128_MASK,
29746 IX86_BUILTIN_PMAXSB128_MASK,
29747 IX86_BUILTIN_PMAXUB128_MASK,
29748 IX86_BUILTIN_PMINSW256_MASK,
29749 IX86_BUILTIN_PMINUW256_MASK,
29750 IX86_BUILTIN_PMAXSW256_MASK,
29751 IX86_BUILTIN_PMAXUW256_MASK,
29752 IX86_BUILTIN_PMINSW128_MASK,
29753 IX86_BUILTIN_PMINUW128_MASK,
29754 IX86_BUILTIN_PMAXSW128_MASK,
29755 IX86_BUILTIN_PMAXUW128_MASK,
29756 IX86_BUILTIN_VPCONFLICTQ256,
29757 IX86_BUILTIN_VPCONFLICTD256,
29758 IX86_BUILTIN_VPCLZCNTQ256,
29759 IX86_BUILTIN_VPCLZCNTD256,
29760 IX86_BUILTIN_UNPCKHPD256_MASK,
29761 IX86_BUILTIN_UNPCKHPD128_MASK,
29762 IX86_BUILTIN_UNPCKHPS256_MASK,
29763 IX86_BUILTIN_UNPCKHPS128_MASK,
29764 IX86_BUILTIN_UNPCKLPD256_MASK,
29765 IX86_BUILTIN_UNPCKLPD128_MASK,
29766 IX86_BUILTIN_UNPCKLPS256_MASK,
29767 IX86_BUILTIN_VPCONFLICTQ128,
29768 IX86_BUILTIN_VPCONFLICTD128,
29769 IX86_BUILTIN_VPCLZCNTQ128,
29770 IX86_BUILTIN_VPCLZCNTD128,
29771 IX86_BUILTIN_UNPCKLPS128_MASK,
29772 IX86_BUILTIN_ALIGND256,
29773 IX86_BUILTIN_ALIGNQ256,
29774 IX86_BUILTIN_ALIGND128,
29775 IX86_BUILTIN_ALIGNQ128,
29776 IX86_BUILTIN_CVTPS2PH256_MASK,
29777 IX86_BUILTIN_CVTPS2PH_MASK,
29778 IX86_BUILTIN_CVTPH2PS_MASK,
29779 IX86_BUILTIN_CVTPH2PS256_MASK,
29780 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29781 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29782 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29783 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29784 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29785 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29786 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29787 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29788 IX86_BUILTIN_PUNPCKHBW128_MASK,
29789 IX86_BUILTIN_PUNPCKHBW256_MASK,
29790 IX86_BUILTIN_PUNPCKHWD128_MASK,
29791 IX86_BUILTIN_PUNPCKHWD256_MASK,
29792 IX86_BUILTIN_PUNPCKLBW128_MASK,
29793 IX86_BUILTIN_PUNPCKLBW256_MASK,
29794 IX86_BUILTIN_PUNPCKLWD128_MASK,
29795 IX86_BUILTIN_PUNPCKLWD256_MASK,
29796 IX86_BUILTIN_PSLLVV16HI,
29797 IX86_BUILTIN_PSLLVV8HI,
29798 IX86_BUILTIN_PACKSSDW256_MASK,
29799 IX86_BUILTIN_PACKSSDW128_MASK,
29800 IX86_BUILTIN_PACKUSDW256_MASK,
29801 IX86_BUILTIN_PACKUSDW128_MASK,
29802 IX86_BUILTIN_PAVGB256_MASK,
29803 IX86_BUILTIN_PAVGW256_MASK,
29804 IX86_BUILTIN_PAVGB128_MASK,
29805 IX86_BUILTIN_PAVGW128_MASK,
29806 IX86_BUILTIN_VPERMVARSF256_MASK,
29807 IX86_BUILTIN_VPERMVARDF256_MASK,
29808 IX86_BUILTIN_VPERMDF256_MASK,
29809 IX86_BUILTIN_PABSB256_MASK,
29810 IX86_BUILTIN_PABSB128_MASK,
29811 IX86_BUILTIN_PABSW256_MASK,
29812 IX86_BUILTIN_PABSW128_MASK,
29813 IX86_BUILTIN_VPERMILVARPD_MASK,
29814 IX86_BUILTIN_VPERMILVARPS_MASK,
29815 IX86_BUILTIN_VPERMILVARPD256_MASK,
29816 IX86_BUILTIN_VPERMILVARPS256_MASK,
29817 IX86_BUILTIN_VPERMILPD_MASK,
29818 IX86_BUILTIN_VPERMILPS_MASK,
29819 IX86_BUILTIN_VPERMILPD256_MASK,
29820 IX86_BUILTIN_VPERMILPS256_MASK,
29821 IX86_BUILTIN_BLENDMQ256,
29822 IX86_BUILTIN_BLENDMD256,
29823 IX86_BUILTIN_BLENDMPD256,
29824 IX86_BUILTIN_BLENDMPS256,
29825 IX86_BUILTIN_BLENDMQ128,
29826 IX86_BUILTIN_BLENDMD128,
29827 IX86_BUILTIN_BLENDMPD128,
29828 IX86_BUILTIN_BLENDMPS128,
29829 IX86_BUILTIN_BLENDMW256,
29830 IX86_BUILTIN_BLENDMB256,
29831 IX86_BUILTIN_BLENDMW128,
29832 IX86_BUILTIN_BLENDMB128,
29833 IX86_BUILTIN_PMULLD256_MASK,
29834 IX86_BUILTIN_PMULLD128_MASK,
29835 IX86_BUILTIN_PMULUDQ256_MASK,
29836 IX86_BUILTIN_PMULDQ256_MASK,
29837 IX86_BUILTIN_PMULDQ128_MASK,
29838 IX86_BUILTIN_PMULUDQ128_MASK,
29839 IX86_BUILTIN_CVTPD2PS256_MASK,
29840 IX86_BUILTIN_CVTPD2PS_MASK,
29841 IX86_BUILTIN_VPERMVARSI256_MASK,
29842 IX86_BUILTIN_VPERMVARDI256_MASK,
29843 IX86_BUILTIN_VPERMDI256_MASK,
29844 IX86_BUILTIN_CMPQ256,
29845 IX86_BUILTIN_CMPD256,
29846 IX86_BUILTIN_UCMPQ256,
29847 IX86_BUILTIN_UCMPD256,
29848 IX86_BUILTIN_CMPB256,
29849 IX86_BUILTIN_CMPW256,
29850 IX86_BUILTIN_UCMPB256,
29851 IX86_BUILTIN_UCMPW256,
29852 IX86_BUILTIN_CMPPD256_MASK,
29853 IX86_BUILTIN_CMPPS256_MASK,
29854 IX86_BUILTIN_CMPQ128,
29855 IX86_BUILTIN_CMPD128,
29856 IX86_BUILTIN_UCMPQ128,
29857 IX86_BUILTIN_UCMPD128,
29858 IX86_BUILTIN_CMPB128,
29859 IX86_BUILTIN_CMPW128,
29860 IX86_BUILTIN_UCMPB128,
29861 IX86_BUILTIN_UCMPW128,
29862 IX86_BUILTIN_CMPPD128_MASK,
29863 IX86_BUILTIN_CMPPS128_MASK,
29865 IX86_BUILTIN_GATHER3SIV8SF,
29866 IX86_BUILTIN_GATHER3SIV4SF,
29867 IX86_BUILTIN_GATHER3SIV4DF,
29868 IX86_BUILTIN_GATHER3SIV2DF,
29869 IX86_BUILTIN_GATHER3DIV8SF,
29870 IX86_BUILTIN_GATHER3DIV4SF,
29871 IX86_BUILTIN_GATHER3DIV4DF,
29872 IX86_BUILTIN_GATHER3DIV2DF,
29873 IX86_BUILTIN_GATHER3SIV8SI,
29874 IX86_BUILTIN_GATHER3SIV4SI,
29875 IX86_BUILTIN_GATHER3SIV4DI,
29876 IX86_BUILTIN_GATHER3SIV2DI,
29877 IX86_BUILTIN_GATHER3DIV8SI,
29878 IX86_BUILTIN_GATHER3DIV4SI,
29879 IX86_BUILTIN_GATHER3DIV4DI,
29880 IX86_BUILTIN_GATHER3DIV2DI,
29881 IX86_BUILTIN_SCATTERSIV8SF,
29882 IX86_BUILTIN_SCATTERSIV4SF,
29883 IX86_BUILTIN_SCATTERSIV4DF,
29884 IX86_BUILTIN_SCATTERSIV2DF,
29885 IX86_BUILTIN_SCATTERDIV8SF,
29886 IX86_BUILTIN_SCATTERDIV4SF,
29887 IX86_BUILTIN_SCATTERDIV4DF,
29888 IX86_BUILTIN_SCATTERDIV2DF,
29889 IX86_BUILTIN_SCATTERSIV8SI,
29890 IX86_BUILTIN_SCATTERSIV4SI,
29891 IX86_BUILTIN_SCATTERSIV4DI,
29892 IX86_BUILTIN_SCATTERSIV2DI,
29893 IX86_BUILTIN_SCATTERDIV8SI,
29894 IX86_BUILTIN_SCATTERDIV4SI,
29895 IX86_BUILTIN_SCATTERDIV4DI,
29896 IX86_BUILTIN_SCATTERDIV2DI,
29898 /* AVX512DQ. */
29899 IX86_BUILTIN_RANGESD128,
29900 IX86_BUILTIN_RANGESS128,
29901 IX86_BUILTIN_KUNPCKWD,
29902 IX86_BUILTIN_KUNPCKDQ,
29903 IX86_BUILTIN_BROADCASTF32x2_512,
29904 IX86_BUILTIN_BROADCASTI32x2_512,
29905 IX86_BUILTIN_BROADCASTF64X2_512,
29906 IX86_BUILTIN_BROADCASTI64X2_512,
29907 IX86_BUILTIN_BROADCASTF32X8_512,
29908 IX86_BUILTIN_BROADCASTI32X8_512,
29909 IX86_BUILTIN_EXTRACTF64X2_512,
29910 IX86_BUILTIN_EXTRACTF32X8,
29911 IX86_BUILTIN_EXTRACTI64X2_512,
29912 IX86_BUILTIN_EXTRACTI32X8,
29913 IX86_BUILTIN_REDUCEPD512_MASK,
29914 IX86_BUILTIN_REDUCEPS512_MASK,
29915 IX86_BUILTIN_PMULLQ512,
29916 IX86_BUILTIN_XORPD512,
29917 IX86_BUILTIN_XORPS512,
29918 IX86_BUILTIN_ORPD512,
29919 IX86_BUILTIN_ORPS512,
29920 IX86_BUILTIN_ANDPD512,
29921 IX86_BUILTIN_ANDPS512,
29922 IX86_BUILTIN_ANDNPD512,
29923 IX86_BUILTIN_ANDNPS512,
29924 IX86_BUILTIN_INSERTF32X8,
29925 IX86_BUILTIN_INSERTI32X8,
29926 IX86_BUILTIN_INSERTF64X2_512,
29927 IX86_BUILTIN_INSERTI64X2_512,
29928 IX86_BUILTIN_FPCLASSPD512,
29929 IX86_BUILTIN_FPCLASSPS512,
29930 IX86_BUILTIN_CVTD2MASK512,
29931 IX86_BUILTIN_CVTQ2MASK512,
29932 IX86_BUILTIN_CVTMASK2D512,
29933 IX86_BUILTIN_CVTMASK2Q512,
29934 IX86_BUILTIN_CVTPD2QQ512,
29935 IX86_BUILTIN_CVTPS2QQ512,
29936 IX86_BUILTIN_CVTPD2UQQ512,
29937 IX86_BUILTIN_CVTPS2UQQ512,
29938 IX86_BUILTIN_CVTQQ2PS512,
29939 IX86_BUILTIN_CVTUQQ2PS512,
29940 IX86_BUILTIN_CVTQQ2PD512,
29941 IX86_BUILTIN_CVTUQQ2PD512,
29942 IX86_BUILTIN_CVTTPS2QQ512,
29943 IX86_BUILTIN_CVTTPS2UQQ512,
29944 IX86_BUILTIN_CVTTPD2QQ512,
29945 IX86_BUILTIN_CVTTPD2UQQ512,
29946 IX86_BUILTIN_RANGEPS512,
29947 IX86_BUILTIN_RANGEPD512,
29949 /* AVX512BW. */
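/* AVX-512BW extends byte and word element operations to 512-bit
vectors, using the wider 32-bit and 64-bit opmasks.  */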
29950 IX86_BUILTIN_PACKUSDW512,
29951 IX86_BUILTIN_PACKSSDW512,
29952 IX86_BUILTIN_LOADDQUHI512_MASK,
29953 IX86_BUILTIN_LOADDQUQI512_MASK,
29954 IX86_BUILTIN_PSLLDQ512,
29955 IX86_BUILTIN_PSRLDQ512,
29956 IX86_BUILTIN_STOREDQUHI512_MASK,
29957 IX86_BUILTIN_STOREDQUQI512_MASK,
29958 IX86_BUILTIN_PALIGNR512,
29959 IX86_BUILTIN_PALIGNR512_MASK,
29960 IX86_BUILTIN_MOVDQUHI512_MASK,
29961 IX86_BUILTIN_MOVDQUQI512_MASK,
29962 IX86_BUILTIN_PSADBW512,
29963 IX86_BUILTIN_DBPSADBW512,
29964 IX86_BUILTIN_PBROADCASTB512,
29965 IX86_BUILTIN_PBROADCASTB512_GPR,
29966 IX86_BUILTIN_PBROADCASTW512,
29967 IX86_BUILTIN_PBROADCASTW512_GPR,
29968 IX86_BUILTIN_PMOVSXBW512_MASK,
29969 IX86_BUILTIN_PMOVZXBW512_MASK,
29970 IX86_BUILTIN_VPERMVARHI512_MASK,
29971 IX86_BUILTIN_VPERMT2VARHI512,
29972 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
29973 IX86_BUILTIN_VPERMI2VARHI512,
29974 IX86_BUILTIN_PAVGB512,
29975 IX86_BUILTIN_PAVGW512,
29976 IX86_BUILTIN_PADDB512,
29977 IX86_BUILTIN_PSUBB512,
29978 IX86_BUILTIN_PSUBSB512,
29979 IX86_BUILTIN_PADDSB512,
29980 IX86_BUILTIN_PSUBUSB512,
29981 IX86_BUILTIN_PADDUSB512,
29982 IX86_BUILTIN_PSUBW512,
29983 IX86_BUILTIN_PADDW512,
29984 IX86_BUILTIN_PSUBSW512,
29985 IX86_BUILTIN_PADDSW512,
29986 IX86_BUILTIN_PSUBUSW512,
29987 IX86_BUILTIN_PADDUSW512,
29988 IX86_BUILTIN_PMAXUW512,
29989 IX86_BUILTIN_PMAXSW512,
29990 IX86_BUILTIN_PMINUW512,
29991 IX86_BUILTIN_PMINSW512,
29992 IX86_BUILTIN_PMAXUB512,
29993 IX86_BUILTIN_PMAXSB512,
29994 IX86_BUILTIN_PMINUB512,
29995 IX86_BUILTIN_PMINSB512,
29996 IX86_BUILTIN_PMOVWB512,
29997 IX86_BUILTIN_PMOVSWB512,
29998 IX86_BUILTIN_PMOVUSWB512,
29999 IX86_BUILTIN_PMULHRSW512_MASK,
30000 IX86_BUILTIN_PMULHUW512_MASK,
30001 IX86_BUILTIN_PMULHW512_MASK,
30002 IX86_BUILTIN_PMULLW512_MASK,
30003 IX86_BUILTIN_PSLLWI512_MASK,
30004 IX86_BUILTIN_PSLLW512_MASK,
30005 IX86_BUILTIN_PACKSSWB512,
30006 IX86_BUILTIN_PACKUSWB512,
30007 IX86_BUILTIN_PSRAVV32HI,
30008 IX86_BUILTIN_PMADDUBSW512_MASK,
30009 IX86_BUILTIN_PMADDWD512_MASK,
30010 IX86_BUILTIN_PSRLVV32HI,
30011 IX86_BUILTIN_PUNPCKHBW512,
30012 IX86_BUILTIN_PUNPCKHWD512,
30013 IX86_BUILTIN_PUNPCKLBW512,
30014 IX86_BUILTIN_PUNPCKLWD512,
30015 IX86_BUILTIN_PSHUFB512,
30016 IX86_BUILTIN_PSHUFHW512,
30017 IX86_BUILTIN_PSHUFLW512,
30018 IX86_BUILTIN_PSRAWI512,
30019 IX86_BUILTIN_PSRAW512,
30020 IX86_BUILTIN_PSRLWI512,
30021 IX86_BUILTIN_PSRLW512,
30022 IX86_BUILTIN_CVTB2MASK512,
30023 IX86_BUILTIN_CVTW2MASK512,
30024 IX86_BUILTIN_CVTMASK2B512,
30025 IX86_BUILTIN_CVTMASK2W512,
30026 IX86_BUILTIN_PCMPEQB512_MASK,
30027 IX86_BUILTIN_PCMPEQW512_MASK,
30028 IX86_BUILTIN_PCMPGTB512_MASK,
30029 IX86_BUILTIN_PCMPGTW512_MASK,
30030 IX86_BUILTIN_PTESTMB512,
30031 IX86_BUILTIN_PTESTMW512,
30032 IX86_BUILTIN_PTESTNMB512,
30033 IX86_BUILTIN_PTESTNMW512,
30034 IX86_BUILTIN_PSLLVV32HI,
30035 IX86_BUILTIN_PABSB512,
30036 IX86_BUILTIN_PABSW512,
30037 IX86_BUILTIN_BLENDMW512,
30038 IX86_BUILTIN_BLENDMB512,
30039 IX86_BUILTIN_CMPB512,
30040 IX86_BUILTIN_CMPW512,
30041 IX86_BUILTIN_UCMPB512,
30042 IX86_BUILTIN_UCMPW512,
30044 /* Alternate 4- and 8-element gather/scatter for the vectorizer
30045 where all operands are 32-byte or 64-byte wide respectively. */
30046 IX86_BUILTIN_GATHERALTSIV4DF,
30047 IX86_BUILTIN_GATHERALTDIV8SF,
30048 IX86_BUILTIN_GATHERALTSIV4DI,
30049 IX86_BUILTIN_GATHERALTDIV8SI,
30050 IX86_BUILTIN_GATHER3ALTDIV16SF,
30051 IX86_BUILTIN_GATHER3ALTDIV16SI,
30052 IX86_BUILTIN_GATHER3ALTSIV4DF,
30053 IX86_BUILTIN_GATHER3ALTDIV8SF,
30054 IX86_BUILTIN_GATHER3ALTSIV4DI,
30055 IX86_BUILTIN_GATHER3ALTDIV8SI,
30056 IX86_BUILTIN_GATHER3ALTSIV8DF,
30057 IX86_BUILTIN_GATHER3ALTSIV8DI,
30058 IX86_BUILTIN_GATHER3DIV16SF,
30059 IX86_BUILTIN_GATHER3DIV16SI,
30060 IX86_BUILTIN_GATHER3DIV8DF,
30061 IX86_BUILTIN_GATHER3DIV8DI,
30062 IX86_BUILTIN_GATHER3SIV16SF,
30063 IX86_BUILTIN_GATHER3SIV16SI,
30064 IX86_BUILTIN_GATHER3SIV8DF,
30065 IX86_BUILTIN_GATHER3SIV8DI,
30066 IX86_BUILTIN_SCATTERDIV16SF,
30067 IX86_BUILTIN_SCATTERDIV16SI,
30068 IX86_BUILTIN_SCATTERDIV8DF,
30069 IX86_BUILTIN_SCATTERDIV8DI,
30070 IX86_BUILTIN_SCATTERSIV16SF,
30071 IX86_BUILTIN_SCATTERSIV16SI,
30072 IX86_BUILTIN_SCATTERSIV8DF,
30073 IX86_BUILTIN_SCATTERSIV8DI,
30075 /* AVX512PF */
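/* Gather/scatter prefetch builtins: D/Q selects dword or qword indices,
PS/PD the precision of the elements being prefetched.  */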
30076 IX86_BUILTIN_GATHERPFQPD,
30077 IX86_BUILTIN_GATHERPFDPS,
30078 IX86_BUILTIN_GATHERPFDPD,
30079 IX86_BUILTIN_GATHERPFQPS,
30080 IX86_BUILTIN_SCATTERPFDPD,
30081 IX86_BUILTIN_SCATTERPFDPS,
30082 IX86_BUILTIN_SCATTERPFQPD,
30083 IX86_BUILTIN_SCATTERPFQPS,
30085 /* AVX-512ER */
30086 IX86_BUILTIN_EXP2PD_MASK,
30087 IX86_BUILTIN_EXP2PS_MASK,
30088 IX86_BUILTIN_EXP2PS,
30089 IX86_BUILTIN_RCP28PD,
30090 IX86_BUILTIN_RCP28PS,
30091 IX86_BUILTIN_RCP28SD,
30092 IX86_BUILTIN_RCP28SS,
30093 IX86_BUILTIN_RSQRT28PD,
30094 IX86_BUILTIN_RSQRT28PS,
30095 IX86_BUILTIN_RSQRT28SD,
30096 IX86_BUILTIN_RSQRT28SS,
30098 /* AVX-512IFMA */
30099 IX86_BUILTIN_VPMADD52LUQ512,
30100 IX86_BUILTIN_VPMADD52HUQ512,
30101 IX86_BUILTIN_VPMADD52LUQ256,
30102 IX86_BUILTIN_VPMADD52HUQ256,
30103 IX86_BUILTIN_VPMADD52LUQ128,
30104 IX86_BUILTIN_VPMADD52HUQ128,
30105 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30106 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30107 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30108 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30109 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30110 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30112 /* AVX-512VBMI */
30113 IX86_BUILTIN_VPMULTISHIFTQB512,
30114 IX86_BUILTIN_VPMULTISHIFTQB256,
30115 IX86_BUILTIN_VPMULTISHIFTQB128,
30116 IX86_BUILTIN_VPERMVARQI512_MASK,
30117 IX86_BUILTIN_VPERMT2VARQI512,
30118 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30119 IX86_BUILTIN_VPERMI2VARQI512,
30120 IX86_BUILTIN_VPERMVARQI256_MASK,
30121 IX86_BUILTIN_VPERMVARQI128_MASK,
30122 IX86_BUILTIN_VPERMT2VARQI256,
30123 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30124 IX86_BUILTIN_VPERMT2VARQI128,
30125 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30126 IX86_BUILTIN_VPERMI2VARQI256,
30127 IX86_BUILTIN_VPERMI2VARQI128,
30129 /* SHA builtins. */
30130 IX86_BUILTIN_SHA1MSG1,
30131 IX86_BUILTIN_SHA1MSG2,
30132 IX86_BUILTIN_SHA1NEXTE,
30133 IX86_BUILTIN_SHA1RNDS4,
30134 IX86_BUILTIN_SHA256MSG1,
30135 IX86_BUILTIN_SHA256MSG2,
30136 IX86_BUILTIN_SHA256RNDS2,
30138 /* CLWB instructions. */
30139 IX86_BUILTIN_CLWB,
30141 /* PCOMMIT instructions. */
30142 IX86_BUILTIN_PCOMMIT,
30144 /* CLFLUSHOPT instructions. */
30145 IX86_BUILTIN_CLFLUSHOPT,
30147 /* TFmode support builtins. */
30148 IX86_BUILTIN_INFQ,
30149 IX86_BUILTIN_HUGE_VALQ,
30150 IX86_BUILTIN_FABSQ,
30151 IX86_BUILTIN_COPYSIGNQ,
30153 /* Vectorizer support builtins. */
30154 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30155 IX86_BUILTIN_CPYSGNPS,
30156 IX86_BUILTIN_CPYSGNPD,
30157 IX86_BUILTIN_CPYSGNPS256,
30158 IX86_BUILTIN_CPYSGNPS512,
30159 IX86_BUILTIN_CPYSGNPD256,
30160 IX86_BUILTIN_CPYSGNPD512,
30161 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30162 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30165 /* FMA4 instructions. */
30166 IX86_BUILTIN_VFMADDSS,
30167 IX86_BUILTIN_VFMADDSD,
30168 IX86_BUILTIN_VFMADDPS,
30169 IX86_BUILTIN_VFMADDPD,
30170 IX86_BUILTIN_VFMADDPS256,
30171 IX86_BUILTIN_VFMADDPD256,
30172 IX86_BUILTIN_VFMADDSUBPS,
30173 IX86_BUILTIN_VFMADDSUBPD,
30174 IX86_BUILTIN_VFMADDSUBPS256,
30175 IX86_BUILTIN_VFMADDSUBPD256,
30177 /* FMA3 instructions. */
30178 IX86_BUILTIN_VFMADDSS3,
30179 IX86_BUILTIN_VFMADDSD3,
30181 /* XOP instructions. */
30182 IX86_BUILTIN_VPCMOV,
30183 IX86_BUILTIN_VPCMOV_V2DI,
30184 IX86_BUILTIN_VPCMOV_V4SI,
30185 IX86_BUILTIN_VPCMOV_V8HI,
30186 IX86_BUILTIN_VPCMOV_V16QI,
30187 IX86_BUILTIN_VPCMOV_V4SF,
30188 IX86_BUILTIN_VPCMOV_V2DF,
30189 IX86_BUILTIN_VPCMOV256,
30190 IX86_BUILTIN_VPCMOV_V4DI256,
30191 IX86_BUILTIN_VPCMOV_V8SI256,
30192 IX86_BUILTIN_VPCMOV_V16HI256,
30193 IX86_BUILTIN_VPCMOV_V32QI256,
30194 IX86_BUILTIN_VPCMOV_V8SF256,
30195 IX86_BUILTIN_VPCMOV_V4DF256,
30197 IX86_BUILTIN_VPPERM,
30199 IX86_BUILTIN_VPMACSSWW,
30200 IX86_BUILTIN_VPMACSWW,
30201 IX86_BUILTIN_VPMACSSWD,
30202 IX86_BUILTIN_VPMACSWD,
30203 IX86_BUILTIN_VPMACSSDD,
30204 IX86_BUILTIN_VPMACSDD,
30205 IX86_BUILTIN_VPMACSSDQL,
30206 IX86_BUILTIN_VPMACSSDQH,
30207 IX86_BUILTIN_VPMACSDQL,
30208 IX86_BUILTIN_VPMACSDQH,
30209 IX86_BUILTIN_VPMADCSSWD,
30210 IX86_BUILTIN_VPMADCSWD,
30212 IX86_BUILTIN_VPHADDBW,
30213 IX86_BUILTIN_VPHADDBD,
30214 IX86_BUILTIN_VPHADDBQ,
30215 IX86_BUILTIN_VPHADDWD,
30216 IX86_BUILTIN_VPHADDWQ,
30217 IX86_BUILTIN_VPHADDDQ,
30218 IX86_BUILTIN_VPHADDUBW,
30219 IX86_BUILTIN_VPHADDUBD,
30220 IX86_BUILTIN_VPHADDUBQ,
30221 IX86_BUILTIN_VPHADDUWD,
30222 IX86_BUILTIN_VPHADDUWQ,
30223 IX86_BUILTIN_VPHADDUDQ,
30224 IX86_BUILTIN_VPHSUBBW,
30225 IX86_BUILTIN_VPHSUBWD,
30226 IX86_BUILTIN_VPHSUBDQ,
30228 IX86_BUILTIN_VPROTB,
30229 IX86_BUILTIN_VPROTW,
30230 IX86_BUILTIN_VPROTD,
30231 IX86_BUILTIN_VPROTQ,
30232 IX86_BUILTIN_VPROTB_IMM,
30233 IX86_BUILTIN_VPROTW_IMM,
30234 IX86_BUILTIN_VPROTD_IMM,
30235 IX86_BUILTIN_VPROTQ_IMM,
30237 IX86_BUILTIN_VPSHLB,
30238 IX86_BUILTIN_VPSHLW,
30239 IX86_BUILTIN_VPSHLD,
30240 IX86_BUILTIN_VPSHLQ,
30241 IX86_BUILTIN_VPSHAB,
30242 IX86_BUILTIN_VPSHAW,
30243 IX86_BUILTIN_VPSHAD,
30244 IX86_BUILTIN_VPSHAQ,
30246 IX86_BUILTIN_VFRCZSS,
30247 IX86_BUILTIN_VFRCZSD,
30248 IX86_BUILTIN_VFRCZPS,
30249 IX86_BUILTIN_VFRCZPD,
30250 IX86_BUILTIN_VFRCZPS256,
30251 IX86_BUILTIN_VFRCZPD256,
30253 IX86_BUILTIN_VPCOMEQUB,
30254 IX86_BUILTIN_VPCOMNEUB,
30255 IX86_BUILTIN_VPCOMLTUB,
30256 IX86_BUILTIN_VPCOMLEUB,
30257 IX86_BUILTIN_VPCOMGTUB,
30258 IX86_BUILTIN_VPCOMGEUB,
30259 IX86_BUILTIN_VPCOMFALSEUB,
30260 IX86_BUILTIN_VPCOMTRUEUB,
30262 IX86_BUILTIN_VPCOMEQUW,
30263 IX86_BUILTIN_VPCOMNEUW,
30264 IX86_BUILTIN_VPCOMLTUW,
30265 IX86_BUILTIN_VPCOMLEUW,
30266 IX86_BUILTIN_VPCOMGTUW,
30267 IX86_BUILTIN_VPCOMGEUW,
30268 IX86_BUILTIN_VPCOMFALSEUW,
30269 IX86_BUILTIN_VPCOMTRUEUW,
30271 IX86_BUILTIN_VPCOMEQUD,
30272 IX86_BUILTIN_VPCOMNEUD,
30273 IX86_BUILTIN_VPCOMLTUD,
30274 IX86_BUILTIN_VPCOMLEUD,
30275 IX86_BUILTIN_VPCOMGTUD,
30276 IX86_BUILTIN_VPCOMGEUD,
30277 IX86_BUILTIN_VPCOMFALSEUD,
30278 IX86_BUILTIN_VPCOMTRUEUD,
30280 IX86_BUILTIN_VPCOMEQUQ,
30281 IX86_BUILTIN_VPCOMNEUQ,
30282 IX86_BUILTIN_VPCOMLTUQ,
30283 IX86_BUILTIN_VPCOMLEUQ,
30284 IX86_BUILTIN_VPCOMGTUQ,
30285 IX86_BUILTIN_VPCOMGEUQ,
30286 IX86_BUILTIN_VPCOMFALSEUQ,
30287 IX86_BUILTIN_VPCOMTRUEUQ,
30289 IX86_BUILTIN_VPCOMEQB,
30290 IX86_BUILTIN_VPCOMNEB,
30291 IX86_BUILTIN_VPCOMLTB,
30292 IX86_BUILTIN_VPCOMLEB,
30293 IX86_BUILTIN_VPCOMGTB,
30294 IX86_BUILTIN_VPCOMGEB,
30295 IX86_BUILTIN_VPCOMFALSEB,
30296 IX86_BUILTIN_VPCOMTRUEB,
30298 IX86_BUILTIN_VPCOMEQW,
30299 IX86_BUILTIN_VPCOMNEW,
30300 IX86_BUILTIN_VPCOMLTW,
30301 IX86_BUILTIN_VPCOMLEW,
30302 IX86_BUILTIN_VPCOMGTW,
30303 IX86_BUILTIN_VPCOMGEW,
30304 IX86_BUILTIN_VPCOMFALSEW,
30305 IX86_BUILTIN_VPCOMTRUEW,
30307 IX86_BUILTIN_VPCOMEQD,
30308 IX86_BUILTIN_VPCOMNED,
30309 IX86_BUILTIN_VPCOMLTD,
30310 IX86_BUILTIN_VPCOMLED,
30311 IX86_BUILTIN_VPCOMGTD,
30312 IX86_BUILTIN_VPCOMGED,
30313 IX86_BUILTIN_VPCOMFALSED,
30314 IX86_BUILTIN_VPCOMTRUED,
30316 IX86_BUILTIN_VPCOMEQQ,
30317 IX86_BUILTIN_VPCOMNEQ,
30318 IX86_BUILTIN_VPCOMLTQ,
30319 IX86_BUILTIN_VPCOMLEQ,
30320 IX86_BUILTIN_VPCOMGTQ,
30321 IX86_BUILTIN_VPCOMGEQ,
30322 IX86_BUILTIN_VPCOMFALSEQ,
30323 IX86_BUILTIN_VPCOMTRUEQ,
30325 /* LWP instructions. */
30326 IX86_BUILTIN_LLWPCB,
30327 IX86_BUILTIN_SLWPCB,
30328 IX86_BUILTIN_LWPVAL32,
30329 IX86_BUILTIN_LWPVAL64,
30330 IX86_BUILTIN_LWPINS32,
30331 IX86_BUILTIN_LWPINS64,
30333 IX86_BUILTIN_CLZS,
30335 /* RTM */
30336 IX86_BUILTIN_XBEGIN,
30337 IX86_BUILTIN_XEND,
30338 IX86_BUILTIN_XABORT,
30339 IX86_BUILTIN_XTEST,
30341 /* MPX */
30342 IX86_BUILTIN_BNDMK,
30343 IX86_BUILTIN_BNDSTX,
30344 IX86_BUILTIN_BNDLDX,
30345 IX86_BUILTIN_BNDCL,
30346 IX86_BUILTIN_BNDCU,
30347 IX86_BUILTIN_BNDRET,
30348 IX86_BUILTIN_BNDNARROW,
30349 IX86_BUILTIN_BNDINT,
30350 IX86_BUILTIN_SIZEOF,
30351 IX86_BUILTIN_BNDLOWER,
30352 IX86_BUILTIN_BNDUPPER,
30354 /* BMI instructions. */
30355 IX86_BUILTIN_BEXTR32,
30356 IX86_BUILTIN_BEXTR64,
30357 IX86_BUILTIN_CTZS,
30359 /* TBM instructions. */
30360 IX86_BUILTIN_BEXTRI32,
30361 IX86_BUILTIN_BEXTRI64,
30363 /* BMI2 instructions. */
30364 IX86_BUILTIN_BZHI32,
30365 IX86_BUILTIN_BZHI64,
30366 IX86_BUILTIN_PDEP32,
30367 IX86_BUILTIN_PDEP64,
30368 IX86_BUILTIN_PEXT32,
30369 IX86_BUILTIN_PEXT64,
30371 /* ADX instructions. */
30372 IX86_BUILTIN_ADDCARRYX32,
30373 IX86_BUILTIN_ADDCARRYX64,
30375 /* SBB instructions. */
30376 IX86_BUILTIN_SBB32,
30377 IX86_BUILTIN_SBB64,
30379 /* FSGSBASE instructions. */
30380 IX86_BUILTIN_RDFSBASE32,
30381 IX86_BUILTIN_RDFSBASE64,
30382 IX86_BUILTIN_RDGSBASE32,
30383 IX86_BUILTIN_RDGSBASE64,
30384 IX86_BUILTIN_WRFSBASE32,
30385 IX86_BUILTIN_WRFSBASE64,
30386 IX86_BUILTIN_WRGSBASE32,
30387 IX86_BUILTIN_WRGSBASE64,
30389 /* RDRND instructions. */
30390 IX86_BUILTIN_RDRAND16_STEP,
30391 IX86_BUILTIN_RDRAND32_STEP,
30392 IX86_BUILTIN_RDRAND64_STEP,
30394 /* RDSEED instructions. */
30395 IX86_BUILTIN_RDSEED16_STEP,
30396 IX86_BUILTIN_RDSEED32_STEP,
30397 IX86_BUILTIN_RDSEED64_STEP,
30399 /* F16C instructions. */
30400 IX86_BUILTIN_CVTPH2PS,
30401 IX86_BUILTIN_CVTPH2PS256,
30402 IX86_BUILTIN_CVTPS2PH,
30403 IX86_BUILTIN_CVTPS2PH256,
30405 /* CFString built-in for darwin */
30406 IX86_BUILTIN_CFSTRING,
30408 /* Builtins to get CPU type and supported features; a usage sketch follows this enum. */
30409 IX86_BUILTIN_CPU_INIT,
30410 IX86_BUILTIN_CPU_IS,
30411 IX86_BUILTIN_CPU_SUPPORTS,
30413 /* Read/write FLAGS register built-ins. */
30414 IX86_BUILTIN_READ_FLAGS,
30415 IX86_BUILTIN_WRITE_FLAGS,
30417 IX86_BUILTIN_MAX
30418 };
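/* Illustrative usage sketch (added commentary, not from the original
   sources): the CPU feature builtins enumerated above are user-visible.
   use_avx2_path and use_generic_path are placeholder names.

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
     else
       use_generic_path ();

   __builtin_cpu_init () only has to be called explicitly from code that
   runs before constructors; otherwise the detection data is already in
   place when __builtin_cpu_is / __builtin_cpu_supports are used.  */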
30420 /* Table for the ix86 builtin decls. */
30421 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30423 /* Table of all of the builtin functions that are possible with different ISAs,
30424 but are waiting to be built until a function is declared to use that
30425 ISA. */
30426 struct builtin_isa {
30427 const char *name; /* function name */
30428 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30429 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30430 bool const_p; /* true if the declaration is constant */
30431 bool leaf_p; /* true if the declaration has leaf attribute */
30432 bool nothrow_p; /* true if the declaration has nothrow attribute */
30433 bool set_and_not_built_p;
30434 };
30436 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30439 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30440 of isa_flags the builtin requires in the ix86_builtins_isa array. Store the
30441 function decl in the ix86_builtins array. Return the function decl, or
30442 NULL_TREE if the builtin was not added.
30444 If the front end has a special hook for builtin functions, delay adding
30445 builtin functions that aren't in the current ISA until the ISA is changed
30446 with function specific optimization. Doing so can save about 300K for the
30447 default compiler. When the builtin is expanded, check at that time whether
30448 it is valid.
30450 If the front end doesn't have a special hook, record all builtins, even
30451 those not in the current ISA, in case the user uses function specific
30452 options for a different ISA, so that we don't get scope errors if a builtin
30453 is added in the middle of a function scope. */
30455 static inline tree
30456 def_builtin (HOST_WIDE_INT mask, const char *name,
30457 enum ix86_builtin_func_type tcode,
30458 enum ix86_builtins code)
30459 {
30460 tree decl = NULL_TREE;
30462 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30463 {
30464 ix86_builtins_isa[(int) code].isa = mask;
30466 mask &= ~OPTION_MASK_ISA_64BIT;
30467 if (mask == 0
30468 || (mask & ix86_isa_flags) != 0
30469 || (lang_hooks.builtin_function
30470 == lang_hooks.builtin_function_ext_scope))
30472 {
30473 tree type = ix86_get_builtin_func_type (tcode);
30474 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30475 NULL, NULL_TREE);
30476 ix86_builtins[(int) code] = decl;
30477 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30478 }
30479 else
30480 {
30481 ix86_builtins[(int) code] = NULL_TREE;
30482 ix86_builtins_isa[(int) code].tcode = tcode;
30483 ix86_builtins_isa[(int) code].name = name;
30484 ix86_builtins_isa[(int) code].leaf_p = false;
30485 ix86_builtins_isa[(int) code].nothrow_p = false;
30486 ix86_builtins_isa[(int) code].const_p = false;
30487 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30488 }
30489 }
30491 return decl;
30492 }
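/* Illustrative sketch (an assumption for exposition, not code from the
   original file): a hypothetical direct registration through def_builtin.
   The builtin name, function type and enum code are placeholders.

     decl = def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
                         V4DI_FTYPE_V4DI_V4DI, IX86_BUILTIN_EXAMPLE);

   If OPTION_MASK_ISA_AVX2 is not in ix86_isa_flags and the front end does
   not register builtins at extension scope, decl is NULL_TREE and the
   request is merely recorded in ix86_builtins_isa; the decl is built later
   by ix86_add_new_builtins once the ISA becomes available.  */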
30494 /* Like def_builtin, but also marks the function decl "const". */
30496 static inline tree
30497 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30498 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30499 {
30500 tree decl = def_builtin (mask, name, tcode, code);
30501 if (decl)
30502 TREE_READONLY (decl) = 1;
30503 else
30504 ix86_builtins_isa[(int) code].const_p = true;
30506 return decl;
30507 }
30509 /* Add any new builtin functions for a given ISA that may not have been
30510 declared yet. This saves a bit of space compared to adding all of the
30511 declarations to the tree up front, even when they are never used. */
30513 static void
30514 ix86_add_new_builtins (HOST_WIDE_INT isa)
30515 {
30516 int i;
30518 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30519 {
30520 if ((ix86_builtins_isa[i].isa & isa) != 0
30521 && ix86_builtins_isa[i].set_and_not_built_p)
30522 {
30523 tree decl, type;
30525 /* Don't define the builtin again. */
30526 ix86_builtins_isa[i].set_and_not_built_p = false;
30528 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30529 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30530 type, i, BUILT_IN_MD, NULL,
30531 NULL_TREE);
30533 ix86_builtins[i] = decl;
30534 if (ix86_builtins_isa[i].const_p)
30535 TREE_READONLY (decl) = 1;
30536 if (ix86_builtins_isa[i].leaf_p)
30537 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30538 NULL_TREE);
30539 if (ix86_builtins_isa[i].nothrow_p)
30540 TREE_NOTHROW (decl) = 1;
30541 }
30542 }
30543 }
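/* Illustrative sketch (an assumption about typical use, not text from the
   original sources): the deferred builtins recorded by def_builtin become
   visible once the ISA flags change, for example via the target attribute.

     __attribute__ ((target ("avx2")))
     void
     foo (void)
     {
       ...an AVX2 builtin that was skipped at startup is usable here...
     }

   Switching to the new option set adds OPTION_MASK_ISA_AVX2 to the ISA
   flags, and ix86_add_new_builtins then declares, at extension scope, every
   builtin recorded with set_and_not_built_p for that ISA.  */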
30545 /* Bits for builtin_description.flag. */
30547 /* Set when we don't support the comparison natively, and need to
30548 swap the comparison operands in order to support it. */
30549 #define BUILTIN_DESC_SWAP_OPERANDS 1
30551 struct builtin_description
30552 {
30553 const HOST_WIDE_INT mask;
30554 const enum insn_code icode;
30555 const char *const name;
30556 const enum ix86_builtins code;
30557 const enum rtx_code comparison;
30558 const int flag;
30559 };
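/* Annotated reading of one bdesc_comi entry below (added commentary only;
   the entry itself is taken verbatim from the table):

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   mask       - ISA the builtin requires (SSE here)
   icode      - insn pattern used to expand the builtin (sse_comi)
   name       - user-visible builtin name
   code       - the enum ix86_builtins value
   comparison - rtx comparison code applied by the expander (UNEQ)
   flag       - extra per-table data; BUILTIN_DESC_SWAP_OPERANDS asks the
                expander to swap the comparison operands when the
                comparison is not supported natively.  */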
30561 static const struct builtin_description bdesc_comi[] =
30562 {
30563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30571 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30587 };
30589 static const struct builtin_description bdesc_pcmpestr[] =
30590 {
30591 /* SSE4.2 */
30592 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30593 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30594 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30595 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30596 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30597 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30598 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30599 };
30601 static const struct builtin_description bdesc_pcmpistr[] =
30602 {
30603 /* SSE4.2 */
30604 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30605 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30606 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30607 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30608 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30609 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30610 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30611 };
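/* Illustrative user-level view of the string-compare tables above (a
   sketch only; the usual entry points are the _mm_cmpistr* / _mm_cmpestr*
   wrappers, and the 0x0c immediate below is just a placeholder mode):

     idx       = __builtin_ia32_pcmpistri128 (a, b, 0x0c);
     has_match = __builtin_ia32_pcmpistric128 (a, b, 0x0c);

   a and b are 16-byte QI vectors.  For the ...a/...c/...o/...s/...z
   variants the flag field of the table entry names the condition-code
   mode (CCAmode, CCCmode, ...) whose flag bit the builtin returns.  */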
30613 /* Special builtins with variable number of arguments. */
30614 static const struct builtin_description bdesc_special_args[] =
30615 {
30616 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30617 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30618 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30620 /* 80387 (for use internally for atomic compound assignment). */
30621 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30622 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30623 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30624 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30626 /* MMX */
30627 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30629 /* 3DNow! */
30630 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30632 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30633 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30634 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30635 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30636 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30637 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30638 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30639 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30640 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30642 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30643 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30644 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30645 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30646 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30647 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30648 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30649 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30651 /* SSE */
30652 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30653 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30654 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30661 /* SSE or 3DNow!A */
30662 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30663 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30665 /* SSE2 */
30666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30673 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30680 /* SSE3 */
30681 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30683 /* SSE4.1 */
30684 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30686 /* SSE4A */
30687 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30688 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30690 /* AVX */
30691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30694 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30695 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30696 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30721 /* AVX2 */
30722 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30723 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30724 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30732 /* AVX512F */
30733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30781 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30782 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30783 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30784 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30785 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30786 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30788 /* FSGSBASE */
30789 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30790 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30791 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30792 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30793 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30794 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30795 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30796 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30798 /* RTM */
30799 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30800 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30801 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30803 /* AVX512BW */
30804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30809 /* AVX512VL */
30810 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30811 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30812 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30813 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30847 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30848 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30849 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30905 /* PCOMMIT. */
30906 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30907 };
30909 /* Builtins with variable number of arguments. */
30910 static const struct builtin_description bdesc_args[] =
30911 {
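/* Each entry is { ISA option mask, insn code, builtin name, builtin enum,
   rtx comparison/sub-code (or UNKNOWN), prototype enum cast to int }.  The
   prototype enum (e.g. V4SF_FTYPE_V4SF_V4SF) tells the generic expander,
   ix86_expand_args_builtin elsewhere in this file, how many operands to
   fetch and how to marshal them.  */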
30912 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30913 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30914 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30915 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30916 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30917 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30918 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30920 /* MMX */
30921 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30923 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30924 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30930 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30932 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30933 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30938 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30941 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30945 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30952 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30956 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
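/* MMX shifts come in two flavors: the ..._SI_COUNT prototypes take the shift
   count as a scalar integer (psllwi and friends), while the vector _COUNT
   prototypes take the count in another MMX register (psllw and friends).  */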
30965 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30977 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30982 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30984 /* 3DNow! */
30985 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30986 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30987 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30988 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30990 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30991 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30992 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30993 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30994 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30995 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30996 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30997 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30998 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30999 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31000 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31001 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31002 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31003 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31004 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31006 /* 3DNow!A */
31007 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31008 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31009 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31010 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31011 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31012 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31014 /* SSE */
31015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31017 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31019 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31023 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31026 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31030 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31031 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31032 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
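/* The SSE compares below all expand through the single maskcmp pattern; the
   rtx_code in the fifth field selects the condition.  "Greater" forms are the
   swapped "less" forms (the _SWAP prototypes), and the negated forms use the
   unordered codes UNGE/UNGT so that NaNs compare the way cmpnltps and
   cmpnleps require.  */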
31039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31048 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31060 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31061 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31065 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31067 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31068 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31070 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31075 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31076 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31080 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31082 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31088 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31089 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31091 /* SSE MMX or 3DNow!A */
31092 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31093 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31094 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31096 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31097 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31098 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31099 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31101 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31102 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31104 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31106 /* SSE2 */
31107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31113 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31125 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31126 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31130 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31132 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31133 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31134 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31135 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31163 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31167 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31169 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31170 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31172 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31176 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31178 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31180 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31181 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31182 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31183 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31199 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31204 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31217 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31238 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31243 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
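/* The _INT_CONVERT prototypes below (pslldqi128, psrldqi128, and palignr in
   the SSSE3 group) pair a V2DI or V1DI builtin signature with an insn that
   operates on the register as a single TImode/DImode value; the expander
   reinterprets the vector operands accordingly and passes the count as the
   trailing immediate.  */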
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31248 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31276 /* SSE2 MMX */
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31280 /* SSE3 */
31281 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31282 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31284 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31285 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31286 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31287 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31288 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31289 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31291 /* SSSE3 */
31292 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31293 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31294 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31295 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31296 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31297 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31299 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31300 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31301 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31302 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31303 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31304 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31305 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31306 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31307 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31308 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31309 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31310 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31311 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31312 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31313 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31314 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31315 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31316 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31317 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31318 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31319 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31320 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31321 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31322 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31324 /* SSSE3. */
31325 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31326 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31328 /* SSE4.1 */
31329 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31330 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31331 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31332 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31333 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31334 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31335 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31336 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31337 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31338 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31340 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31341 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31342 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31343 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31344 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31345 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31346 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31358 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31367 /* SSE4.1 */
31368 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31369 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31370 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31371 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31373 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31374 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31375 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31376 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31378 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31379 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31381 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31382 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31384 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31385 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31386 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31387 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31389 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31390 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31392 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31393 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31395 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31396 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31397 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
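/* A usage sketch for the rounding and PTEST rows above (illustrative,
   assuming the usual <smmintrin.h> wrappers and vectorizer hooks): the
   _ROUND prototypes take no immediate from the user -- the expander supplies
   it from the fifth field (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC,
   ROUND_MXCSR) -- while the PTEST rows share one insn pattern and differ
   only in the rtx code that picks the tested flag (EQ = ZF, LTU = CF,
   GTU = neither flag set).

     __m128d f = __builtin_ia32_floorpd (x);                        -- used when floor () is vectorized
     __m128d r = __builtin_ia32_roundpd (x, 0x09);                  -- _mm_floor_pd: _MM_FROUND_FLOOR
     int     z = __builtin_ia32_ptestz128 ((__v2di) a, (__v2di) b); -- _mm_testz_si128  */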
31399 /* SSE4.2 */
31400 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31401 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31402 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31403 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31404 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
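/* A minimal sketch of how the CRC32 rows are reached, assuming the standard
   _mm_crc32_* wrappers (illustrative): each call folds one more chunk into
   the running CRC-32C value, and the DI form is 64-bit only, matching the
   OPTION_MASK_ISA_64BIT guard above.

     unsigned int c = 0xffffffffu;
     c = __builtin_ia32_crc32qi (c, byte);    -- _mm_crc32_u8
     c = __builtin_ia32_crc32si (c, word32);  -- _mm_crc32_u32  */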
31406 /* SSE4A */
31407 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31408 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31409 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31410 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31412 /* AES */
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31414 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31416 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31421 /* PCLMUL */
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
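/* The AES and PCLMUL rows above leave the name field 0: the generic
   registration loop over bdesc_args skips unnamed entries, and the
   user-visible builtins for these are declared elsewhere in this file;
   the rows still supply the insn code and prototype used at expansion
   time.  A usage sketch, assuming the usual <wmmintrin.h> wrapper
   (illustrative):

     __m128i lo = (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di) a, (__v2di) b, 0x00);  -- _mm_clmulepi64_si128 (a, b, 0x00)  */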
31424 /* AVX */
31425 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31426 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31429 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31430 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31433 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31439 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31440 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31441 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31442 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31443 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31444 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31445 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31446 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31447 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31448 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31449 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31450 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31461 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31473 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31474 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31478 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31479 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31480 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31481 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31483 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31485 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31488 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31496 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31500 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31508 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
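/* A sketch for the AVX compare rows above, assuming the standard
   <immintrin.h> wrappers (illustrative): the 5-bit predicate is passed as
   the trailing immediate, e.g. _CMP_LT_OQ is 0x11.

     __m256d lt = (__m256d) __builtin_ia32_cmppd256 ((__v4df) a, (__v4df) b, 0x11);  -- _mm256_cmp_pd (a, b, _CMP_LT_OQ)  */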
31565 /* AVX2 */
31566 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31567 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31568 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31569 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31570 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31571 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31572 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31574 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31575 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31576 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31577 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31581 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31582 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31583 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31585 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31586 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31587 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31588 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31589 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31590 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31591 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31592 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31593 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31594 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31595 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31596 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31597 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31605 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31606 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31607 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31608 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31609 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31610 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31611 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31612 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31613 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31614 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31615 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31616 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31617 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31621 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31630 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
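/* A sketch for the variable-count shift rows above, assuming the usual
   <avx2intrin.h> wrappers (illustrative): each element is shifted by the
   corresponding element of the second operand rather than by one scalar
   count.

     __m256i r = (__m256i) __builtin_ia32_psllv8si ((__v8si) v, (__v8si) counts);  -- _mm256_sllv_epi32
     __m128i s = (__m128i) __builtin_ia32_psrav4si ((__v4si) v, (__v4si) counts);  -- _mm_srav_epi32  */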
31713 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
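/* Presumably the builtin behind the 16-bit __lzcnt16 wrapper of this era
   (illustrative sketch; the wrapper name is an assumption):

     unsigned short n = __builtin_clzs (x);  -- leading-zero count of a 16-bit value  */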
31715 /* BMI */
31716 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31717 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31718 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
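/* A sketch for the BEXTR rows above, assuming the <bmiintrin.h> wrapper:
   the second operand packs the field start in bits 0-7 and the field length
   in bits 8-15, which is what _bextr_u32 (src, start, len) assembles
   (illustrative):

     unsigned int field = __builtin_ia32_bextr_u32 (src, (len << 8) | start);  */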
31720 /* TBM */
31721 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31722 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31724 /* F16C */
31725 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31726 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31727 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31728 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
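/* A sketch for the F16C rows above, assuming the usual <f16cintrin.h>
   wrappers (illustrative): the ps->ph direction takes a rounding-control
   immediate (0 = nearest even, 4 = use MXCSR), and the 128-bit form packs
   four half floats into the low 64 bits of the V8HI result.

     __m128i h = (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) v, 0);  -- _mm_cvtps_ph (v, 0)
     __m128  f = (__m128)  __builtin_ia32_vcvtph2ps ((__v8hi) h);     -- _mm_cvtph_ps (h)  */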
31730 /* BMI2 */
31731 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31732 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31733 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31734 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31735 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31736 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
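/* A sketch for the PDEP/PEXT rows above, assuming the standard
   <bmi2intrin.h> wrappers (illustrative): PDEP scatters the low bits of the
   source into the positions selected by the mask, and PEXT gathers them
   back.

     unsigned int scattered = __builtin_ia32_pdep_si (bits, mask);       -- _pdep_u32
     unsigned int gathered  = __builtin_ia32_pext_si (scattered, mask);  -- _pext_u32  */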
31738 /* AVX512F */
31739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31794 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31795 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31797 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31798 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
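  /* Note: the two entries above split VPBROADCASTQ from a scalar source.
     The _gpr form (register operand) is only enabled in 64-bit mode, while
     the _mem form is the 32-bit fallback that broadcasts the DImode value
     from memory, as the ISA masks encode.  */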
31799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31906 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31907 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31908 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31909 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31941 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
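  /* In the floorpd/ceilpd vec_pack entries above, the comparison field
     (normally UNKNOWN) is reused to carry ROUND_FLOOR / ROUND_CEIL, so a
     single insn pattern serves both rounding variants.  */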
31946 /* Mask arithmetic operations */
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
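  /* The mask operations above back the k-register intrinsics in
     avx512fintrin.h; e.g. _mm512_kand is (roughly) a wrapper such as:

       extern __inline __mmask16
       _mm512_kand (__mmask16 __A, __mmask16 __B)
       {
         return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A,
                                                   (__mmask16) __B);
       }

     (illustrative sketch only -- see avx512fintrin.h for the exact
     definitions and attributes).  */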
31958 /* SHA */
31959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
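  /* The SHA entries use a zero name field; the user-level intrinsics
     (e.g. _mm_sha1rnds4_epu32) are provided by shaintrin.h, and the
     builtin names themselves are registered separately.  */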
31967 /* AVX512VL. */
31968 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31969 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31978 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
31979 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31980 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
31981 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32006 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32007 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32008 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32009 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32010 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32011 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32012 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32013 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32014 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32015 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32016 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32017 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32018 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32023 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32024 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32025 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32026 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32027 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32028 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32029 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32030 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32031 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32032 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32035 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32036 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32037 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32038 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32059 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32060 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32061 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32062 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32063 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32064 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32065 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32066 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32072 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32073 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32075 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32076 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
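  /* As with the 512-bit VPBROADCASTQ entries above, the _gpr forms of the
     256/128-bit broadcasts require 64-bit mode and the _mem forms are the
     32-bit fallbacks.  */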
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32080 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32081 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32084 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32085 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32096 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32097 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32108 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32109 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32110 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32111 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32112 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32113 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32114 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32115 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32116 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32117 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32118 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32119 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32120 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32121 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32134 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32135 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32143 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32144 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32146 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32147 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32148 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32154 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32155 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32156 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32157 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32158 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32165 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32171 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32172 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32173 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32206 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32207 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32208 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32209 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32226 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32229 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32234 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32235 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32236 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32237 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32238 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32239 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32240 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32241 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32242 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32243 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32244 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32247 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32250 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32251 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32288 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32289 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32290 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32291 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32352 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32353 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32356 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32357 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32358 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32359 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32360 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32361 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32366 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32367 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32368 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32369 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32380 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32381 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32382 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32383 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32384 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32385 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32386 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32412 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32413 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32414 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32415 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32416 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32444 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32445 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32446 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32447 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32448 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32450 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32451 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32460 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32461 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32462 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32463 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32464 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32465 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32466 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32467 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32468 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32469 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32470 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32471 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32472 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32473 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32476 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32477 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32478 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32479 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32480 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32481 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32483 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32492 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32493 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32508 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32514 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32515 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32516 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32517 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32559 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32560 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32561 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32562 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32563 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32574 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32575 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32576 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32577 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32585 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32586 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32587 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32588 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32610 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32611 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32612 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32613 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32614 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32615 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32616 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32617 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32618 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32619 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32620 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32621 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32622 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32623 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32629 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32630 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32650 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32666 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32667 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32668 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32669 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32677 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32678 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32679 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
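/* Note: as throughout this table, each initializer is a builtin
   descriptor of the form { ISA option mask, insn code of the expander
   pattern, __builtin_ia32_* name, IX86_BUILTIN_* enumerator, rtx
   comparison code (UNKNOWN when unused), (int)-cast V*_FTYPE_*
   prototype code }.  */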
32683 /* AVX512DQ. */
32684 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32685 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32686 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32687 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32688 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32689 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32690 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32691 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32692 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32693 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32694 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32695 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32696 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32697 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32698 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32699 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32700 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32701 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32702 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32703 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32704 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32705 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32706 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32707 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32708 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32709 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32710 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32711 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32712 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32713 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32714 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
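/* Note: the cvt*2mask512 / cvtmask2*512 entries above wrap the AVX512DQ
   vpmovd2m/vpmovq2m and vpmovm2d/vpmovm2q instructions: the former copy
   the sign bit of each element into a mask register, the latter expand
   mask bits back into all-ones/all-zeros elements.  */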
32716 /* AVX512BW. */
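/* Note: in the 512-bit byte/word entries below the writemask type
   tracks the lane count -- V64QI operands take a DImode (64-bit) mask,
   V32HI operands an SImode (32-bit) mask.  */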
32717 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32718 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32719 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32720 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32721 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32722 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32723 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32724 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32725 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32726 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32727 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32728 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32729 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32730 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32731 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32732 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32733 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32734 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32735 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32736 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32737 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32738 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32739 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32740 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32741 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32742 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32743 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32744 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32745 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32746 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32747 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32748 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32749 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32750 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32751 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32752 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32753 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32754 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32755 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32756 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32757 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32758 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32759 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32760 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32761 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32762 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32763 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32764 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32765 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32809 /* AVX512IFMA */
32810 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32811 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32812 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32813 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32814 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32815 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32816 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32817 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32818 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32819 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32820 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32821 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32823 /* AVX512VBMI */
32824 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32825 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32826 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32827 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32828 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32829 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32830 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32831 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32832 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32833 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32834 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32835 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32836 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32837 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32838 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32839 };
32841 /* Builtins with rounding support. */
32842 static const struct builtin_description bdesc_round_args[] =
32843 {
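/* Each entry follows the usual builtin_description layout: ISA option mask,
   insn code, builtin name, IX86_BUILTIN_* code, comparison code (UNKNOWN
   here) and function type.  The trailing ..._INT in each function type is
   the rounding-mode immediate that every builtin in this table takes as its
   last argument.  */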
32844 /* AVX512F */
32845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32864 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32866 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32873 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32875 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32925 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32927 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32929 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32931 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32933 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32935 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32937 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32939 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32965 /* AVX512ER */
32966 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32967 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32968 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32969 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32970 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32971 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32972 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32973 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32974 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32975 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32977 /* AVX512DQ. */
32978 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32979 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32980 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32981 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32982 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32983 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32984 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32985 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32986 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32987 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32988 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32989 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32990 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32991 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32992 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
32993 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
32994 };
32996 /* Builtins for MPX. */
32997 static const struct builtin_description bdesc_mpx[] =
32998 {
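/* These builtins use (enum insn_code) 0 because they are not backed by a
   single named insn pattern; the builtin expander emits the MPX sequences
   by hand instead.  */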
32999 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33000 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33001 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33002 };
33004 /* Const builtins for MPX. */
33005 static const struct builtin_description bdesc_mpx_const[] =
33006 {
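/* Same layout as bdesc_mpx above.  These entries are kept in a separate
   table because they have no side effects, so they can be registered as
   const builtins and safely CSEd.  */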
33007 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33008 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33009 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33010 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33011 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33012 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33013 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33014 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33015 };
33017 /* FMA4 and XOP. */
33018 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33019 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33020 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33021 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33022 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33023 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33024 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33025 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33026 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33027 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33028 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33029 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33030 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33031 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33032 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33033 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33034 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33035 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33036 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33037 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33038 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33039 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33040 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33041 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33042 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33043 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33044 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33045 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33046 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33047 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33048 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33049 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33050 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33051 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33052 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33053 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33054 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33055 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33056 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33057 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33058 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33059 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33060 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33061 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33062 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33063 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33064 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33065 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33066 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33067 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33068 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33069 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
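/* The MULTI_ARG_* macros above are shorthand aliases for the function-type
   enumerators used by the FMA4 and XOP entries in bdesc_multi_arg below;
   they keep the table entries readable.  */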
33071 static const struct builtin_description bdesc_multi_arg[] =
33072 {
33073 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33074 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33075 UNKNOWN, (int)MULTI_ARG_3_SF },
33076 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33077 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33078 UNKNOWN, (int)MULTI_ARG_3_DF },
33080 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33081 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33082 UNKNOWN, (int)MULTI_ARG_3_SF },
33083 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33084 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33085 UNKNOWN, (int)MULTI_ARG_3_DF },
33087 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33088 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33089 UNKNOWN, (int)MULTI_ARG_3_SF },
33090 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33091 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33092 UNKNOWN, (int)MULTI_ARG_3_DF },
33093 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33094 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33095 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33096 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33097 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33098 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33100 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33101 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33102 UNKNOWN, (int)MULTI_ARG_3_SF },
33103 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33104 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33105 UNKNOWN, (int)MULTI_ARG_3_DF },
33106 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33107 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33108 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33109 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33110 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33111 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33113 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33114 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33115 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33123 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33131 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33133 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33146 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33163 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33202 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33234 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
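/* Illustrative only (not part of GCC): a hedged sketch of how one of the
   XOP comparison builtins registered above might be called directly from
   user code compiled with -mxop.  Assuming MULTI_ARG_2_QI_CMP corresponds
   to a V16QI result computed from two V16QI operands, the "vpcomltb"
   entry would be used roughly like this (guarded out, never compiled).  */
#if 0
typedef char example_v16qi __attribute__ ((vector_size (16)));

static example_v16qi
example_xop_compare_lt (example_v16qi a, example_v16qi b)
{
  /* Each result byte is all-ones where a < b, all-zeros otherwise.  */
  return __builtin_ia32_vpcomltb (a, b);
}
#endif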
33273 /* TM vector builtins. */
33275 /* Reuse the existing x86-specific `struct builtin_description' because
33276 we're lazy. Add casts to make them fit. */
33277 static const struct builtin_description bdesc_tm[] =
33279 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33280 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33281 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33282 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33283 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33284 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33285 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33287 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33288 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33289 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33290 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33291 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33292 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33293 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33295 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33296 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33297 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33298 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33299 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33300 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33301 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33303 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33304 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33305 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
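/* Illustrative only (not part of GCC): a hedged sketch of the kind of user
   code that, when compiled with -fgnu-tm, may end up using the per-size
   _ITM_ load/store entry points registered in bdesc_tm above (for example
   the 128-bit variants for a 16-byte vector access).  The exact lowering
   depends on the TM instrumentation pass.  */
#if 0
typedef float example_v4sf __attribute__ ((vector_size (16)));

static example_v4sf example_shared;

static void
example_tm_store (example_v4sf v)
{
  __transaction_atomic
    {
      /* Inside a transaction this store may be instrumented with an
	 _ITM_W* call such as _ITM_WM128 instead of a plain move.  */
      example_shared = v;
    }
}
#endif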
33308 /* TM callbacks. */
33310 /* Return the builtin decl needed to load a vector of TYPE. */
33312 static tree
33313 ix86_builtin_tm_load (tree type)
33315 if (TREE_CODE (type) == VECTOR_TYPE)
33317 switch (tree_to_uhwi (TYPE_SIZE (type)))
33319 case 64:
33320 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33321 case 128:
33322 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33323 case 256:
33324 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33327 return NULL_TREE;
33330 /* Return the builtin decl needed to store a vector of TYPE. */
33332 static tree
33333 ix86_builtin_tm_store (tree type)
33335 if (TREE_CODE (type) == VECTOR_TYPE)
33337 switch (tree_to_uhwi (TYPE_SIZE (type)))
33339 case 64:
33340 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33341 case 128:
33342 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33343 case 256:
33344 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33347 return NULL_TREE;
33350 /* Initialize the transactional memory vector load/store builtins. */
33352 static void
33353 ix86_init_tm_builtins (void)
33355 enum ix86_builtin_func_type ftype;
33356 const struct builtin_description *d;
33357 size_t i;
33358 tree decl;
33359 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33360 tree attrs_log, attrs_type_log;
33362 if (!flag_tm)
33363 return;
33365 /* If there are no builtins defined, we must be compiling in a
33366 language without trans-mem support. */
33367 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33368 return;
33370 /* Use whatever attributes a normal TM load has. */
33371 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33372 attrs_load = DECL_ATTRIBUTES (decl);
33373 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33374 /* Use whatever attributes a normal TM store has. */
33375 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33376 attrs_store = DECL_ATTRIBUTES (decl);
33377 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33378 /* Use whatever attributes a normal TM log has. */
33379 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33380 attrs_log = DECL_ATTRIBUTES (decl);
33381 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33383 for (i = 0, d = bdesc_tm;
33384 i < ARRAY_SIZE (bdesc_tm);
33385 i++, d++)
33387 if ((d->mask & ix86_isa_flags) != 0
33388 || (lang_hooks.builtin_function
33389 == lang_hooks.builtin_function_ext_scope))
33391 tree type, attrs, attrs_type;
33392 enum built_in_function code = (enum built_in_function) d->code;
33394 ftype = (enum ix86_builtin_func_type) d->flag;
33395 type = ix86_get_builtin_func_type (ftype);
33397 if (BUILTIN_TM_LOAD_P (code))
33399 attrs = attrs_load;
33400 attrs_type = attrs_type_load;
33402 else if (BUILTIN_TM_STORE_P (code))
33404 attrs = attrs_store;
33405 attrs_type = attrs_type_store;
33407 else
33409 attrs = attrs_log;
33410 attrs_type = attrs_type_log;
33412 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33413 /* The name without the "__builtin_" prefix, so the
33414 builtin can also be called directly. */
33415 d->name + strlen ("__builtin_"),
33416 attrs);
33417 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33418 set the TYPE_ATTRIBUTES. */
33419 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33421 set_builtin_decl (code, decl, false);
33426 /* Set up all the MMX/SSE builtins, including builtins for instructions that
33427 are not in the current target ISA, so that the user can compile particular
33428 modules with target-specific options that differ from the command-line
33429 options. */
33430 static void
33431 ix86_init_mmx_sse_builtins (void)
33433 const struct builtin_description * d;
33434 enum ix86_builtin_func_type ftype;
33435 size_t i;
33437 /* Add all special builtins with variable number of operands. */
33438 for (i = 0, d = bdesc_special_args;
33439 i < ARRAY_SIZE (bdesc_special_args);
33440 i++, d++)
33442 if (d->name == 0)
33443 continue;
33445 ftype = (enum ix86_builtin_func_type) d->flag;
33446 def_builtin (d->mask, d->name, ftype, d->code);
33449 /* Add all builtins with variable number of operands. */
33450 for (i = 0, d = bdesc_args;
33451 i < ARRAY_SIZE (bdesc_args);
33452 i++, d++)
33454 if (d->name == 0)
33455 continue;
33457 ftype = (enum ix86_builtin_func_type) d->flag;
33458 def_builtin_const (d->mask, d->name, ftype, d->code);
33461 /* Add all builtins with rounding. */
33462 for (i = 0, d = bdesc_round_args;
33463 i < ARRAY_SIZE (bdesc_round_args);
33464 i++, d++)
33466 if (d->name == 0)
33467 continue;
33469 ftype = (enum ix86_builtin_func_type) d->flag;
33470 def_builtin_const (d->mask, d->name, ftype, d->code);
33473 /* pcmpestr[im] insns. */
33474 for (i = 0, d = bdesc_pcmpestr;
33475 i < ARRAY_SIZE (bdesc_pcmpestr);
33476 i++, d++)
33478 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33479 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33480 else
33481 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33482 def_builtin_const (d->mask, d->name, ftype, d->code);
33485 /* pcmpistr[im] insns. */
33486 for (i = 0, d = bdesc_pcmpistr;
33487 i < ARRAY_SIZE (bdesc_pcmpistr);
33488 i++, d++)
33490 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33491 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33492 else
33493 ftype = INT_FTYPE_V16QI_V16QI_INT;
33494 def_builtin_const (d->mask, d->name, ftype, d->code);
33497 /* comi/ucomi insns. */
33498 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33500 if (d->mask == OPTION_MASK_ISA_SSE2)
33501 ftype = INT_FTYPE_V2DF_V2DF;
33502 else
33503 ftype = INT_FTYPE_V4SF_V4SF;
33504 def_builtin_const (d->mask, d->name, ftype, d->code);
33507 /* SSE */
33508 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33509 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33510 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33511 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33513 /* SSE or 3DNow!A */
33514 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33515 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33516 IX86_BUILTIN_MASKMOVQ);
33518 /* SSE2 */
33519 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33520 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33522 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33523 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33524 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33525 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33527 /* SSE3. */
33528 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33529 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33530 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33531 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33533 /* AES */
33534 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33535 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33536 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33537 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33538 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33539 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33540 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33541 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33542 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33543 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33544 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33545 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33547 /* PCLMUL */
33548 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33549 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33551 /* RDRND */
33552 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33553 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33554 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33555 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33556 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33557 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33558 IX86_BUILTIN_RDRAND64_STEP);
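/* Illustrative only (not part of GCC): a hedged sketch of using the
   __builtin_ia32_rdrand32_step builtin defined just above
   (INT_FTYPE_PUNSIGNED): it stores a random value through its pointer
   argument and returns nonzero on success.  Requires -mrdrnd.  */
#if 0
static unsigned int
example_rdrand32 (void)
{
  unsigned int value = 0;
  int i;

  /* RDRAND can transiently fail (carry flag clear), so retry a few times.  */
  for (i = 0; i < 10; i++)
    if (__builtin_ia32_rdrand32_step (&value))
      break;
  return value;
}
#endif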
33560 /* AVX2 */
33561 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33562 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33563 IX86_BUILTIN_GATHERSIV2DF);
33565 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33566 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33567 IX86_BUILTIN_GATHERSIV4DF);
33569 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33570 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33571 IX86_BUILTIN_GATHERDIV2DF);
33573 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33574 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33575 IX86_BUILTIN_GATHERDIV4DF);
33577 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33578 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33579 IX86_BUILTIN_GATHERSIV4SF);
33581 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33582 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33583 IX86_BUILTIN_GATHERSIV8SF);
33585 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33586 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33587 IX86_BUILTIN_GATHERDIV4SF);
33589 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33590 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33591 IX86_BUILTIN_GATHERDIV8SF);
33593 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33594 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33595 IX86_BUILTIN_GATHERSIV2DI);
33597 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33598 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33599 IX86_BUILTIN_GATHERSIV4DI);
33601 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33602 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33603 IX86_BUILTIN_GATHERDIV2DI);
33605 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33606 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33607 IX86_BUILTIN_GATHERDIV4DI);
33609 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33610 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33611 IX86_BUILTIN_GATHERSIV4SI);
33613 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33614 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33615 IX86_BUILTIN_GATHERSIV8SI);
33617 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33618 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33619 IX86_BUILTIN_GATHERDIV4SI);
33621 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33622 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33623 IX86_BUILTIN_GATHERDIV8SI);
33625 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33626 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33627 IX86_BUILTIN_GATHERALTSIV4DF);
33629 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33630 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33631 IX86_BUILTIN_GATHERALTDIV8SF);
33633 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33634 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33635 IX86_BUILTIN_GATHERALTSIV4DI);
33637 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33638 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33639 IX86_BUILTIN_GATHERALTDIV8SI);
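/* Illustrative only (not part of GCC): a hedged sketch of calling one of
   the AVX2 gather builtins defined above.  Per the
   V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT signature, the operands are the
   pass-through source, the base pointer, the index vector, the mask and a
   constant scale.  Requires -mavx2.  */
#if 0
typedef double example_v2df __attribute__ ((vector_size (16)));
typedef int example_v4si __attribute__ ((vector_size (16)));

static example_v2df
example_gather_pd (const double *base, example_v4si idx)
{
  example_v2df src = { 0.0, 0.0 };
  example_v2df mask = { -1.0, -1.0 };	/* Sign bit set: both lanes enabled.  */

  /* Gather base[idx[0]] and base[idx[1]], scaled by sizeof (double).  */
  return __builtin_ia32_gathersiv2df (src, base, idx, mask, 8);
}
#endif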
33641 /* AVX512F */
33642 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33643 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33644 IX86_BUILTIN_GATHER3SIV16SF);
33646 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33647 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33648 IX86_BUILTIN_GATHER3SIV8DF);
33650 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33651 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33652 IX86_BUILTIN_GATHER3DIV16SF);
33654 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33655 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33656 IX86_BUILTIN_GATHER3DIV8DF);
33658 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33659 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33660 IX86_BUILTIN_GATHER3SIV16SI);
33662 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33663 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33664 IX86_BUILTIN_GATHER3SIV8DI);
33666 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33667 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33668 IX86_BUILTIN_GATHER3DIV16SI);
33670 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33671 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33672 IX86_BUILTIN_GATHER3DIV8DI);
33674 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33675 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33676 IX86_BUILTIN_GATHER3ALTSIV8DF);
33678 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33679 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33680 IX86_BUILTIN_GATHER3ALTDIV16SF);
33682 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33683 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33684 IX86_BUILTIN_GATHER3ALTSIV8DI);
33686 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33687 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33688 IX86_BUILTIN_GATHER3ALTDIV16SI);
33690 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33691 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33692 IX86_BUILTIN_SCATTERSIV16SF);
33694 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33695 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33696 IX86_BUILTIN_SCATTERSIV8DF);
33698 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33699 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33700 IX86_BUILTIN_SCATTERDIV16SF);
33702 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33703 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33704 IX86_BUILTIN_SCATTERDIV8DF);
33706 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33707 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33708 IX86_BUILTIN_SCATTERSIV16SI);
33710 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33711 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33712 IX86_BUILTIN_SCATTERSIV8DI);
33714 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33715 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33716 IX86_BUILTIN_SCATTERDIV16SI);
33718 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33719 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33720 IX86_BUILTIN_SCATTERDIV8DI);
33722 /* AVX512VL */
33723 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33724 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33725 IX86_BUILTIN_GATHER3SIV2DF);
33727 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33728 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33729 IX86_BUILTIN_GATHER3SIV4DF);
33731 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33732 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33733 IX86_BUILTIN_GATHER3DIV2DF);
33735 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33736 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33737 IX86_BUILTIN_GATHER3DIV4DF);
33739 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33740 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33741 IX86_BUILTIN_GATHER3SIV4SF);
33743 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33744 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33745 IX86_BUILTIN_GATHER3SIV8SF);
33747 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33748 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33749 IX86_BUILTIN_GATHER3DIV4SF);
33751 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33752 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33753 IX86_BUILTIN_GATHER3DIV8SF);
33755 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33756 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33757 IX86_BUILTIN_GATHER3SIV2DI);
33759 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33760 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33761 IX86_BUILTIN_GATHER3SIV4DI);
33763 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33764 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33765 IX86_BUILTIN_GATHER3DIV2DI);
33767 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33768 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33769 IX86_BUILTIN_GATHER3DIV4DI);
33771 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33772 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33773 IX86_BUILTIN_GATHER3SIV4SI);
33775 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33776 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33777 IX86_BUILTIN_GATHER3SIV8SI);
33779 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33780 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33781 IX86_BUILTIN_GATHER3DIV4SI);
33783 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33784 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33785 IX86_BUILTIN_GATHER3DIV8SI);
33787 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33788 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33789 IX86_BUILTIN_GATHER3ALTSIV4DF);
33791 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33792 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33793 IX86_BUILTIN_GATHER3ALTDIV8SF);
33795 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33796 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33797 IX86_BUILTIN_GATHER3ALTSIV4DI);
33799 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33800 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33801 IX86_BUILTIN_GATHER3ALTDIV8SI);
33803 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33804 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33805 IX86_BUILTIN_SCATTERSIV8SF);
33807 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33808 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33809 IX86_BUILTIN_SCATTERSIV4SF);
33811 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33812 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33813 IX86_BUILTIN_SCATTERSIV4DF);
33815 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33816 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33817 IX86_BUILTIN_SCATTERSIV2DF);
33819 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33820 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33821 IX86_BUILTIN_SCATTERDIV8SF);
33823 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33824 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33825 IX86_BUILTIN_SCATTERDIV4SF);
33827 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33828 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33829 IX86_BUILTIN_SCATTERDIV4DF);
33831 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33832 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33833 IX86_BUILTIN_SCATTERDIV2DF);
33835 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33836 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33837 IX86_BUILTIN_SCATTERSIV8SI);
33839 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33840 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33841 IX86_BUILTIN_SCATTERSIV4SI);
33843 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33844 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33845 IX86_BUILTIN_SCATTERSIV4DI);
33847 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33848 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33849 IX86_BUILTIN_SCATTERSIV2DI);
33851 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33852 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33853 IX86_BUILTIN_SCATTERDIV8SI);
33855 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33856 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33857 IX86_BUILTIN_SCATTERDIV4SI);
33859 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33860 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33861 IX86_BUILTIN_SCATTERDIV4DI);
33863 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33864 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33865 IX86_BUILTIN_SCATTERDIV2DI);
33867 /* AVX512PF */
33868 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33869 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33870 IX86_BUILTIN_GATHERPFDPD);
33871 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33872 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33873 IX86_BUILTIN_GATHERPFDPS);
33874 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33875 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33876 IX86_BUILTIN_GATHERPFQPD);
33877 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33878 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33879 IX86_BUILTIN_GATHERPFQPS);
33880 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33881 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33882 IX86_BUILTIN_SCATTERPFDPD);
33883 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33884 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33885 IX86_BUILTIN_SCATTERPFDPS);
33886 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33887 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33888 IX86_BUILTIN_SCATTERPFQPD);
33889 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33890 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33891 IX86_BUILTIN_SCATTERPFQPS);
33893 /* SHA */
33894 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33895 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33896 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33897 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33898 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33899 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33900 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33901 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33902 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33903 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33904 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33905 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33906 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33907 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33909 /* RTM. */
33910 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33911 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33913 /* MMX access to the vec_init patterns. */
33914 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33915 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33917 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33918 V4HI_FTYPE_HI_HI_HI_HI,
33919 IX86_BUILTIN_VEC_INIT_V4HI);
33921 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33922 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33923 IX86_BUILTIN_VEC_INIT_V8QI);
33925 /* Access to the vec_extract patterns. */
33926 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33927 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33928 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33929 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33930 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33931 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33932 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33933 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33934 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33935 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33937 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33938 "__builtin_ia32_vec_ext_v4hi",
33939 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33941 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33942 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33944 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33945 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
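/* Illustrative only (not part of GCC): a hedged sketch of the vec_extract
   builtins above; per FLOAT_FTYPE_V4SF_INT, the second operand is a
   constant lane index.  */
#if 0
typedef float example_v4sf2 __attribute__ ((vector_size (16)));

static float
example_extract_lane1 (example_v4sf2 v)
{
  /* Extract element 1 of the V4SF operand.  */
  return __builtin_ia32_vec_ext_v4sf (v, 1);
}
#endif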
33947 /* Access to the vec_set patterns. */
33948 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33949 "__builtin_ia32_vec_set_v2di",
33950 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33952 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33953 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33955 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33956 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33958 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33959 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33961 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33962 "__builtin_ia32_vec_set_v4hi",
33963 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33965 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33966 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
33968 /* RDSEED */
33969 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33970 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33971 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33972 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33973 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33974 "__builtin_ia32_rdseed_di_step",
33975 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
33977 /* ADCX */
33978 def_builtin (0, "__builtin_ia32_addcarryx_u32",
33979 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
33980 def_builtin (OPTION_MASK_ISA_64BIT,
33981 "__builtin_ia32_addcarryx_u64",
33982 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33983 IX86_BUILTIN_ADDCARRYX64);
33985 /* SBB */
33986 def_builtin (0, "__builtin_ia32_sbb_u32",
33987 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
33988 def_builtin (OPTION_MASK_ISA_64BIT,
33989 "__builtin_ia32_sbb_u64",
33990 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33991 IX86_BUILTIN_SBB64);
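/* Illustrative only (not part of GCC): a hedged sketch of chaining the
   add-with-carry builtin defined above.  Per
   UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, it takes a carry-in, two 32-bit
   operands and a pointer for the sum, and returns the carry-out.  */
#if 0
static void
example_add64_via_u32 (unsigned int a_lo, unsigned int a_hi,
		       unsigned int b_lo, unsigned int b_hi,
		       unsigned int *r_lo, unsigned int *r_hi)
{
  unsigned char carry;

  /* Propagate the carry from the low word into the high word.  */
  carry = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, r_lo);
  (void) __builtin_ia32_addcarryx_u32 (carry, a_hi, b_hi, r_hi);
}
#endif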
33993 /* Read/write FLAGS. */
33994 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
33995 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33996 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
33997 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33998 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
33999 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34000 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34001 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
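/* Illustrative only (not part of GCC): a hedged sketch of the FLAGS
   builtins defined above.  The _u64 pair is defined only for 64-bit
   targets; -m32 code uses the _u32 pair instead.  */
#if 0
static unsigned long long
example_copy_eflags (void)
{
  unsigned long long flags = __builtin_ia32_readeflags_u64 ();

  /* Writing the same value back restores the flags register.  */
  __builtin_ia32_writeeflags_u64 (flags);
  return flags;
}
#endif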
34003 /* CLFLUSHOPT. */
34004 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34005 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34007 /* CLWB. */
34008 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34009 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34011 /* Add FMA4 multi-arg argument instructions */
34012 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34014 if (d->name == 0)
34015 continue;
34017 ftype = (enum ix86_builtin_func_type) d->flag;
34018 def_builtin_const (d->mask, d->name, ftype, d->code);
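/* Illustrative only (not part of GCC): because the builtins above are
   registered even when their ISA is not enabled on the command line, user
   code can still opt in per function via the target attribute.  A hedged
   sketch of that usage pattern follows.  */
#if 0
__attribute__ ((target ("avx2")))
static int
example_avx2_only (int x)
{
  /* AVX2 code (and AVX2 builtins) are accepted here even if the
     translation unit as a whole is compiled without -mavx2.  */
  return x + 1;
}
#endif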
34022 static void
34023 ix86_init_mpx_builtins ()
34025 const struct builtin_description * d;
34026 enum ix86_builtin_func_type ftype;
34027 tree decl;
34028 size_t i;
34030 for (i = 0, d = bdesc_mpx;
34031 i < ARRAY_SIZE (bdesc_mpx);
34032 i++, d++)
34034 if (d->name == 0)
34035 continue;
34037 ftype = (enum ix86_builtin_func_type) d->flag;
34038 decl = def_builtin (d->mask, d->name, ftype, d->code);
34040 /* Without the leaf and nothrow flags on MPX builtins,
34041 abnormal edges may follow their calls when setjmp
34042 is present in the function. Since there may be many
34043 MPX builtin calls, this creates lots of useless
34044 edges and enormous PHI nodes. To avoid this we mark
34045 MPX builtins as leaf and nothrow. */
34046 if (decl)
34048 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34049 NULL_TREE);
34050 TREE_NOTHROW (decl) = 1;
34052 else
34054 ix86_builtins_isa[(int)d->code].leaf_p = true;
34055 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34059 for (i = 0, d = bdesc_mpx_const;
34060 i < ARRAY_SIZE (bdesc_mpx_const);
34061 i++, d++)
34063 if (d->name == 0)
34064 continue;
34066 ftype = (enum ix86_builtin_func_type) d->flag;
34067 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34069 if (decl)
34071 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34072 NULL_TREE);
34073 TREE_NOTHROW (decl) = 1;
34075 else
34077 ix86_builtins_isa[(int)d->code].leaf_p = true;
34078 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34083 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34084 to return a pointer to VERSION_DECL if the outcome of the expression
34085 formed by PREDICATE_CHAIN is true. This function will be called during
34086 version dispatch to decide which function version to execute. It returns
34087 the basic block at the end, to which more conditions can be added. */
34089 static basic_block
34090 add_condition_to_bb (tree function_decl, tree version_decl,
34091 tree predicate_chain, basic_block new_bb)
34093 gimple return_stmt;
34094 tree convert_expr, result_var;
34095 gimple convert_stmt;
34096 gimple call_cond_stmt;
34097 gimple if_else_stmt;
34099 basic_block bb1, bb2, bb3;
34100 edge e12, e23;
34102 tree cond_var, and_expr_var = NULL_TREE;
34103 gimple_seq gseq;
34105 tree predicate_decl, predicate_arg;
34107 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34109 gcc_assert (new_bb != NULL);
34110 gseq = bb_seq (new_bb);
34113 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34114 build_fold_addr_expr (version_decl));
34115 result_var = create_tmp_var (ptr_type_node);
34116 convert_stmt = gimple_build_assign (result_var, convert_expr);
34117 return_stmt = gimple_build_return (result_var);
34119 if (predicate_chain == NULL_TREE)
34121 gimple_seq_add_stmt (&gseq, convert_stmt);
34122 gimple_seq_add_stmt (&gseq, return_stmt);
34123 set_bb_seq (new_bb, gseq);
34124 gimple_set_bb (convert_stmt, new_bb);
34125 gimple_set_bb (return_stmt, new_bb);
34126 pop_cfun ();
34127 return new_bb;
34130 while (predicate_chain != NULL)
34132 cond_var = create_tmp_var (integer_type_node);
34133 predicate_decl = TREE_PURPOSE (predicate_chain);
34134 predicate_arg = TREE_VALUE (predicate_chain);
34135 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34136 gimple_call_set_lhs (call_cond_stmt, cond_var);
34138 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34139 gimple_set_bb (call_cond_stmt, new_bb);
34140 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34142 predicate_chain = TREE_CHAIN (predicate_chain);
34144 if (and_expr_var == NULL)
34145 and_expr_var = cond_var;
34146 else
34148 gimple assign_stmt;
34149 /* Use MIN_EXPR to check whether any integer is zero:
34150 and_expr_var = min_expr <cond_var, and_expr_var>. */
34151 assign_stmt = gimple_build_assign (and_expr_var,
34152 build2 (MIN_EXPR, integer_type_node,
34153 cond_var, and_expr_var));
34155 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34156 gimple_set_bb (assign_stmt, new_bb);
34157 gimple_seq_add_stmt (&gseq, assign_stmt);
34161 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34162 integer_zero_node,
34163 NULL_TREE, NULL_TREE);
34164 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34165 gimple_set_bb (if_else_stmt, new_bb);
34166 gimple_seq_add_stmt (&gseq, if_else_stmt);
34168 gimple_seq_add_stmt (&gseq, convert_stmt);
34169 gimple_seq_add_stmt (&gseq, return_stmt);
34170 set_bb_seq (new_bb, gseq);
34172 bb1 = new_bb;
34173 e12 = split_block (bb1, if_else_stmt);
34174 bb2 = e12->dest;
34175 e12->flags &= ~EDGE_FALLTHRU;
34176 e12->flags |= EDGE_TRUE_VALUE;
34178 e23 = split_block (bb2, return_stmt);
34180 gimple_set_bb (convert_stmt, bb2);
34181 gimple_set_bb (return_stmt, bb2);
34183 bb3 = e23->dest;
34184 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34186 remove_edge (e23);
34187 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34189 pop_cfun ();
34191 return bb3;
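/* Illustrative only (not part of GCC): a hedged sketch, in source form, of
   the control flow that add_condition_to_bb builds in the resolver body.
   Each call adds one predicate test that returns the matching version, and
   the false edge falls through to the next, lower-priority test.  The foo_*
   names below are hypothetical version decls.  */
#if 0
extern int foo_avx2 (void);
extern int foo_sse42 (void);
extern int foo_default (void);

static void *
example_resolver_shape (void)
{
  /* Highest-priority predicate chain first.  */
  if (__builtin_cpu_is ("haswell"))
    return (void *) foo_avx2;
  if (__builtin_cpu_supports ("sse4.2"))
    return (void *) foo_sse42;
  return (void *) foo_default;
}
#endif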
34194 /* This parses the attribute arguments to target in DECL and determines
34195 the right builtin to use to match the platform specification.
34196 It returns the priority value for this version decl. If PREDICATE_LIST
34197 is not NULL, it stores the list of cpu features that need to be checked
34198 before dispatching this function. */
34200 static unsigned int
34201 get_builtin_code_for_version (tree decl, tree *predicate_list)
34203 tree attrs;
34204 struct cl_target_option cur_target;
34205 tree target_node;
34206 struct cl_target_option *new_target;
34207 const char *arg_str = NULL;
34208 const char *attrs_str = NULL;
34209 char *tok_str = NULL;
34210 char *token;
34212 /* Priority of i386 features; a greater value means a higher priority. This
34213 is used to decide the order in which function dispatch must happen. For
34214 instance, a version specialized for SSE4.2 should be checked for dispatch
34215 before a version for SSE3, as SSE4.2 implies SSE3. */
34216 enum feature_priority
34218 P_ZERO = 0,
34219 P_MMX,
34220 P_SSE,
34221 P_SSE2,
34222 P_SSE3,
34223 P_SSSE3,
34224 P_PROC_SSSE3,
34225 P_SSE4_A,
34226 P_PROC_SSE4_A,
34227 P_SSE4_1,
34228 P_SSE4_2,
34229 P_PROC_SSE4_2,
34230 P_POPCNT,
34231 P_AVX,
34232 P_PROC_AVX,
34233 P_FMA4,
34234 P_XOP,
34235 P_PROC_XOP,
34236 P_FMA,
34237 P_PROC_FMA,
34238 P_AVX2,
34239 P_PROC_AVX2,
34240 P_AVX512F
34243 enum feature_priority priority = P_ZERO;
34245 /* These are the target attribute strings for which a dispatcher is
34246 available, from fold_builtin_cpu. */
34248 static struct _feature_list
34250 const char *const name;
34251 const enum feature_priority priority;
34253 const feature_list[] =
34255 {"mmx", P_MMX},
34256 {"sse", P_SSE},
34257 {"sse2", P_SSE2},
34258 {"sse3", P_SSE3},
34259 {"sse4a", P_SSE4_A},
34260 {"ssse3", P_SSSE3},
34261 {"sse4.1", P_SSE4_1},
34262 {"sse4.2", P_SSE4_2},
34263 {"popcnt", P_POPCNT},
34264 {"avx", P_AVX},
34265 {"fma4", P_FMA4},
34266 {"xop", P_XOP},
34267 {"fma", P_FMA},
34268 {"avx2", P_AVX2},
34269 {"avx512f", P_AVX512F}
34273 static unsigned int NUM_FEATURES
34274 = sizeof (feature_list) / sizeof (struct _feature_list);
34276 unsigned int i;
34278 tree predicate_chain = NULL_TREE;
34279 tree predicate_decl, predicate_arg;
34281 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34282 gcc_assert (attrs != NULL);
34284 attrs = TREE_VALUE (TREE_VALUE (attrs));
34286 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34287 attrs_str = TREE_STRING_POINTER (attrs);
34289 /* Return priority zero for default function. */
34290 if (strcmp (attrs_str, "default") == 0)
34291 return 0;
34293 /* Handle arch= if specified. For priority, set it to be 1 more than
34294 the best instruction set the processor can handle. For instance, if
34295 there is a version for atom and a version for ssse3 (the highest ISA
34296 priority for atom), the atom version must be checked for dispatch
34297 before the ssse3 version. */
34298 if (strstr (attrs_str, "arch=") != NULL)
34300 cl_target_option_save (&cur_target, &global_options);
34301 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34302 &global_options_set);
34304 gcc_assert (target_node);
34305 new_target = TREE_TARGET_OPTION (target_node);
34306 gcc_assert (new_target);
34308 if (new_target->arch_specified && new_target->arch > 0)
34310 switch (new_target->arch)
34312 case PROCESSOR_CORE2:
34313 arg_str = "core2";
34314 priority = P_PROC_SSSE3;
34315 break;
34316 case PROCESSOR_NEHALEM:
34317 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34318 arg_str = "westmere";
34319 else
34320 /* We translate "arch=corei7" and "arch=nehalem" to
34321 "corei7" so that it will be mapped to M_INTEL_COREI7
34322 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34323 arg_str = "corei7";
34324 priority = P_PROC_SSE4_2;
34325 break;
34326 case PROCESSOR_SANDYBRIDGE:
34327 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34328 arg_str = "ivybridge";
34329 else
34330 arg_str = "sandybridge";
34331 priority = P_PROC_AVX;
34332 break;
34333 case PROCESSOR_HASWELL:
34334 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34335 arg_str = "broadwell";
34336 else
34337 arg_str = "haswell";
34338 priority = P_PROC_AVX2;
34339 break;
34340 case PROCESSOR_BONNELL:
34341 arg_str = "bonnell";
34342 priority = P_PROC_SSSE3;
34343 break;
34344 case PROCESSOR_SILVERMONT:
34345 arg_str = "silvermont";
34346 priority = P_PROC_SSE4_2;
34347 break;
34348 case PROCESSOR_AMDFAM10:
34349 arg_str = "amdfam10h";
34350 priority = P_PROC_SSE4_A;
34351 break;
34352 case PROCESSOR_BTVER1:
34353 arg_str = "btver1";
34354 priority = P_PROC_SSE4_A;
34355 break;
34356 case PROCESSOR_BTVER2:
34357 arg_str = "btver2";
34358 priority = P_PROC_AVX;
34359 break;
34360 case PROCESSOR_BDVER1:
34361 arg_str = "bdver1";
34362 priority = P_PROC_XOP;
34363 break;
34364 case PROCESSOR_BDVER2:
34365 arg_str = "bdver2";
34366 priority = P_PROC_FMA;
34367 break;
34368 case PROCESSOR_BDVER3:
34369 arg_str = "bdver3";
34370 priority = P_PROC_FMA;
34371 break;
34372 case PROCESSOR_BDVER4:
34373 arg_str = "bdver4";
34374 priority = P_PROC_AVX2;
34375 break;
34379 cl_target_option_restore (&global_options, &cur_target);
34381 if (predicate_list && arg_str == NULL)
34383 error_at (DECL_SOURCE_LOCATION (decl),
34384 "No dispatcher found for the versioning attributes");
34385 return 0;
34388 if (predicate_list)
34390 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34391 /* For a C string literal the length includes the trailing NULL. */
34392 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34393 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34394 predicate_chain);
34398 /* Process feature name. */
34399 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34400 strcpy (tok_str, attrs_str);
34401 token = strtok (tok_str, ",");
34402 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34404 while (token != NULL)
34406 /* Do not process "arch=" */
34407 if (strncmp (token, "arch=", 5) == 0)
34409 token = strtok (NULL, ",");
34410 continue;
34412 for (i = 0; i < NUM_FEATURES; ++i)
34414 if (strcmp (token, feature_list[i].name) == 0)
34416 if (predicate_list)
34418 predicate_arg = build_string_literal (
34419 strlen (feature_list[i].name) + 1,
34420 feature_list[i].name);
34421 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34422 predicate_chain);
34424 /* Find the maximum priority feature. */
34425 if (feature_list[i].priority > priority)
34426 priority = feature_list[i].priority;
34428 break;
34431 if (predicate_list && i == NUM_FEATURES)
34433 error_at (DECL_SOURCE_LOCATION (decl),
34434 "No dispatcher found for %s", token);
34435 return 0;
34437 token = strtok (NULL, ",");
34439 free (tok_str);
34441 if (predicate_list && predicate_chain == NULL_TREE)
34443 error_at (DECL_SOURCE_LOCATION (decl),
34444 "No dispatcher found for the versioning attributes : %s",
34445 attrs_str);
34446 return 0;
34448 else if (predicate_list)
34450 predicate_chain = nreverse (predicate_chain);
34451 *predicate_list = predicate_chain;
34454 return priority;
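/* Illustrative only (not part of GCC): a hedged sketch of the source-level
   function multiversioning this parsing serves (accepted by the C++ front
   end).  The attribute strings are the same feature names listed in
   feature_list above; "default" gets priority zero, and a call dispatches
   at run time to the highest-priority version the CPU supports.  */
#if 0
__attribute__ ((target ("default")))
int example_mv (void) { return 0; }

__attribute__ ((target ("sse4.2")))
int example_mv (void) { return 1; }

__attribute__ ((target ("avx2")))
int example_mv (void) { return 2; }
#endif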
34457 /* This compares the priority of target features in function DECL1
34458 and DECL2. It returns positive value if DECL1 is higher priority,
34459 negative value if DECL2 is higher priority and 0 if they are the
34460 same. */
34462 static int
34463 ix86_compare_version_priority (tree decl1, tree decl2)
34465 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34466 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34468 return (int)priority1 - (int)priority2;
34471 /* V1 and V2 point to function versions with different priorities
34472 based on the target ISA. This function compares their priorities. */
34474 static int
34475 feature_compare (const void *v1, const void *v2)
34477 typedef struct _function_version_info
34479 tree version_decl;
34480 tree predicate_chain;
34481 unsigned int dispatch_priority;
34482 } function_version_info;
34484 const function_version_info c1 = *(const function_version_info *)v1;
34485 const function_version_info c2 = *(const function_version_info *)v2;
34486 return (c2.dispatch_priority - c1.dispatch_priority);
34489 /* This function generates the dispatch function for
34490 multi-versioned functions. DISPATCH_DECL is the function which will
34491 contain the dispatch logic. FNDECLS is the vector of function
34492 decls to dispatch between. EMPTY_BB is the basic block pointer
34493 in DISPATCH_DECL in which the dispatch code is generated. */
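/* A rough, illustrative sketch of the dispatch code generated here
   (the names are hypothetical, not taken from these sources):

       void *foo_resolver (void)
       {
         __builtin_cpu_init ();
         if (predicate for the highest-priority version holds)
           return foo_for_that_version;
         ...
         return foo_default;
       }

   Each predicate is built from the version's "target" attribute by
   get_builtin_code_for_version and wired up by add_condition_to_bb.  */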
34495 static int
34496 dispatch_function_versions (tree dispatch_decl,
34497 void *fndecls_p,
34498 basic_block *empty_bb)
34500 tree default_decl;
34501 gimple ifunc_cpu_init_stmt;
34502 gimple_seq gseq;
34503 int ix;
34504 tree ele;
34505 vec<tree> *fndecls;
34506 unsigned int num_versions = 0;
34507 unsigned int actual_versions = 0;
34508 unsigned int i;
34510 struct _function_version_info
34512 tree version_decl;
34513 tree predicate_chain;
34514 unsigned int dispatch_priority;
34515 }*function_version_info;
34517 gcc_assert (dispatch_decl != NULL
34518 && fndecls_p != NULL
34519 && empty_bb != NULL);
34521 /* fndecls_p is actually a vector.  */
34522 fndecls = static_cast<vec<tree> *> (fndecls_p);
34524 /* At least one more version other than the default. */
34525 num_versions = fndecls->length ();
34526 gcc_assert (num_versions >= 2);
34528 function_version_info = (struct _function_version_info *)
34529 XNEWVEC (struct _function_version_info, (num_versions - 1));
34531 /* The first version in the vector is the default decl. */
34532 default_decl = (*fndecls)[0];
34534 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34536 gseq = bb_seq (*empty_bb);
34537 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34538 constructors, so explicitly call __builtin_cpu_init here. */
34539 ifunc_cpu_init_stmt = gimple_build_call_vec (
34540 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34541 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34542 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34543 set_bb_seq (*empty_bb, gseq);
34545 pop_cfun ();
34548 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34550 tree version_decl = ele;
34551 tree predicate_chain = NULL_TREE;
34552 unsigned int priority;
34553 /* Get attribute string, parse it and find the right predicate decl.
34554 The predicate function could be a lengthy combination of many
34555 features, like arch-type and various isa-variants. */
34556 priority = get_builtin_code_for_version (version_decl,
34557 &predicate_chain);
34559 if (predicate_chain == NULL_TREE)
34560 continue;
34562 function_version_info [actual_versions].version_decl = version_decl;
34563 function_version_info [actual_versions].predicate_chain
34564 = predicate_chain;
34565 function_version_info [actual_versions].dispatch_priority = priority;
34566 actual_versions++;
34569 /* Sort the versions according to descending order of dispatch priority. The
34570 priority is based on the ISA. This is not a perfect solution. There
34571 could still be ambiguity. If more than one function version is suitable
34572 to execute, which one should be dispatched? In the future, allow the user
34573 to specify a dispatch priority next to the version. */
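/* For instance, versions declared with target ("avx2") and with
   target ("arch=haswell") are both eligible on a Haswell machine;
   the sort below only decides which predicate is tested first,
   based on the computed priority.  */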
34574 qsort (function_version_info, actual_versions,
34575 sizeof (struct _function_version_info), feature_compare);
34577 for (i = 0; i < actual_versions; ++i)
34578 *empty_bb = add_condition_to_bb (dispatch_decl,
34579 function_version_info[i].version_decl,
34580 function_version_info[i].predicate_chain,
34581 *empty_bb);
34583 /* dispatch default version at the end. */
34584 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34585 NULL, *empty_bb);
34587 free (function_version_info);
34588 return 0;
34591 /* Comparator function to be used in the qsort routine to sort the
34592 attribute specification strings of the "target" attribute. */
34594 static int
34595 attr_strcmp (const void *v1, const void *v2)
34597 const char *c1 = *(char *const*)v1;
34598 const char *c2 = *(char *const*)v2;
34599 return strcmp (c1, c2);
34602 /* ARGLIST is the argument to the target attribute. This function tokenizes
34603 the comma-separated arguments, sorts them, and returns a string which
34604 is a unique identifier for the comma-separated arguments. It also
34605 replaces non-identifier characters "=,-" with "_". */
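/* For example (illustrative only), the attribute arguments
   "avx,arch=slm" become "arch_slm_avx": '=' and '-' are rewritten
   to '_', the comma-separated tokens are sorted, and the result is
   rejoined with '_'.  */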
34607 static char *
34608 sorted_attr_string (tree arglist)
34610 tree arg;
34611 size_t str_len_sum = 0;
34612 char **args = NULL;
34613 char *attr_str, *ret_str;
34614 char *attr = NULL;
34615 unsigned int argnum = 1;
34616 unsigned int i;
34618 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34620 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34621 size_t len = strlen (str);
34622 str_len_sum += len + 1;
34623 if (arg != arglist)
34624 argnum++;
34625 for (i = 0; i < strlen (str); i++)
34626 if (str[i] == ',')
34627 argnum++;
34630 attr_str = XNEWVEC (char, str_len_sum);
34631 str_len_sum = 0;
34632 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34634 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34635 size_t len = strlen (str);
34636 memcpy (attr_str + str_len_sum, str, len);
34637 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34638 str_len_sum += len + 1;
34641 /* Replace "=,-" with "_". */
34642 for (i = 0; i < strlen (attr_str); i++)
34643 if (attr_str[i] == '=' || attr_str[i]== '-')
34644 attr_str[i] = '_';
34646 if (argnum == 1)
34647 return attr_str;
34649 args = XNEWVEC (char *, argnum);
34651 i = 0;
34652 attr = strtok (attr_str, ",");
34653 while (attr != NULL)
34655 args[i] = attr;
34656 i++;
34657 attr = strtok (NULL, ",");
34660 qsort (args, argnum, sizeof (char *), attr_strcmp);
34662 ret_str = XNEWVEC (char, str_len_sum);
34663 str_len_sum = 0;
34664 for (i = 0; i < argnum; i++)
34666 size_t len = strlen (args[i]);
34667 memcpy (ret_str + str_len_sum, args[i], len);
34668 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34669 str_len_sum += len + 1;
34672 XDELETEVEC (args);
34673 XDELETEVEC (attr_str);
34674 return ret_str;
34677 /* This function changes the assembler name for functions that are
34678 versions. If DECL is a function version and has a "target"
34679 attribute, it appends the attribute string to its assembler name. */
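/* For instance (illustrative), a version of foo declared with
   __attribute__ ((target ("arch=slm,avx"))) is assembled as
   "foo.arch_slm_avx" (the suffix comes from sorted_attr_string),
   while the "default" version keeps the plain name "foo".  */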
34681 static tree
34682 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34684 tree version_attr;
34685 const char *orig_name, *version_string;
34686 char *attr_str, *assembler_name;
34688 if (DECL_DECLARED_INLINE_P (decl)
34689 && lookup_attribute ("gnu_inline",
34690 DECL_ATTRIBUTES (decl)))
34691 error_at (DECL_SOURCE_LOCATION (decl),
34692 "Function versions cannot be marked as gnu_inline,"
34693 " bodies have to be generated");
34695 if (DECL_VIRTUAL_P (decl)
34696 || DECL_VINDEX (decl))
34697 sorry ("Virtual function multiversioning not supported");
34699 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34701 /* target attribute string cannot be NULL. */
34702 gcc_assert (version_attr != NULL_TREE);
34704 orig_name = IDENTIFIER_POINTER (id);
34705 version_string
34706 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34708 if (strcmp (version_string, "default") == 0)
34709 return id;
34711 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34712 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34714 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34716 /* Allow assembler name to be modified if already set. */
34717 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34718 SET_DECL_RTL (decl, NULL);
34720 tree ret = get_identifier (assembler_name);
34721 XDELETEVEC (attr_str);
34722 XDELETEVEC (assembler_name);
34723 return ret;
34726 /* This function returns true if FN1 and FN2 are versions of the same function,
34727 that is, the target strings of the function decls are different. This assumes
34728 that FN1 and FN2 have the same signature. */
34730 static bool
34731 ix86_function_versions (tree fn1, tree fn2)
34733 tree attr1, attr2;
34734 char *target1, *target2;
34735 bool result;
34737 if (TREE_CODE (fn1) != FUNCTION_DECL
34738 || TREE_CODE (fn2) != FUNCTION_DECL)
34739 return false;
34741 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34742 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34744 /* At least one function decl should have the target attribute specified. */
34745 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34746 return false;
34748 /* Diagnose missing target attribute if one of the decls is already
34749 multi-versioned. */
34750 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34752 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34754 if (attr2 != NULL_TREE)
34756 tree tem = fn1;
34757 fn1 = fn2;
34758 fn2 = tem;
34759 attr1 = attr2;
34761 error_at (DECL_SOURCE_LOCATION (fn2),
34762 "missing %<target%> attribute for multi-versioned %D",
34763 fn2);
34764 inform (DECL_SOURCE_LOCATION (fn1),
34765 "previous declaration of %D", fn1);
34766 /* Prevent diagnosing of the same error multiple times. */
34767 DECL_ATTRIBUTES (fn2)
34768 = tree_cons (get_identifier ("target"),
34769 copy_node (TREE_VALUE (attr1)),
34770 DECL_ATTRIBUTES (fn2));
34772 return false;
34775 target1 = sorted_attr_string (TREE_VALUE (attr1));
34776 target2 = sorted_attr_string (TREE_VALUE (attr2));
34778 /* The sorted target strings must be different for fn1 and fn2
34779 to be versions. */
34780 if (strcmp (target1, target2) == 0)
34781 result = false;
34782 else
34783 result = true;
34785 XDELETEVEC (target1);
34786 XDELETEVEC (target2);
34788 return result;
34791 static tree
34792 ix86_mangle_decl_assembler_name (tree decl, tree id)
34794 /* For function version, add the target suffix to the assembler name. */
34795 if (TREE_CODE (decl) == FUNCTION_DECL
34796 && DECL_FUNCTION_VERSIONED (decl))
34797 id = ix86_mangle_function_version_assembler_name (decl, id);
34798 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34799 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34800 #endif
34802 return id;
34805 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34806 is true, append the full path name of the source file. */
34808 static char *
34809 make_name (tree decl, const char *suffix, bool make_unique)
34811 char *global_var_name;
34812 int name_len;
34813 const char *name;
34814 const char *unique_name = NULL;
34816 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34818 /* Get a unique name that can be used globally without any chance
34819 of collision at link time. */
34820 if (make_unique)
34821 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34823 name_len = strlen (name) + strlen (suffix) + 2;
34825 if (make_unique)
34826 name_len += strlen (unique_name) + 1;
34827 global_var_name = XNEWVEC (char, name_len);
34829 /* Use '.' to concatenate names as it is demangler friendly. */
34830 if (make_unique)
34831 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34832 suffix);
34833 else
34834 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34836 return global_var_name;
34839 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34841 /* Make a dispatcher declaration for the multi-versioned function DECL.
34842 Calls to the DECL function will be replaced with calls to the dispatcher
34843 by the front-end. Return the decl created. */
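/* Naming sketch (illustrative): for a public versioned function foo the
   dispatcher built below is named "foo.ifunc"; for a non-public foo a
   file-derived unique string is inserted as well ("foo.<unique>.ifunc",
   see make_name) so the globally visible name cannot collide at link
   time.  */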
34845 static tree
34846 make_dispatcher_decl (const tree decl)
34848 tree func_decl;
34849 char *func_name;
34850 tree fn_type, func_type;
34851 bool is_uniq = false;
34853 if (TREE_PUBLIC (decl) == 0)
34854 is_uniq = true;
34856 func_name = make_name (decl, "ifunc", is_uniq);
34858 fn_type = TREE_TYPE (decl);
34859 func_type = build_function_type (TREE_TYPE (fn_type),
34860 TYPE_ARG_TYPES (fn_type));
34862 func_decl = build_fn_decl (func_name, func_type);
34863 XDELETEVEC (func_name);
34864 TREE_USED (func_decl) = 1;
34865 DECL_CONTEXT (func_decl) = NULL_TREE;
34866 DECL_INITIAL (func_decl) = error_mark_node;
34867 DECL_ARTIFICIAL (func_decl) = 1;
34868 /* Mark this func as external, the resolver will flip it again if
34869 it gets generated. */
34870 DECL_EXTERNAL (func_decl) = 1;
34871 /* IFUNCs have to be externally visible, so make the dispatcher public. */
34872 TREE_PUBLIC (func_decl) = 1;
34874 return func_decl;
34877 #endif
34879 /* Returns true if DECL is multi-versioned and is the default function,
34880 that is, it is not tagged with a target-specific optimization. */
34882 static bool
34883 is_function_default_version (const tree decl)
34885 if (TREE_CODE (decl) != FUNCTION_DECL
34886 || !DECL_FUNCTION_VERSIONED (decl))
34887 return false;
34888 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34889 gcc_assert (attr);
34890 attr = TREE_VALUE (TREE_VALUE (attr));
34891 return (TREE_CODE (attr) == STRING_CST
34892 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34895 /* Make a dispatcher declaration for the multi-versioned function DECL.
34896 Calls to the DECL function will be replaced with calls to the dispatcher
34897 by the front-end. Returns the decl of the dispatcher function. */
34899 static tree
34900 ix86_get_function_versions_dispatcher (void *decl)
34902 tree fn = (tree) decl;
34903 struct cgraph_node *node = NULL;
34904 struct cgraph_node *default_node = NULL;
34905 struct cgraph_function_version_info *node_v = NULL;
34906 struct cgraph_function_version_info *first_v = NULL;
34908 tree dispatch_decl = NULL;
34910 struct cgraph_function_version_info *default_version_info = NULL;
34912 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34914 node = cgraph_node::get (fn);
34915 gcc_assert (node != NULL);
34917 node_v = node->function_version ();
34918 gcc_assert (node_v != NULL);
34920 if (node_v->dispatcher_resolver != NULL)
34921 return node_v->dispatcher_resolver;
34923 /* Find the default version and make it the first node. */
34924 first_v = node_v;
34925 /* Go to the beginning of the chain. */
34926 while (first_v->prev != NULL)
34927 first_v = first_v->prev;
34928 default_version_info = first_v;
34929 while (default_version_info != NULL)
34931 if (is_function_default_version
34932 (default_version_info->this_node->decl))
34933 break;
34934 default_version_info = default_version_info->next;
34937 /* If there is no default node, just return NULL. */
34938 if (default_version_info == NULL)
34939 return NULL;
34941 /* Make default info the first node. */
34942 if (first_v != default_version_info)
34944 default_version_info->prev->next = default_version_info->next;
34945 if (default_version_info->next)
34946 default_version_info->next->prev = default_version_info->prev;
34947 first_v->prev = default_version_info;
34948 default_version_info->next = first_v;
34949 default_version_info->prev = NULL;
34952 default_node = default_version_info->this_node;
34954 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34955 if (targetm.has_ifunc_p ())
34957 struct cgraph_function_version_info *it_v = NULL;
34958 struct cgraph_node *dispatcher_node = NULL;
34959 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34961 /* Right now, the dispatching is done via ifunc. */
34962 dispatch_decl = make_dispatcher_decl (default_node->decl);
34964 dispatcher_node = cgraph_node::get_create (dispatch_decl);
34965 gcc_assert (dispatcher_node != NULL);
34966 dispatcher_node->dispatcher_function = 1;
34967 dispatcher_version_info
34968 = dispatcher_node->insert_new_function_version ();
34969 dispatcher_version_info->next = default_version_info;
34970 dispatcher_node->definition = 1;
34972 /* Set the dispatcher for all the versions. */
34973 it_v = default_version_info;
34974 while (it_v != NULL)
34976 it_v->dispatcher_resolver = dispatch_decl;
34977 it_v = it_v->next;
34980 else
34981 #endif
34983 error_at (DECL_SOURCE_LOCATION (default_node->decl),
34984 "multiversioning needs ifunc which is not supported "
34985 "on this target");
34988 return dispatch_decl;
34991 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
34992 it to CHAIN. */
34994 static tree
34995 make_attribute (const char *name, const char *arg_name, tree chain)
34997 tree attr_name;
34998 tree attr_arg_name;
34999 tree attr_args;
35000 tree attr;
35002 attr_name = get_identifier (name);
35003 attr_arg_name = build_string (strlen (arg_name), arg_name);
35004 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35005 attr = tree_cons (attr_name, attr_args, chain);
35006 return attr;
35009 /* Make the resolver function decl to dispatch the versions of
35010 a multi-versioned function, DEFAULT_DECL. Create an
35011 empty basic block in the resolver and store the pointer in
35012 EMPTY_BB. Return the decl of the resolver function. */
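/* Sketch of the resulting arrangement (illustrative): for a versioned
   function foo, the resolver created below is named "foo.resolver" and
   the dispatcher "foo.ifunc" is tagged with
   __attribute__ ((ifunc ("foo.resolver"))), so the version is chosen by
   the IFUNC mechanism when the symbol is resolved.  */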
35014 static tree
35015 make_resolver_func (const tree default_decl,
35016 const tree dispatch_decl,
35017 basic_block *empty_bb)
35019 char *resolver_name;
35020 tree decl, type, decl_name, t;
35021 bool is_uniq = false;
35023 /* IFUNCs have to be globally visible. So, if the default_decl is
35024 not, then the name of the IFUNC should be made unique. */
35025 if (TREE_PUBLIC (default_decl) == 0)
35026 is_uniq = true;
35028 /* Append the filename to the resolver function if the versions are
35029 not externally visible. This is because the resolver function has
35030 to be externally visible for the loader to find it. So, appending
35031 the filename will prevent conflicts with a resolver function from
35032 another module which is based on the same version name. */
35033 resolver_name = make_name (default_decl, "resolver", is_uniq);
35035 /* The resolver function should return a (void *). */
35036 type = build_function_type_list (ptr_type_node, NULL_TREE);
35038 decl = build_fn_decl (resolver_name, type);
35039 decl_name = get_identifier (resolver_name);
35040 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35042 DECL_NAME (decl) = decl_name;
35043 TREE_USED (decl) = 1;
35044 DECL_ARTIFICIAL (decl) = 1;
35045 DECL_IGNORED_P (decl) = 0;
35046 /* IFUNC resolvers have to be externally visible. */
35047 TREE_PUBLIC (decl) = 1;
35048 DECL_UNINLINABLE (decl) = 1;
35050 /* Resolver is not external, body is generated. */
35051 DECL_EXTERNAL (decl) = 0;
35052 DECL_EXTERNAL (dispatch_decl) = 0;
35054 DECL_CONTEXT (decl) = NULL_TREE;
35055 DECL_INITIAL (decl) = make_node (BLOCK);
35056 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35058 if (DECL_COMDAT_GROUP (default_decl)
35059 || TREE_PUBLIC (default_decl))
35061 /* In this case, each translation unit with a call to this
35062 versioned function will put out a resolver. Ensure it
35063 is comdat to keep just one copy. */
35064 DECL_COMDAT (decl) = 1;
35065 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35067 /* Build result decl and add to function_decl. */
35068 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35069 DECL_ARTIFICIAL (t) = 1;
35070 DECL_IGNORED_P (t) = 1;
35071 DECL_RESULT (decl) = t;
35073 gimplify_function_tree (decl);
35074 push_cfun (DECL_STRUCT_FUNCTION (decl));
35075 *empty_bb = init_lowered_empty_function (decl, false);
35077 cgraph_node::add_new_function (decl, true);
35078 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35080 pop_cfun ();
35082 gcc_assert (dispatch_decl != NULL);
35083 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35084 DECL_ATTRIBUTES (dispatch_decl)
35085 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35087 /* Create the alias for dispatch to resolver here. */
35088 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35089 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35090 XDELETEVEC (resolver_name);
35091 return decl;
35094 /* Generate the dispatching code body to dispatch multi-versioned function
35095 DECL. The target hook is called to process the "target" attributes and
35096 provide the code to dispatch the right function at run-time. NODE points
35097 to the dispatcher decl whose body will be created. */
35099 static tree
35100 ix86_generate_version_dispatcher_body (void *node_p)
35102 tree resolver_decl;
35103 basic_block empty_bb;
35104 tree default_ver_decl;
35105 struct cgraph_node *versn;
35106 struct cgraph_node *node;
35108 struct cgraph_function_version_info *node_version_info = NULL;
35109 struct cgraph_function_version_info *versn_info = NULL;
35111 node = (cgraph_node *)node_p;
35113 node_version_info = node->function_version ();
35114 gcc_assert (node->dispatcher_function
35115 && node_version_info != NULL);
35117 if (node_version_info->dispatcher_resolver)
35118 return node_version_info->dispatcher_resolver;
35120 /* The first version in the chain corresponds to the default version. */
35121 default_ver_decl = node_version_info->next->this_node->decl;
35123 /* node is going to be an alias, so remove the finalized bit. */
35124 node->definition = false;
35126 resolver_decl = make_resolver_func (default_ver_decl,
35127 node->decl, &empty_bb);
35129 node_version_info->dispatcher_resolver = resolver_decl;
35131 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35133 auto_vec<tree, 2> fn_ver_vec;
35135 for (versn_info = node_version_info->next; versn_info;
35136 versn_info = versn_info->next)
35138 versn = versn_info->this_node;
35139 /* Check for virtual functions here again, as by this time it should
35140 have been determined if this function needs a vtable index or
35141 not. This happens for methods in derived classes that override
35142 virtual methods in base classes but are not explicitly marked as
35143 virtual. */
35144 if (DECL_VINDEX (versn->decl))
35145 sorry ("Virtual function multiversioning not supported");
35147 fn_ver_vec.safe_push (versn->decl);
35150 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35151 cgraph_edge::rebuild_edges ();
35152 pop_cfun ();
35153 return resolver_decl;
35155 /* This builds the processor_model struct type defined in
35156 libgcc/config/i386/cpuinfo.c */
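/* The layout mirrored here is, roughly (see cpuinfo.c for the
   authoritative definition):

       struct __processor_model
       {
         unsigned int __cpu_vendor;
         unsigned int __cpu_type;
         unsigned int __cpu_subtype;
         unsigned int __cpu_features[1];
       } __cpu_model;
*/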
35158 static tree
35159 build_processor_model_struct (void)
35161 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35162 "__cpu_features"};
35163 tree field = NULL_TREE, field_chain = NULL_TREE;
35164 int i;
35165 tree type = make_node (RECORD_TYPE);
35167 /* The first 3 fields are unsigned int. */
35168 for (i = 0; i < 3; ++i)
35170 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35171 get_identifier (field_name[i]), unsigned_type_node);
35172 if (field_chain != NULL_TREE)
35173 DECL_CHAIN (field) = field_chain;
35174 field_chain = field;
35177 /* The last field is an array of unsigned integers of size one. */
35178 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35179 get_identifier (field_name[3]),
35180 build_array_type (unsigned_type_node,
35181 build_index_type (size_one_node)));
35182 if (field_chain != NULL_TREE)
35183 DECL_CHAIN (field) = field_chain;
35184 field_chain = field;
35186 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35187 return type;
35190 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35192 static tree
35193 make_var_decl (tree type, const char *name)
35195 tree new_decl;
35197 new_decl = build_decl (UNKNOWN_LOCATION,
35198 VAR_DECL,
35199 get_identifier(name),
35200 type);
35202 DECL_EXTERNAL (new_decl) = 1;
35203 TREE_STATIC (new_decl) = 1;
35204 TREE_PUBLIC (new_decl) = 1;
35205 DECL_INITIAL (new_decl) = 0;
35206 DECL_ARTIFICIAL (new_decl) = 0;
35207 DECL_PRESERVE_P (new_decl) = 1;
35209 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35210 assemble_variable (new_decl, 0, 0, 0);
35212 return new_decl;
35215 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35216 into an integer defined in libgcc/config/i386/cpuinfo.c */
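/* Folding sketch (illustrative):

     __builtin_cpu_is ("haswell")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START)

     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */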
35218 static tree
35219 fold_builtin_cpu (tree fndecl, tree *args)
35221 unsigned int i;
35222 enum ix86_builtins fn_code = (enum ix86_builtins)
35223 DECL_FUNCTION_CODE (fndecl);
35224 tree param_string_cst = NULL;
35226 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35227 enum processor_features
35229 F_CMOV = 0,
35230 F_MMX,
35231 F_POPCNT,
35232 F_SSE,
35233 F_SSE2,
35234 F_SSE3,
35235 F_SSSE3,
35236 F_SSE4_1,
35237 F_SSE4_2,
35238 F_AVX,
35239 F_AVX2,
35240 F_SSE4_A,
35241 F_FMA4,
35242 F_XOP,
35243 F_FMA,
35244 F_AVX512F,
35245 F_MAX
35248 /* These are the values for vendor types and cpu types and subtypes
35249 in cpuinfo.c. Cpu type and subtype values must have the
35250 corresponding start value subtracted before use. */
35251 enum processor_model
35253 M_INTEL = 1,
35254 M_AMD,
35255 M_CPU_TYPE_START,
35256 M_INTEL_BONNELL,
35257 M_INTEL_CORE2,
35258 M_INTEL_COREI7,
35259 M_AMDFAM10H,
35260 M_AMDFAM15H,
35261 M_INTEL_SILVERMONT,
35262 M_AMD_BTVER1,
35263 M_AMD_BTVER2,
35264 M_CPU_SUBTYPE_START,
35265 M_INTEL_COREI7_NEHALEM,
35266 M_INTEL_COREI7_WESTMERE,
35267 M_INTEL_COREI7_SANDYBRIDGE,
35268 M_AMDFAM10H_BARCELONA,
35269 M_AMDFAM10H_SHANGHAI,
35270 M_AMDFAM10H_ISTANBUL,
35271 M_AMDFAM15H_BDVER1,
35272 M_AMDFAM15H_BDVER2,
35273 M_AMDFAM15H_BDVER3,
35274 M_AMDFAM15H_BDVER4,
35275 M_INTEL_COREI7_IVYBRIDGE,
35276 M_INTEL_COREI7_HASWELL
35279 static struct _arch_names_table
35281 const char *const name;
35282 const enum processor_model model;
35284 const arch_names_table[] =
35286 {"amd", M_AMD},
35287 {"intel", M_INTEL},
35288 {"atom", M_INTEL_BONNELL},
35289 {"slm", M_INTEL_SILVERMONT},
35290 {"core2", M_INTEL_CORE2},
35291 {"corei7", M_INTEL_COREI7},
35292 {"nehalem", M_INTEL_COREI7_NEHALEM},
35293 {"westmere", M_INTEL_COREI7_WESTMERE},
35294 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35295 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35296 {"haswell", M_INTEL_COREI7_HASWELL},
35297 {"bonnell", M_INTEL_BONNELL},
35298 {"silvermont", M_INTEL_SILVERMONT},
35299 {"amdfam10h", M_AMDFAM10H},
35300 {"barcelona", M_AMDFAM10H_BARCELONA},
35301 {"shanghai", M_AMDFAM10H_SHANGHAI},
35302 {"istanbul", M_AMDFAM10H_ISTANBUL},
35303 {"btver1", M_AMD_BTVER1},
35304 {"amdfam15h", M_AMDFAM15H},
35305 {"bdver1", M_AMDFAM15H_BDVER1},
35306 {"bdver2", M_AMDFAM15H_BDVER2},
35307 {"bdver3", M_AMDFAM15H_BDVER3},
35308 {"bdver4", M_AMDFAM15H_BDVER4},
35309 {"btver2", M_AMD_BTVER2},
35312 static struct _isa_names_table
35314 const char *const name;
35315 const enum processor_features feature;
35317 const isa_names_table[] =
35319 {"cmov", F_CMOV},
35320 {"mmx", F_MMX},
35321 {"popcnt", F_POPCNT},
35322 {"sse", F_SSE},
35323 {"sse2", F_SSE2},
35324 {"sse3", F_SSE3},
35325 {"ssse3", F_SSSE3},
35326 {"sse4a", F_SSE4_A},
35327 {"sse4.1", F_SSE4_1},
35328 {"sse4.2", F_SSE4_2},
35329 {"avx", F_AVX},
35330 {"fma4", F_FMA4},
35331 {"xop", F_XOP},
35332 {"fma", F_FMA},
35333 {"avx2", F_AVX2},
35334 {"avx512f",F_AVX512F}
35337 tree __processor_model_type = build_processor_model_struct ();
35338 tree __cpu_model_var = make_var_decl (__processor_model_type,
35339 "__cpu_model");
35342 varpool_node::add (__cpu_model_var);
35344 gcc_assert ((args != NULL) && (*args != NULL));
35346 param_string_cst = *args;
35347 while (param_string_cst
35348 && TREE_CODE (param_string_cst) != STRING_CST)
35350 /* *args must be an expr that can contain other EXPRs leading to a
35351 STRING_CST. */
35352 if (!EXPR_P (param_string_cst))
35354 error ("Parameter to builtin must be a string constant or literal");
35355 return integer_zero_node;
35357 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35360 gcc_assert (param_string_cst);
35362 if (fn_code == IX86_BUILTIN_CPU_IS)
35364 tree ref;
35365 tree field;
35366 tree final;
35368 unsigned int field_val = 0;
35369 unsigned int NUM_ARCH_NAMES
35370 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35372 for (i = 0; i < NUM_ARCH_NAMES; i++)
35373 if (strcmp (arch_names_table[i].name,
35374 TREE_STRING_POINTER (param_string_cst)) == 0)
35375 break;
35377 if (i == NUM_ARCH_NAMES)
35379 error ("Parameter to builtin not valid: %s",
35380 TREE_STRING_POINTER (param_string_cst));
35381 return integer_zero_node;
35384 field = TYPE_FIELDS (__processor_model_type);
35385 field_val = arch_names_table[i].model;
35387 /* CPU types are stored in the next field. */
35388 if (field_val > M_CPU_TYPE_START
35389 && field_val < M_CPU_SUBTYPE_START)
35391 field = DECL_CHAIN (field);
35392 field_val -= M_CPU_TYPE_START;
35395 /* CPU subtypes are stored in the next field. */
35396 if (field_val > M_CPU_SUBTYPE_START)
35398 field = DECL_CHAIN (DECL_CHAIN (field));
35399 field_val -= M_CPU_SUBTYPE_START;
35402 /* Get the appropriate field in __cpu_model. */
35403 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35404 field, NULL_TREE);
35406 /* Check the value. */
35407 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35408 build_int_cstu (unsigned_type_node, field_val));
35409 return build1 (CONVERT_EXPR, integer_type_node, final);
35411 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35413 tree ref;
35414 tree array_elt;
35415 tree field;
35416 tree final;
35418 unsigned int field_val = 0;
35419 unsigned int NUM_ISA_NAMES
35420 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35422 for (i = 0; i < NUM_ISA_NAMES; i++)
35423 if (strcmp (isa_names_table[i].name,
35424 TREE_STRING_POINTER (param_string_cst)) == 0)
35425 break;
35427 if (i == NUM_ISA_NAMES)
35429 error ("Parameter to builtin not valid: %s",
35430 TREE_STRING_POINTER (param_string_cst));
35431 return integer_zero_node;
35434 field = TYPE_FIELDS (__processor_model_type);
35435 /* Get the last field, which is __cpu_features. */
35436 while (DECL_CHAIN (field))
35437 field = DECL_CHAIN (field);
35439 /* Get the appropriate field: __cpu_model.__cpu_features */
35440 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35441 field, NULL_TREE);
35443 /* Access the 0th element of __cpu_features array. */
35444 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35445 integer_zero_node, NULL_TREE, NULL_TREE);
35447 field_val = (1 << isa_names_table[i].feature);
35448 /* Return __cpu_model.__cpu_features[0] & field_val */
35449 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35450 build_int_cstu (unsigned_type_node, field_val));
35451 return build1 (CONVERT_EXPR, integer_type_node, final);
35453 gcc_unreachable ();
35456 static tree
35457 ix86_fold_builtin (tree fndecl, int n_args,
35458 tree *args, bool ignore ATTRIBUTE_UNUSED)
35460 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35462 enum ix86_builtins fn_code = (enum ix86_builtins)
35463 DECL_FUNCTION_CODE (fndecl);
35464 if (fn_code == IX86_BUILTIN_CPU_IS
35465 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35467 gcc_assert (n_args == 1);
35468 return fold_builtin_cpu (fndecl, args);
35472 #ifdef SUBTARGET_FOLD_BUILTIN
35473 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35474 #endif
35476 return NULL_TREE;
35479 /* Make builtins to detect cpu type and features supported. NAME is
35480 the builtin name, CODE is the builtin code, and FTYPE is the function
35481 type of the builtin. */
35483 static void
35484 make_cpu_type_builtin (const char* name, int code,
35485 enum ix86_builtin_func_type ftype, bool is_const)
35487 tree decl;
35488 tree type;
35490 type = ix86_get_builtin_func_type (ftype);
35491 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35492 NULL, NULL_TREE);
35493 gcc_assert (decl != NULL_TREE);
35494 ix86_builtins[(int) code] = decl;
35495 TREE_READONLY (decl) = is_const;
35498 /* Make builtins to get CPU type and features supported. The created
35499 builtins are :
35501 __builtin_cpu_init (), to detect cpu type and features,
35502 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35503 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35506 static void
35507 ix86_init_platform_type_builtins (void)
35509 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35510 INT_FTYPE_VOID, false);
35511 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35512 INT_FTYPE_PCCHAR, true);
35513 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35514 INT_FTYPE_PCCHAR, true);
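/* A minimal usage sketch for the builtins registered above (user code,
   illustrative only):

     if (__builtin_cpu_is ("haswell"))
       ... use a Haswell-tuned path ...
     else if (__builtin_cpu_supports ("avx2"))
       ... use a generic AVX2 path ...

   __builtin_cpu_init () only needs to be called explicitly from code
   that can run before constructors do (e.g. IFUNC resolvers); otherwise
   cpuinfo.c initializes __cpu_model from a constructor.  */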
35517 /* Internal method for ix86_init_builtins. */
35519 static void
35520 ix86_init_builtins_va_builtins_abi (void)
35522 tree ms_va_ref, sysv_va_ref;
35523 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35524 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35525 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35526 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35528 if (!TARGET_64BIT)
35529 return;
35530 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35531 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35532 ms_va_ref = build_reference_type (ms_va_list_type_node);
35533 sysv_va_ref =
35534 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35536 fnvoid_va_end_ms =
35537 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35538 fnvoid_va_start_ms =
35539 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35540 fnvoid_va_end_sysv =
35541 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35542 fnvoid_va_start_sysv =
35543 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35544 NULL_TREE);
35545 fnvoid_va_copy_ms =
35546 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35547 NULL_TREE);
35548 fnvoid_va_copy_sysv =
35549 build_function_type_list (void_type_node, sysv_va_ref,
35550 sysv_va_ref, NULL_TREE);
35552 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35553 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35554 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35555 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35556 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35557 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35558 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35559 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35560 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35561 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35562 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35563 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35566 static void
35567 ix86_init_builtin_types (void)
35569 tree float128_type_node, float80_type_node;
35571 /* The __float80 type. */
35572 float80_type_node = long_double_type_node;
35573 if (TYPE_MODE (float80_type_node) != XFmode)
35575 /* The __float80 type. */
35576 float80_type_node = make_node (REAL_TYPE);
35578 TYPE_PRECISION (float80_type_node) = 80;
35579 layout_type (float80_type_node);
35581 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35583 /* The __float128 type. */
35584 float128_type_node = make_node (REAL_TYPE);
35585 TYPE_PRECISION (float128_type_node) = 128;
35586 layout_type (float128_type_node);
35587 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35589 /* This macro is built by i386-builtin-types.awk. */
35590 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35593 static void
35594 ix86_init_builtins (void)
35596 tree t;
35598 ix86_init_builtin_types ();
35600 /* Builtins to get CPU type and features. */
35601 ix86_init_platform_type_builtins ();
35603 /* TFmode support builtins. */
35604 def_builtin_const (0, "__builtin_infq",
35605 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35606 def_builtin_const (0, "__builtin_huge_valq",
35607 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35609 /* We will expand them to a normal call if SSE isn't available, since
35610 they are used by libgcc. */
35611 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35612 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35613 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35614 TREE_READONLY (t) = 1;
35615 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35617 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35618 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35619 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35620 TREE_READONLY (t) = 1;
35621 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35623 ix86_init_tm_builtins ();
35624 ix86_init_mmx_sse_builtins ();
35625 ix86_init_mpx_builtins ();
35627 if (TARGET_LP64)
35628 ix86_init_builtins_va_builtins_abi ();
35630 #ifdef SUBTARGET_INIT_BUILTINS
35631 SUBTARGET_INIT_BUILTINS;
35632 #endif
35635 /* Return the ix86 builtin for CODE. */
35637 static tree
35638 ix86_builtin_decl (unsigned code, bool)
35640 if (code >= IX86_BUILTIN_MAX)
35641 return error_mark_node;
35643 return ix86_builtins[code];
35646 /* Errors in the source file can cause expand_expr to return const0_rtx
35647 where we expect a vector. To avoid crashing, use one of the vector
35648 clear instructions. */
35649 static rtx
35650 safe_vector_operand (rtx x, machine_mode mode)
35652 if (x == const0_rtx)
35653 x = CONST0_RTX (mode);
35654 return x;
35657 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35659 static rtx
35660 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35662 rtx pat;
35663 tree arg0 = CALL_EXPR_ARG (exp, 0);
35664 tree arg1 = CALL_EXPR_ARG (exp, 1);
35665 rtx op0 = expand_normal (arg0);
35666 rtx op1 = expand_normal (arg1);
35667 machine_mode tmode = insn_data[icode].operand[0].mode;
35668 machine_mode mode0 = insn_data[icode].operand[1].mode;
35669 machine_mode mode1 = insn_data[icode].operand[2].mode;
35671 if (VECTOR_MODE_P (mode0))
35672 op0 = safe_vector_operand (op0, mode0);
35673 if (VECTOR_MODE_P (mode1))
35674 op1 = safe_vector_operand (op1, mode1);
35676 if (optimize || !target
35677 || GET_MODE (target) != tmode
35678 || !insn_data[icode].operand[0].predicate (target, tmode))
35679 target = gen_reg_rtx (tmode);
35681 if (GET_MODE (op1) == SImode && mode1 == TImode)
35683 rtx x = gen_reg_rtx (V4SImode);
35684 emit_insn (gen_sse2_loadd (x, op1));
35685 op1 = gen_lowpart (TImode, x);
35688 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35689 op0 = copy_to_mode_reg (mode0, op0);
35690 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35691 op1 = copy_to_mode_reg (mode1, op1);
35693 pat = GEN_FCN (icode) (target, op0, op1);
35694 if (! pat)
35695 return 0;
35697 emit_insn (pat);
35699 return target;
35702 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35704 static rtx
35705 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35706 enum ix86_builtin_func_type m_type,
35707 enum rtx_code sub_code)
35709 rtx pat;
35710 int i;
35711 int nargs;
35712 bool comparison_p = false;
35713 bool tf_p = false;
35714 bool last_arg_constant = false;
35715 int num_memory = 0;
35716 struct {
35717 rtx op;
35718 machine_mode mode;
35719 } args[4];
35721 machine_mode tmode = insn_data[icode].operand[0].mode;
35723 switch (m_type)
35725 case MULTI_ARG_4_DF2_DI_I:
35726 case MULTI_ARG_4_DF2_DI_I1:
35727 case MULTI_ARG_4_SF2_SI_I:
35728 case MULTI_ARG_4_SF2_SI_I1:
35729 nargs = 4;
35730 last_arg_constant = true;
35731 break;
35733 case MULTI_ARG_3_SF:
35734 case MULTI_ARG_3_DF:
35735 case MULTI_ARG_3_SF2:
35736 case MULTI_ARG_3_DF2:
35737 case MULTI_ARG_3_DI:
35738 case MULTI_ARG_3_SI:
35739 case MULTI_ARG_3_SI_DI:
35740 case MULTI_ARG_3_HI:
35741 case MULTI_ARG_3_HI_SI:
35742 case MULTI_ARG_3_QI:
35743 case MULTI_ARG_3_DI2:
35744 case MULTI_ARG_3_SI2:
35745 case MULTI_ARG_3_HI2:
35746 case MULTI_ARG_3_QI2:
35747 nargs = 3;
35748 break;
35750 case MULTI_ARG_2_SF:
35751 case MULTI_ARG_2_DF:
35752 case MULTI_ARG_2_DI:
35753 case MULTI_ARG_2_SI:
35754 case MULTI_ARG_2_HI:
35755 case MULTI_ARG_2_QI:
35756 nargs = 2;
35757 break;
35759 case MULTI_ARG_2_DI_IMM:
35760 case MULTI_ARG_2_SI_IMM:
35761 case MULTI_ARG_2_HI_IMM:
35762 case MULTI_ARG_2_QI_IMM:
35763 nargs = 2;
35764 last_arg_constant = true;
35765 break;
35767 case MULTI_ARG_1_SF:
35768 case MULTI_ARG_1_DF:
35769 case MULTI_ARG_1_SF2:
35770 case MULTI_ARG_1_DF2:
35771 case MULTI_ARG_1_DI:
35772 case MULTI_ARG_1_SI:
35773 case MULTI_ARG_1_HI:
35774 case MULTI_ARG_1_QI:
35775 case MULTI_ARG_1_SI_DI:
35776 case MULTI_ARG_1_HI_DI:
35777 case MULTI_ARG_1_HI_SI:
35778 case MULTI_ARG_1_QI_DI:
35779 case MULTI_ARG_1_QI_SI:
35780 case MULTI_ARG_1_QI_HI:
35781 nargs = 1;
35782 break;
35784 case MULTI_ARG_2_DI_CMP:
35785 case MULTI_ARG_2_SI_CMP:
35786 case MULTI_ARG_2_HI_CMP:
35787 case MULTI_ARG_2_QI_CMP:
35788 nargs = 2;
35789 comparison_p = true;
35790 break;
35792 case MULTI_ARG_2_SF_TF:
35793 case MULTI_ARG_2_DF_TF:
35794 case MULTI_ARG_2_DI_TF:
35795 case MULTI_ARG_2_SI_TF:
35796 case MULTI_ARG_2_HI_TF:
35797 case MULTI_ARG_2_QI_TF:
35798 nargs = 2;
35799 tf_p = true;
35800 break;
35802 default:
35803 gcc_unreachable ();
35806 if (optimize || !target
35807 || GET_MODE (target) != tmode
35808 || !insn_data[icode].operand[0].predicate (target, tmode))
35809 target = gen_reg_rtx (tmode);
35811 gcc_assert (nargs <= 4);
35813 for (i = 0; i < nargs; i++)
35815 tree arg = CALL_EXPR_ARG (exp, i);
35816 rtx op = expand_normal (arg);
35817 int adjust = (comparison_p) ? 1 : 0;
35818 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35820 if (last_arg_constant && i == nargs - 1)
35822 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35824 enum insn_code new_icode = icode;
35825 switch (icode)
35827 case CODE_FOR_xop_vpermil2v2df3:
35828 case CODE_FOR_xop_vpermil2v4sf3:
35829 case CODE_FOR_xop_vpermil2v4df3:
35830 case CODE_FOR_xop_vpermil2v8sf3:
35831 error ("the last argument must be a 2-bit immediate");
35832 return gen_reg_rtx (tmode);
35833 case CODE_FOR_xop_rotlv2di3:
35834 new_icode = CODE_FOR_rotlv2di3;
35835 goto xop_rotl;
35836 case CODE_FOR_xop_rotlv4si3:
35837 new_icode = CODE_FOR_rotlv4si3;
35838 goto xop_rotl;
35839 case CODE_FOR_xop_rotlv8hi3:
35840 new_icode = CODE_FOR_rotlv8hi3;
35841 goto xop_rotl;
35842 case CODE_FOR_xop_rotlv16qi3:
35843 new_icode = CODE_FOR_rotlv16qi3;
35844 xop_rotl:
35845 if (CONST_INT_P (op))
35847 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35848 op = GEN_INT (INTVAL (op) & mask);
35849 gcc_checking_assert
35850 (insn_data[icode].operand[i + 1].predicate (op, mode));
35852 else
35854 gcc_checking_assert
35855 (nargs == 2
35856 && insn_data[new_icode].operand[0].mode == tmode
35857 && insn_data[new_icode].operand[1].mode == tmode
35858 && insn_data[new_icode].operand[2].mode == mode
35859 && insn_data[new_icode].operand[0].predicate
35860 == insn_data[icode].operand[0].predicate
35861 && insn_data[new_icode].operand[1].predicate
35862 == insn_data[icode].operand[1].predicate);
35863 icode = new_icode;
35864 goto non_constant;
35866 break;
35867 default:
35868 gcc_unreachable ();
35872 else
35874 non_constant:
35875 if (VECTOR_MODE_P (mode))
35876 op = safe_vector_operand (op, mode);
35878 /* If we aren't optimizing, only allow one memory operand to be
35879 generated. */
35880 if (memory_operand (op, mode))
35881 num_memory++;
35883 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35885 if (optimize
35886 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35887 || num_memory > 1)
35888 op = force_reg (mode, op);
35891 args[i].op = op;
35892 args[i].mode = mode;
35895 switch (nargs)
35897 case 1:
35898 pat = GEN_FCN (icode) (target, args[0].op);
35899 break;
35901 case 2:
35902 if (tf_p)
35903 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35904 GEN_INT ((int)sub_code));
35905 else if (! comparison_p)
35906 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35907 else
35909 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35910 args[0].op,
35911 args[1].op);
35913 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35915 break;
35917 case 3:
35918 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35919 break;
35921 case 4:
35922 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35923 break;
35925 default:
35926 gcc_unreachable ();
35929 if (! pat)
35930 return 0;
35932 emit_insn (pat);
35933 return target;
35936 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35937 insns with vec_merge. */
35939 static rtx
35940 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35941 rtx target)
35943 rtx pat;
35944 tree arg0 = CALL_EXPR_ARG (exp, 0);
35945 rtx op1, op0 = expand_normal (arg0);
35946 machine_mode tmode = insn_data[icode].operand[0].mode;
35947 machine_mode mode0 = insn_data[icode].operand[1].mode;
35949 if (optimize || !target
35950 || GET_MODE (target) != tmode
35951 || !insn_data[icode].operand[0].predicate (target, tmode))
35952 target = gen_reg_rtx (tmode);
35954 if (VECTOR_MODE_P (mode0))
35955 op0 = safe_vector_operand (op0, mode0);
35957 if ((optimize && !register_operand (op0, mode0))
35958 || !insn_data[icode].operand[1].predicate (op0, mode0))
35959 op0 = copy_to_mode_reg (mode0, op0);
35961 op1 = op0;
35962 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35963 op1 = copy_to_mode_reg (mode0, op1);
35965 pat = GEN_FCN (icode) (target, op0, op1);
35966 if (! pat)
35967 return 0;
35968 emit_insn (pat);
35969 return target;
35972 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
35974 static rtx
35975 ix86_expand_sse_compare (const struct builtin_description *d,
35976 tree exp, rtx target, bool swap)
35978 rtx pat;
35979 tree arg0 = CALL_EXPR_ARG (exp, 0);
35980 tree arg1 = CALL_EXPR_ARG (exp, 1);
35981 rtx op0 = expand_normal (arg0);
35982 rtx op1 = expand_normal (arg1);
35983 rtx op2;
35984 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35985 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35986 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35987 enum rtx_code comparison = d->comparison;
35989 if (VECTOR_MODE_P (mode0))
35990 op0 = safe_vector_operand (op0, mode0);
35991 if (VECTOR_MODE_P (mode1))
35992 op1 = safe_vector_operand (op1, mode1);
35994 /* Swap operands if we have a comparison that isn't available in
35995 hardware. */
35996 if (swap)
35998 rtx tmp = gen_reg_rtx (mode1);
35999 emit_move_insn (tmp, op1);
36000 op1 = op0;
36001 op0 = tmp;
36004 if (optimize || !target
36005 || GET_MODE (target) != tmode
36006 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36007 target = gen_reg_rtx (tmode);
36009 if ((optimize && !register_operand (op0, mode0))
36010 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36011 op0 = copy_to_mode_reg (mode0, op0);
36012 if ((optimize && !register_operand (op1, mode1))
36013 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36014 op1 = copy_to_mode_reg (mode1, op1);
36016 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36017 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36018 if (! pat)
36019 return 0;
36020 emit_insn (pat);
36021 return target;
36024 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
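/* Sketch of the expansion (illustrative): the comi pattern compares the
   two operands and sets FLAGS_REG; the condition D->COMPARISON is then
   materialized into the low byte of a fresh SImode pseudo through a
   STRICT_LOW_PART set, and that pseudo is returned as the 0/1 result.  */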
36026 static rtx
36027 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36028 rtx target)
36030 rtx pat;
36031 tree arg0 = CALL_EXPR_ARG (exp, 0);
36032 tree arg1 = CALL_EXPR_ARG (exp, 1);
36033 rtx op0 = expand_normal (arg0);
36034 rtx op1 = expand_normal (arg1);
36035 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36036 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36037 enum rtx_code comparison = d->comparison;
36039 if (VECTOR_MODE_P (mode0))
36040 op0 = safe_vector_operand (op0, mode0);
36041 if (VECTOR_MODE_P (mode1))
36042 op1 = safe_vector_operand (op1, mode1);
36044 /* Swap operands if we have a comparison that isn't available in
36045 hardware. */
36046 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36047 std::swap (op1, op0);
36049 target = gen_reg_rtx (SImode);
36050 emit_move_insn (target, const0_rtx);
36051 target = gen_rtx_SUBREG (QImode, target, 0);
36053 if ((optimize && !register_operand (op0, mode0))
36054 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36055 op0 = copy_to_mode_reg (mode0, op0);
36056 if ((optimize && !register_operand (op1, mode1))
36057 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36058 op1 = copy_to_mode_reg (mode1, op1);
36060 pat = GEN_FCN (d->icode) (op0, op1);
36061 if (! pat)
36062 return 0;
36063 emit_insn (pat);
36064 emit_insn (gen_rtx_SET (VOIDmode,
36065 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36066 gen_rtx_fmt_ee (comparison, QImode,
36067 SET_DEST (pat),
36068 const0_rtx)));
36070 return SUBREG_REG (target);
36073 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36075 static rtx
36076 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36077 rtx target)
36079 rtx pat;
36080 tree arg0 = CALL_EXPR_ARG (exp, 0);
36081 rtx op1, op0 = expand_normal (arg0);
36082 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36083 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36085 if (optimize || target == 0
36086 || GET_MODE (target) != tmode
36087 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36088 target = gen_reg_rtx (tmode);
36090 if (VECTOR_MODE_P (mode0))
36091 op0 = safe_vector_operand (op0, mode0);
36093 if ((optimize && !register_operand (op0, mode0))
36094 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36095 op0 = copy_to_mode_reg (mode0, op0);
36097 op1 = GEN_INT (d->comparison);
36099 pat = GEN_FCN (d->icode) (target, op0, op1);
36100 if (! pat)
36101 return 0;
36102 emit_insn (pat);
36103 return target;
36106 static rtx
36107 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36108 tree exp, rtx target)
36110 rtx pat;
36111 tree arg0 = CALL_EXPR_ARG (exp, 0);
36112 tree arg1 = CALL_EXPR_ARG (exp, 1);
36113 rtx op0 = expand_normal (arg0);
36114 rtx op1 = expand_normal (arg1);
36115 rtx op2;
36116 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36117 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36118 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36120 if (optimize || target == 0
36121 || GET_MODE (target) != tmode
36122 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36123 target = gen_reg_rtx (tmode);
36125 op0 = safe_vector_operand (op0, mode0);
36126 op1 = safe_vector_operand (op1, mode1);
36128 if ((optimize && !register_operand (op0, mode0))
36129 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36130 op0 = copy_to_mode_reg (mode0, op0);
36131 if ((optimize && !register_operand (op1, mode1))
36132 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36133 op1 = copy_to_mode_reg (mode1, op1);
36135 op2 = GEN_INT (d->comparison);
36137 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36138 if (! pat)
36139 return 0;
36140 emit_insn (pat);
36141 return target;
36144 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36146 static rtx
36147 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36148 rtx target)
36150 rtx pat;
36151 tree arg0 = CALL_EXPR_ARG (exp, 0);
36152 tree arg1 = CALL_EXPR_ARG (exp, 1);
36153 rtx op0 = expand_normal (arg0);
36154 rtx op1 = expand_normal (arg1);
36155 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36156 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36157 enum rtx_code comparison = d->comparison;
36159 if (VECTOR_MODE_P (mode0))
36160 op0 = safe_vector_operand (op0, mode0);
36161 if (VECTOR_MODE_P (mode1))
36162 op1 = safe_vector_operand (op1, mode1);
36164 target = gen_reg_rtx (SImode);
36165 emit_move_insn (target, const0_rtx);
36166 target = gen_rtx_SUBREG (QImode, target, 0);
36168 if ((optimize && !register_operand (op0, mode0))
36169 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36170 op0 = copy_to_mode_reg (mode0, op0);
36171 if ((optimize && !register_operand (op1, mode1))
36172 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36173 op1 = copy_to_mode_reg (mode1, op1);
36175 pat = GEN_FCN (d->icode) (op0, op1);
36176 if (! pat)
36177 return 0;
36178 emit_insn (pat);
36179 emit_insn (gen_rtx_SET (VOIDmode,
36180 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36181 gen_rtx_fmt_ee (comparison, QImode,
36182 SET_DEST (pat),
36183 const0_rtx)));
36185 return SUBREG_REG (target);
36188 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36190 static rtx
36191 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36192 tree exp, rtx target)
36194 rtx pat;
36195 tree arg0 = CALL_EXPR_ARG (exp, 0);
36196 tree arg1 = CALL_EXPR_ARG (exp, 1);
36197 tree arg2 = CALL_EXPR_ARG (exp, 2);
36198 tree arg3 = CALL_EXPR_ARG (exp, 3);
36199 tree arg4 = CALL_EXPR_ARG (exp, 4);
36200 rtx scratch0, scratch1;
36201 rtx op0 = expand_normal (arg0);
36202 rtx op1 = expand_normal (arg1);
36203 rtx op2 = expand_normal (arg2);
36204 rtx op3 = expand_normal (arg3);
36205 rtx op4 = expand_normal (arg4);
36206 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36208 tmode0 = insn_data[d->icode].operand[0].mode;
36209 tmode1 = insn_data[d->icode].operand[1].mode;
36210 modev2 = insn_data[d->icode].operand[2].mode;
36211 modei3 = insn_data[d->icode].operand[3].mode;
36212 modev4 = insn_data[d->icode].operand[4].mode;
36213 modei5 = insn_data[d->icode].operand[5].mode;
36214 modeimm = insn_data[d->icode].operand[6].mode;
36216 if (VECTOR_MODE_P (modev2))
36217 op0 = safe_vector_operand (op0, modev2);
36218 if (VECTOR_MODE_P (modev4))
36219 op2 = safe_vector_operand (op2, modev4);
36221 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36222 op0 = copy_to_mode_reg (modev2, op0);
36223 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36224 op1 = copy_to_mode_reg (modei3, op1);
36225 if ((optimize && !register_operand (op2, modev4))
36226 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36227 op2 = copy_to_mode_reg (modev4, op2);
36228 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36229 op3 = copy_to_mode_reg (modei5, op3);
36231 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36233 error ("the fifth argument must be an 8-bit immediate");
36234 return const0_rtx;
36237 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36239 if (optimize || !target
36240 || GET_MODE (target) != tmode0
36241 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36242 target = gen_reg_rtx (tmode0);
36244 scratch1 = gen_reg_rtx (tmode1);
36246 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36248 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36250 if (optimize || !target
36251 || GET_MODE (target) != tmode1
36252 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36253 target = gen_reg_rtx (tmode1);
36255 scratch0 = gen_reg_rtx (tmode0);
36257 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36259 else
36261 gcc_assert (d->flag);
36263 scratch0 = gen_reg_rtx (tmode0);
36264 scratch1 = gen_reg_rtx (tmode1);
36266 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36269 if (! pat)
36270 return 0;
36272 emit_insn (pat);
36274 if (d->flag)
36276 target = gen_reg_rtx (SImode);
36277 emit_move_insn (target, const0_rtx);
36278 target = gen_rtx_SUBREG (QImode, target, 0);
36280 emit_insn
36281 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36282 gen_rtx_fmt_ee (EQ, QImode,
36283 gen_rtx_REG ((machine_mode) d->flag,
36284 FLAGS_REG),
36285 const0_rtx)));
36286 return SUBREG_REG (target);
36288 else
36289 return target;
36293 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36295 static rtx
36296 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36297 tree exp, rtx target)
36299 rtx pat;
36300 tree arg0 = CALL_EXPR_ARG (exp, 0);
36301 tree arg1 = CALL_EXPR_ARG (exp, 1);
36302 tree arg2 = CALL_EXPR_ARG (exp, 2);
36303 rtx scratch0, scratch1;
36304 rtx op0 = expand_normal (arg0);
36305 rtx op1 = expand_normal (arg1);
36306 rtx op2 = expand_normal (arg2);
36307 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36309 tmode0 = insn_data[d->icode].operand[0].mode;
36310 tmode1 = insn_data[d->icode].operand[1].mode;
36311 modev2 = insn_data[d->icode].operand[2].mode;
36312 modev3 = insn_data[d->icode].operand[3].mode;
36313 modeimm = insn_data[d->icode].operand[4].mode;
36315 if (VECTOR_MODE_P (modev2))
36316 op0 = safe_vector_operand (op0, modev2);
36317 if (VECTOR_MODE_P (modev3))
36318 op1 = safe_vector_operand (op1, modev3);
36320 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36321 op0 = copy_to_mode_reg (modev2, op0);
36322 if ((optimize && !register_operand (op1, modev3))
36323 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36324 op1 = copy_to_mode_reg (modev3, op1);
36326 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36328 error ("the third argument must be an 8-bit immediate");
36329 return const0_rtx;
36332 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36334 if (optimize || !target
36335 || GET_MODE (target) != tmode0
36336 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36337 target = gen_reg_rtx (tmode0);
36339 scratch1 = gen_reg_rtx (tmode1);
36341 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36343 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36345 if (optimize || !target
36346 || GET_MODE (target) != tmode1
36347 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36348 target = gen_reg_rtx (tmode1);
36350 scratch0 = gen_reg_rtx (tmode0);
36352 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36354 else
36356 gcc_assert (d->flag);
36358 scratch0 = gen_reg_rtx (tmode0);
36359 scratch1 = gen_reg_rtx (tmode1);
36361 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36364 if (! pat)
36365 return 0;
36367 emit_insn (pat);
36369 if (d->flag)
36371 target = gen_reg_rtx (SImode);
36372 emit_move_insn (target, const0_rtx);
36373 target = gen_rtx_SUBREG (QImode, target, 0);
36375 emit_insn
36376 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36377 gen_rtx_fmt_ee (EQ, QImode,
36378 gen_rtx_REG ((machine_mode) d->flag,
36379 FLAGS_REG),
36380 const0_rtx)));
36381 return SUBREG_REG (target);
36383 else
36384 return target;
36387 /* Subroutine of ix86_expand_builtin to take care of insns with
36388 variable number of operands. */
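/* Illustrative sketch (an assumed example, not part of the original file):
   a two-operand builtin with a constant selector, e.g.

     __v4si v, r;
     r = __builtin_ia32_pshufd (v, 0x1b);

   is classified below as V4SI_FTYPE_V4SI_INT (nargs = 2, nargs_constant = 1),
   so its last operand must satisfy the immediate predicate, while the vector
   operand is copied into a register when its predicate does not match.  */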
36390 static rtx
36391 ix86_expand_args_builtin (const struct builtin_description *d,
36392 tree exp, rtx target)
36394 rtx pat, real_target;
36395 unsigned int i, nargs;
36396 unsigned int nargs_constant = 0;
36397 unsigned int mask_pos = 0;
36398 int num_memory = 0;
36399 struct
36401 rtx op;
36402 machine_mode mode;
36403 } args[6];
36404 bool last_arg_count = false;
36405 enum insn_code icode = d->icode;
36406 const struct insn_data_d *insn_p = &insn_data[icode];
36407 machine_mode tmode = insn_p->operand[0].mode;
36408 machine_mode rmode = VOIDmode;
36409 bool swap = false;
36410 enum rtx_code comparison = d->comparison;
36412 switch ((enum ix86_builtin_func_type) d->flag)
36414 case V2DF_FTYPE_V2DF_ROUND:
36415 case V4DF_FTYPE_V4DF_ROUND:
36416 case V4SF_FTYPE_V4SF_ROUND:
36417 case V8SF_FTYPE_V8SF_ROUND:
36418 case V4SI_FTYPE_V4SF_ROUND:
36419 case V8SI_FTYPE_V8SF_ROUND:
36420 return ix86_expand_sse_round (d, exp, target);
36421 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36422 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36423 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36424 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36425 case INT_FTYPE_V8SF_V8SF_PTEST:
36426 case INT_FTYPE_V4DI_V4DI_PTEST:
36427 case INT_FTYPE_V4DF_V4DF_PTEST:
36428 case INT_FTYPE_V4SF_V4SF_PTEST:
36429 case INT_FTYPE_V2DI_V2DI_PTEST:
36430 case INT_FTYPE_V2DF_V2DF_PTEST:
36431 return ix86_expand_sse_ptest (d, exp, target);
36432 case FLOAT128_FTYPE_FLOAT128:
36433 case FLOAT_FTYPE_FLOAT:
36434 case INT_FTYPE_INT:
36435 case UINT64_FTYPE_INT:
36436 case UINT16_FTYPE_UINT16:
36437 case INT64_FTYPE_INT64:
36438 case INT64_FTYPE_V4SF:
36439 case INT64_FTYPE_V2DF:
36440 case INT_FTYPE_V16QI:
36441 case INT_FTYPE_V8QI:
36442 case INT_FTYPE_V8SF:
36443 case INT_FTYPE_V4DF:
36444 case INT_FTYPE_V4SF:
36445 case INT_FTYPE_V2DF:
36446 case INT_FTYPE_V32QI:
36447 case V16QI_FTYPE_V16QI:
36448 case V8SI_FTYPE_V8SF:
36449 case V8SI_FTYPE_V4SI:
36450 case V8HI_FTYPE_V8HI:
36451 case V8HI_FTYPE_V16QI:
36452 case V8QI_FTYPE_V8QI:
36453 case V8SF_FTYPE_V8SF:
36454 case V8SF_FTYPE_V8SI:
36455 case V8SF_FTYPE_V4SF:
36456 case V8SF_FTYPE_V8HI:
36457 case V4SI_FTYPE_V4SI:
36458 case V4SI_FTYPE_V16QI:
36459 case V4SI_FTYPE_V4SF:
36460 case V4SI_FTYPE_V8SI:
36461 case V4SI_FTYPE_V8HI:
36462 case V4SI_FTYPE_V4DF:
36463 case V4SI_FTYPE_V2DF:
36464 case V4HI_FTYPE_V4HI:
36465 case V4DF_FTYPE_V4DF:
36466 case V4DF_FTYPE_V4SI:
36467 case V4DF_FTYPE_V4SF:
36468 case V4DF_FTYPE_V2DF:
36469 case V4SF_FTYPE_V4SF:
36470 case V4SF_FTYPE_V4SI:
36471 case V4SF_FTYPE_V8SF:
36472 case V4SF_FTYPE_V4DF:
36473 case V4SF_FTYPE_V8HI:
36474 case V4SF_FTYPE_V2DF:
36475 case V2DI_FTYPE_V2DI:
36476 case V2DI_FTYPE_V16QI:
36477 case V2DI_FTYPE_V8HI:
36478 case V2DI_FTYPE_V4SI:
36479 case V2DF_FTYPE_V2DF:
36480 case V2DF_FTYPE_V4SI:
36481 case V2DF_FTYPE_V4DF:
36482 case V2DF_FTYPE_V4SF:
36483 case V2DF_FTYPE_V2SI:
36484 case V2SI_FTYPE_V2SI:
36485 case V2SI_FTYPE_V4SF:
36486 case V2SI_FTYPE_V2SF:
36487 case V2SI_FTYPE_V2DF:
36488 case V2SF_FTYPE_V2SF:
36489 case V2SF_FTYPE_V2SI:
36490 case V32QI_FTYPE_V32QI:
36491 case V32QI_FTYPE_V16QI:
36492 case V16HI_FTYPE_V16HI:
36493 case V16HI_FTYPE_V8HI:
36494 case V8SI_FTYPE_V8SI:
36495 case V16HI_FTYPE_V16QI:
36496 case V8SI_FTYPE_V16QI:
36497 case V4DI_FTYPE_V16QI:
36498 case V8SI_FTYPE_V8HI:
36499 case V4DI_FTYPE_V8HI:
36500 case V4DI_FTYPE_V4SI:
36501 case V4DI_FTYPE_V2DI:
36502 case HI_FTYPE_HI:
36503 case HI_FTYPE_V16QI:
36504 case SI_FTYPE_V32QI:
36505 case DI_FTYPE_V64QI:
36506 case V16QI_FTYPE_HI:
36507 case V32QI_FTYPE_SI:
36508 case V64QI_FTYPE_DI:
36509 case V8HI_FTYPE_QI:
36510 case V16HI_FTYPE_HI:
36511 case V32HI_FTYPE_SI:
36512 case V4SI_FTYPE_QI:
36513 case V8SI_FTYPE_QI:
36514 case V4SI_FTYPE_HI:
36515 case V8SI_FTYPE_HI:
36516 case QI_FTYPE_V8HI:
36517 case HI_FTYPE_V16HI:
36518 case SI_FTYPE_V32HI:
36519 case QI_FTYPE_V4SI:
36520 case QI_FTYPE_V8SI:
36521 case HI_FTYPE_V16SI:
36522 case QI_FTYPE_V2DI:
36523 case QI_FTYPE_V4DI:
36524 case QI_FTYPE_V8DI:
36525 case UINT_FTYPE_V2DF:
36526 case UINT_FTYPE_V4SF:
36527 case UINT64_FTYPE_V2DF:
36528 case UINT64_FTYPE_V4SF:
36529 case V16QI_FTYPE_V8DI:
36530 case V16HI_FTYPE_V16SI:
36531 case V16SI_FTYPE_HI:
36532 case V2DI_FTYPE_QI:
36533 case V4DI_FTYPE_QI:
36534 case V16SI_FTYPE_V16SI:
36535 case V16SI_FTYPE_INT:
36536 case V16SF_FTYPE_FLOAT:
36537 case V16SF_FTYPE_V8SF:
36538 case V16SI_FTYPE_V8SI:
36539 case V16SF_FTYPE_V4SF:
36540 case V16SI_FTYPE_V4SI:
36541 case V16SF_FTYPE_V16SF:
36542 case V8HI_FTYPE_V8DI:
36543 case V8UHI_FTYPE_V8UHI:
36544 case V8SI_FTYPE_V8DI:
36545 case V8SF_FTYPE_V8DF:
36546 case V8DI_FTYPE_QI:
36547 case V8DI_FTYPE_INT64:
36548 case V8DI_FTYPE_V4DI:
36549 case V8DI_FTYPE_V8DI:
36550 case V8DF_FTYPE_DOUBLE:
36551 case V8DF_FTYPE_V4DF:
36552 case V8DF_FTYPE_V2DF:
36553 case V8DF_FTYPE_V8DF:
36554 case V8DF_FTYPE_V8SI:
36555 nargs = 1;
36556 break;
36557 case V4SF_FTYPE_V4SF_VEC_MERGE:
36558 case V2DF_FTYPE_V2DF_VEC_MERGE:
36559 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36560 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36561 case V16QI_FTYPE_V16QI_V16QI:
36562 case V16QI_FTYPE_V8HI_V8HI:
36563 case V16SI_FTYPE_V16SI_V16SI:
36564 case V16SF_FTYPE_V16SF_V16SF:
36565 case V16SF_FTYPE_V16SF_V16SI:
36566 case V8QI_FTYPE_V8QI_V8QI:
36567 case V8QI_FTYPE_V4HI_V4HI:
36568 case V8HI_FTYPE_V8HI_V8HI:
36569 case V8HI_FTYPE_V16QI_V16QI:
36570 case V8HI_FTYPE_V4SI_V4SI:
36571 case V8SF_FTYPE_V8SF_V8SF:
36572 case V8SF_FTYPE_V8SF_V8SI:
36573 case V8DI_FTYPE_V8DI_V8DI:
36574 case V8DF_FTYPE_V8DF_V8DF:
36575 case V8DF_FTYPE_V8DF_V8DI:
36576 case V4SI_FTYPE_V4SI_V4SI:
36577 case V4SI_FTYPE_V8HI_V8HI:
36578 case V4SI_FTYPE_V4SF_V4SF:
36579 case V4SI_FTYPE_V2DF_V2DF:
36580 case V4HI_FTYPE_V4HI_V4HI:
36581 case V4HI_FTYPE_V8QI_V8QI:
36582 case V4HI_FTYPE_V2SI_V2SI:
36583 case V4DF_FTYPE_V4DF_V4DF:
36584 case V4DF_FTYPE_V4DF_V4DI:
36585 case V4SF_FTYPE_V4SF_V4SF:
36586 case V4SF_FTYPE_V4SF_V4SI:
36587 case V4SF_FTYPE_V4SF_V2SI:
36588 case V4SF_FTYPE_V4SF_V2DF:
36589 case V4SF_FTYPE_V4SF_UINT:
36590 case V4SF_FTYPE_V4SF_UINT64:
36591 case V4SF_FTYPE_V4SF_DI:
36592 case V4SF_FTYPE_V4SF_SI:
36593 case V2DI_FTYPE_V2DI_V2DI:
36594 case V2DI_FTYPE_V16QI_V16QI:
36595 case V2DI_FTYPE_V4SI_V4SI:
36596 case V2UDI_FTYPE_V4USI_V4USI:
36597 case V2DI_FTYPE_V2DI_V16QI:
36598 case V2DI_FTYPE_V2DF_V2DF:
36599 case V2SI_FTYPE_V2SI_V2SI:
36600 case V2SI_FTYPE_V4HI_V4HI:
36601 case V2SI_FTYPE_V2SF_V2SF:
36602 case V2DF_FTYPE_V2DF_V2DF:
36603 case V2DF_FTYPE_V2DF_V4SF:
36604 case V2DF_FTYPE_V2DF_V2DI:
36605 case V2DF_FTYPE_V2DF_DI:
36606 case V2DF_FTYPE_V2DF_SI:
36607 case V2DF_FTYPE_V2DF_UINT:
36608 case V2DF_FTYPE_V2DF_UINT64:
36609 case V2SF_FTYPE_V2SF_V2SF:
36610 case V1DI_FTYPE_V1DI_V1DI:
36611 case V1DI_FTYPE_V8QI_V8QI:
36612 case V1DI_FTYPE_V2SI_V2SI:
36613 case V32QI_FTYPE_V16HI_V16HI:
36614 case V16HI_FTYPE_V8SI_V8SI:
36615 case V32QI_FTYPE_V32QI_V32QI:
36616 case V16HI_FTYPE_V32QI_V32QI:
36617 case V16HI_FTYPE_V16HI_V16HI:
36618 case V8SI_FTYPE_V4DF_V4DF:
36619 case V8SI_FTYPE_V8SI_V8SI:
36620 case V8SI_FTYPE_V16HI_V16HI:
36621 case V4DI_FTYPE_V4DI_V4DI:
36622 case V4DI_FTYPE_V8SI_V8SI:
36623 case V4UDI_FTYPE_V8USI_V8USI:
36624 case QI_FTYPE_V8DI_V8DI:
36625 case V8DI_FTYPE_V64QI_V64QI:
36626 case HI_FTYPE_V16SI_V16SI:
36627 if (comparison == UNKNOWN)
36628 return ix86_expand_binop_builtin (icode, exp, target);
36629 nargs = 2;
36630 break;
36631 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36632 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36633 gcc_assert (comparison != UNKNOWN);
36634 nargs = 2;
36635 swap = true;
36636 break;
36637 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36638 case V16HI_FTYPE_V16HI_SI_COUNT:
36639 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36640 case V8SI_FTYPE_V8SI_SI_COUNT:
36641 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36642 case V4DI_FTYPE_V4DI_INT_COUNT:
36643 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36644 case V8HI_FTYPE_V8HI_SI_COUNT:
36645 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36646 case V4SI_FTYPE_V4SI_SI_COUNT:
36647 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36648 case V4HI_FTYPE_V4HI_SI_COUNT:
36649 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36650 case V2DI_FTYPE_V2DI_SI_COUNT:
36651 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36652 case V2SI_FTYPE_V2SI_SI_COUNT:
36653 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36654 case V1DI_FTYPE_V1DI_SI_COUNT:
36655 nargs = 2;
36656 last_arg_count = true;
36657 break;
36658 case UINT64_FTYPE_UINT64_UINT64:
36659 case UINT_FTYPE_UINT_UINT:
36660 case UINT_FTYPE_UINT_USHORT:
36661 case UINT_FTYPE_UINT_UCHAR:
36662 case UINT16_FTYPE_UINT16_INT:
36663 case UINT8_FTYPE_UINT8_INT:
36664 case HI_FTYPE_HI_HI:
36665 case SI_FTYPE_SI_SI:
36666 case DI_FTYPE_DI_DI:
36667 case V16SI_FTYPE_V8DF_V8DF:
36668 nargs = 2;
36669 break;
36670 case V2DI_FTYPE_V2DI_INT_CONVERT:
36671 nargs = 2;
36672 rmode = V1TImode;
36673 nargs_constant = 1;
36674 break;
36675 case V4DI_FTYPE_V4DI_INT_CONVERT:
36676 nargs = 2;
36677 rmode = V2TImode;
36678 nargs_constant = 1;
36679 break;
36680 case V8DI_FTYPE_V8DI_INT_CONVERT:
36681 nargs = 2;
36682 rmode = V4TImode;
36683 nargs_constant = 1;
36684 break;
36685 case V8HI_FTYPE_V8HI_INT:
36686 case V8HI_FTYPE_V8SF_INT:
36687 case V16HI_FTYPE_V16SF_INT:
36688 case V8HI_FTYPE_V4SF_INT:
36689 case V8SF_FTYPE_V8SF_INT:
36690 case V4SF_FTYPE_V16SF_INT:
36691 case V16SF_FTYPE_V16SF_INT:
36692 case V4SI_FTYPE_V4SI_INT:
36693 case V4SI_FTYPE_V8SI_INT:
36694 case V4HI_FTYPE_V4HI_INT:
36695 case V4DF_FTYPE_V4DF_INT:
36696 case V4DF_FTYPE_V8DF_INT:
36697 case V4SF_FTYPE_V4SF_INT:
36698 case V4SF_FTYPE_V8SF_INT:
36699 case V2DI_FTYPE_V2DI_INT:
36700 case V2DF_FTYPE_V2DF_INT:
36701 case V2DF_FTYPE_V4DF_INT:
36702 case V16HI_FTYPE_V16HI_INT:
36703 case V8SI_FTYPE_V8SI_INT:
36704 case V16SI_FTYPE_V16SI_INT:
36705 case V4SI_FTYPE_V16SI_INT:
36706 case V4DI_FTYPE_V4DI_INT:
36707 case V2DI_FTYPE_V4DI_INT:
36708 case V4DI_FTYPE_V8DI_INT:
36709 case HI_FTYPE_HI_INT:
36710 case QI_FTYPE_V4SF_INT:
36711 case QI_FTYPE_V2DF_INT:
36712 nargs = 2;
36713 nargs_constant = 1;
36714 break;
36715 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36716 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36717 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36718 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36719 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36720 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36721 case HI_FTYPE_V16SI_V16SI_HI:
36722 case QI_FTYPE_V8DI_V8DI_QI:
36723 case V16HI_FTYPE_V16SI_V16HI_HI:
36724 case V16QI_FTYPE_V16SI_V16QI_HI:
36725 case V16QI_FTYPE_V8DI_V16QI_QI:
36726 case V16SF_FTYPE_V16SF_V16SF_HI:
36727 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36728 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36729 case V16SF_FTYPE_V16SI_V16SF_HI:
36730 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36731 case V16SF_FTYPE_V4SF_V16SF_HI:
36732 case V16SI_FTYPE_SI_V16SI_HI:
36733 case V16SI_FTYPE_V16HI_V16SI_HI:
36734 case V16SI_FTYPE_V16QI_V16SI_HI:
36735 case V16SI_FTYPE_V16SF_V16SI_HI:
36736 case V8SF_FTYPE_V4SF_V8SF_QI:
36737 case V4DF_FTYPE_V2DF_V4DF_QI:
36738 case V8SI_FTYPE_V4SI_V8SI_QI:
36739 case V8SI_FTYPE_SI_V8SI_QI:
36740 case V4SI_FTYPE_V4SI_V4SI_QI:
36741 case V4SI_FTYPE_SI_V4SI_QI:
36742 case V4DI_FTYPE_V2DI_V4DI_QI:
36743 case V4DI_FTYPE_DI_V4DI_QI:
36744 case V2DI_FTYPE_V2DI_V2DI_QI:
36745 case V2DI_FTYPE_DI_V2DI_QI:
36746 case V64QI_FTYPE_V64QI_V64QI_DI:
36747 case V64QI_FTYPE_V16QI_V64QI_DI:
36748 case V64QI_FTYPE_QI_V64QI_DI:
36749 case V32QI_FTYPE_V32QI_V32QI_SI:
36750 case V32QI_FTYPE_V16QI_V32QI_SI:
36751 case V32QI_FTYPE_QI_V32QI_SI:
36752 case V16QI_FTYPE_V16QI_V16QI_HI:
36753 case V16QI_FTYPE_QI_V16QI_HI:
36754 case V32HI_FTYPE_V8HI_V32HI_SI:
36755 case V32HI_FTYPE_HI_V32HI_SI:
36756 case V16HI_FTYPE_V8HI_V16HI_HI:
36757 case V16HI_FTYPE_HI_V16HI_HI:
36758 case V8HI_FTYPE_V8HI_V8HI_QI:
36759 case V8HI_FTYPE_HI_V8HI_QI:
36760 case V8SF_FTYPE_V8HI_V8SF_QI:
36761 case V4SF_FTYPE_V8HI_V4SF_QI:
36762 case V8SI_FTYPE_V8SF_V8SI_QI:
36763 case V4SI_FTYPE_V4SF_V4SI_QI:
36764 case V8DI_FTYPE_V8SF_V8DI_QI:
36765 case V4DI_FTYPE_V4SF_V4DI_QI:
36766 case V2DI_FTYPE_V4SF_V2DI_QI:
36767 case V8SF_FTYPE_V8DI_V8SF_QI:
36768 case V4SF_FTYPE_V4DI_V4SF_QI:
36769 case V4SF_FTYPE_V2DI_V4SF_QI:
36770 case V8DF_FTYPE_V8DI_V8DF_QI:
36771 case V4DF_FTYPE_V4DI_V4DF_QI:
36772 case V2DF_FTYPE_V2DI_V2DF_QI:
36773 case V16QI_FTYPE_V8HI_V16QI_QI:
36774 case V16QI_FTYPE_V16HI_V16QI_HI:
36775 case V16QI_FTYPE_V4SI_V16QI_QI:
36776 case V16QI_FTYPE_V8SI_V16QI_QI:
36777 case V8HI_FTYPE_V4SI_V8HI_QI:
36778 case V8HI_FTYPE_V8SI_V8HI_QI:
36779 case V16QI_FTYPE_V2DI_V16QI_QI:
36780 case V16QI_FTYPE_V4DI_V16QI_QI:
36781 case V8HI_FTYPE_V2DI_V8HI_QI:
36782 case V8HI_FTYPE_V4DI_V8HI_QI:
36783 case V4SI_FTYPE_V2DI_V4SI_QI:
36784 case V4SI_FTYPE_V4DI_V4SI_QI:
36785 case V32QI_FTYPE_V32HI_V32QI_SI:
36786 case HI_FTYPE_V16QI_V16QI_HI:
36787 case SI_FTYPE_V32QI_V32QI_SI:
36788 case DI_FTYPE_V64QI_V64QI_DI:
36789 case QI_FTYPE_V8HI_V8HI_QI:
36790 case HI_FTYPE_V16HI_V16HI_HI:
36791 case SI_FTYPE_V32HI_V32HI_SI:
36792 case QI_FTYPE_V4SI_V4SI_QI:
36793 case QI_FTYPE_V8SI_V8SI_QI:
36794 case QI_FTYPE_V2DI_V2DI_QI:
36795 case QI_FTYPE_V4DI_V4DI_QI:
36796 case V4SF_FTYPE_V2DF_V4SF_QI:
36797 case V4SF_FTYPE_V4DF_V4SF_QI:
36798 case V16SI_FTYPE_V16SI_V16SI_HI:
36799 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36800 case V16SI_FTYPE_V4SI_V16SI_HI:
36801 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36802 case V2DI_FTYPE_V4SI_V2DI_QI:
36803 case V2DI_FTYPE_V8HI_V2DI_QI:
36804 case V2DI_FTYPE_V16QI_V2DI_QI:
36805 case V4DI_FTYPE_V4DI_V4DI_QI:
36806 case V4DI_FTYPE_V4SI_V4DI_QI:
36807 case V4DI_FTYPE_V8HI_V4DI_QI:
36808 case V4DI_FTYPE_V16QI_V4DI_QI:
36809 case V8DI_FTYPE_V8DF_V8DI_QI:
36810 case V4DI_FTYPE_V4DF_V4DI_QI:
36811 case V2DI_FTYPE_V2DF_V2DI_QI:
36812 case V4SI_FTYPE_V4DF_V4SI_QI:
36813 case V4SI_FTYPE_V2DF_V4SI_QI:
36814 case V4SI_FTYPE_V8HI_V4SI_QI:
36815 case V4SI_FTYPE_V16QI_V4SI_QI:
36816 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36817 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36818 case V8DF_FTYPE_V2DF_V8DF_QI:
36819 case V8DF_FTYPE_V4DF_V8DF_QI:
36820 case V8DF_FTYPE_V8DF_V8DF_QI:
36821 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36822 case V8SF_FTYPE_V8SF_V8SF_QI:
36823 case V8SF_FTYPE_V8SI_V8SF_QI:
36824 case V4DF_FTYPE_V4DF_V4DF_QI:
36825 case V4SF_FTYPE_V4SF_V4SF_QI:
36826 case V2DF_FTYPE_V2DF_V2DF_QI:
36827 case V2DF_FTYPE_V4SF_V2DF_QI:
36828 case V2DF_FTYPE_V4SI_V2DF_QI:
36829 case V4SF_FTYPE_V4SI_V4SF_QI:
36830 case V4DF_FTYPE_V4SF_V4DF_QI:
36831 case V4DF_FTYPE_V4SI_V4DF_QI:
36832 case V8SI_FTYPE_V8SI_V8SI_QI:
36833 case V8SI_FTYPE_V8HI_V8SI_QI:
36834 case V8SI_FTYPE_V16QI_V8SI_QI:
36835 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36836 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36837 case V8DF_FTYPE_V8SF_V8DF_QI:
36838 case V8DF_FTYPE_V8SI_V8DF_QI:
36839 case V8DI_FTYPE_DI_V8DI_QI:
36840 case V16SF_FTYPE_V8SF_V16SF_HI:
36841 case V16SI_FTYPE_V8SI_V16SI_HI:
36842 case V16HI_FTYPE_V16HI_V16HI_HI:
36843 case V8HI_FTYPE_V16QI_V8HI_QI:
36844 case V16HI_FTYPE_V16QI_V16HI_HI:
36845 case V32HI_FTYPE_V32HI_V32HI_SI:
36846 case V32HI_FTYPE_V32QI_V32HI_SI:
36847 case V8DI_FTYPE_V16QI_V8DI_QI:
36848 case V8DI_FTYPE_V2DI_V8DI_QI:
36849 case V8DI_FTYPE_V4DI_V8DI_QI:
36850 case V8DI_FTYPE_V8DI_V8DI_QI:
36851 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36852 case V8DI_FTYPE_V8HI_V8DI_QI:
36853 case V8DI_FTYPE_V8SI_V8DI_QI:
36854 case V8HI_FTYPE_V8DI_V8HI_QI:
36855 case V8SF_FTYPE_V8DF_V8SF_QI:
36856 case V8SI_FTYPE_V8DF_V8SI_QI:
36857 case V8SI_FTYPE_V8DI_V8SI_QI:
36858 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36859 nargs = 3;
36860 break;
36861 case V32QI_FTYPE_V32QI_V32QI_INT:
36862 case V16HI_FTYPE_V16HI_V16HI_INT:
36863 case V16QI_FTYPE_V16QI_V16QI_INT:
36864 case V4DI_FTYPE_V4DI_V4DI_INT:
36865 case V8HI_FTYPE_V8HI_V8HI_INT:
36866 case V8SI_FTYPE_V8SI_V8SI_INT:
36867 case V8SI_FTYPE_V8SI_V4SI_INT:
36868 case V8SF_FTYPE_V8SF_V8SF_INT:
36869 case V8SF_FTYPE_V8SF_V4SF_INT:
36870 case V4SI_FTYPE_V4SI_V4SI_INT:
36871 case V4DF_FTYPE_V4DF_V4DF_INT:
36872 case V16SF_FTYPE_V16SF_V16SF_INT:
36873 case V16SF_FTYPE_V16SF_V4SF_INT:
36874 case V16SI_FTYPE_V16SI_V4SI_INT:
36875 case V4DF_FTYPE_V4DF_V2DF_INT:
36876 case V4SF_FTYPE_V4SF_V4SF_INT:
36877 case V2DI_FTYPE_V2DI_V2DI_INT:
36878 case V4DI_FTYPE_V4DI_V2DI_INT:
36879 case V2DF_FTYPE_V2DF_V2DF_INT:
36880 case QI_FTYPE_V8DI_V8DI_INT:
36881 case QI_FTYPE_V8DF_V8DF_INT:
36882 case QI_FTYPE_V2DF_V2DF_INT:
36883 case QI_FTYPE_V4SF_V4SF_INT:
36884 case HI_FTYPE_V16SI_V16SI_INT:
36885 case HI_FTYPE_V16SF_V16SF_INT:
36886 nargs = 3;
36887 nargs_constant = 1;
36888 break;
36889 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36890 nargs = 3;
36891 rmode = V4DImode;
36892 nargs_constant = 1;
36893 break;
36894 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36895 nargs = 3;
36896 rmode = V2DImode;
36897 nargs_constant = 1;
36898 break;
36899 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36900 nargs = 3;
36901 rmode = DImode;
36902 nargs_constant = 1;
36903 break;
36904 case V2DI_FTYPE_V2DI_UINT_UINT:
36905 nargs = 3;
36906 nargs_constant = 2;
36907 break;
36908 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36909 nargs = 3;
36910 rmode = V8DImode;
36911 nargs_constant = 1;
36912 break;
36913 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36914 nargs = 5;
36915 rmode = V8DImode;
36916 mask_pos = 2;
36917 nargs_constant = 1;
36918 break;
36919 case QI_FTYPE_V8DF_INT_QI:
36920 case QI_FTYPE_V4DF_INT_QI:
36921 case QI_FTYPE_V2DF_INT_QI:
36922 case HI_FTYPE_V16SF_INT_HI:
36923 case QI_FTYPE_V8SF_INT_QI:
36924 case QI_FTYPE_V4SF_INT_QI:
36925 nargs = 3;
36926 mask_pos = 1;
36927 nargs_constant = 1;
36928 break;
36929 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36930 nargs = 5;
36931 rmode = V4DImode;
36932 mask_pos = 2;
36933 nargs_constant = 1;
36934 break;
36935 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36936 nargs = 5;
36937 rmode = V2DImode;
36938 mask_pos = 2;
36939 nargs_constant = 1;
36940 break;
36941 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36942 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36943 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36944 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36945 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36946 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36947 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36948 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36949 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36950 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36951 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36952 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36953 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36954 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36955 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36956 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36957 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36958 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36959 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36960 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36961 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36962 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36963 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36964 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36965 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36966 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36967 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
36968 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
36969 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
36970 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
36971 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
36972 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
36973 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
36974 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
36975 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
36976 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
36977 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
36978 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
36979 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
36980 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
36981 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
36982 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
36983 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
36984 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
36985 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
36986 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
36987 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
36988 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
36989 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
36990 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
36991 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
36992 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
36993 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
36994 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
36995 nargs = 4;
36996 break;
36997 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
36998 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
36999 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37000 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37001 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37002 nargs = 4;
37003 nargs_constant = 1;
37004 break;
37005 case QI_FTYPE_V4DI_V4DI_INT_QI:
37006 case QI_FTYPE_V8SI_V8SI_INT_QI:
37007 case QI_FTYPE_V4DF_V4DF_INT_QI:
37008 case QI_FTYPE_V8SF_V8SF_INT_QI:
37009 case QI_FTYPE_V2DI_V2DI_INT_QI:
37010 case QI_FTYPE_V4SI_V4SI_INT_QI:
37011 case QI_FTYPE_V2DF_V2DF_INT_QI:
37012 case QI_FTYPE_V4SF_V4SF_INT_QI:
37013 case DI_FTYPE_V64QI_V64QI_INT_DI:
37014 case SI_FTYPE_V32QI_V32QI_INT_SI:
37015 case HI_FTYPE_V16QI_V16QI_INT_HI:
37016 case SI_FTYPE_V32HI_V32HI_INT_SI:
37017 case HI_FTYPE_V16HI_V16HI_INT_HI:
37018 case QI_FTYPE_V8HI_V8HI_INT_QI:
37019 nargs = 4;
37020 mask_pos = 1;
37021 nargs_constant = 1;
37022 break;
37023 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37024 nargs = 4;
37025 nargs_constant = 2;
37026 break;
37027 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37028 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37029 nargs = 4;
37030 break;
37031 case QI_FTYPE_V8DI_V8DI_INT_QI:
37032 case HI_FTYPE_V16SI_V16SI_INT_HI:
37033 case QI_FTYPE_V8DF_V8DF_INT_QI:
37034 case HI_FTYPE_V16SF_V16SF_INT_HI:
37035 mask_pos = 1;
37036 nargs = 4;
37037 nargs_constant = 1;
37038 break;
37039 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37040 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37041 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37042 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37043 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37044 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37045 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37046 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37047 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37048 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37049 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37050 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37051 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37052 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37053 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37054 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37055 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37056 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37057 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37058 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37059 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37060 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37061 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37062 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37063 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37064 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37065 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37066 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37067 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37068 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37069 nargs = 4;
37070 mask_pos = 2;
37071 nargs_constant = 1;
37072 break;
37073 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37074 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37075 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37076 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37077 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37078 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37079 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37080 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37081 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37082 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37083 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37084 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37085 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37086 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37087 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37088 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37089 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37090 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37091 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37092 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37093 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37094 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37095 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37096 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37097 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37098 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37099 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37100 nargs = 5;
37101 mask_pos = 2;
37102 nargs_constant = 1;
37103 break;
37104 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37105 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37106 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37107 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37108 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37109 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37110 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37111 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37112 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37113 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37114 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37115 nargs = 5;
37117 mask_pos = 1;
37118 nargs_constant = 1;
37119 break;
37121 default:
37122 gcc_unreachable ();
37125 gcc_assert (nargs <= ARRAY_SIZE (args));
37127 if (comparison != UNKNOWN)
37129 gcc_assert (nargs == 2);
37130 return ix86_expand_sse_compare (d, exp, target, swap);
37133 if (rmode == VOIDmode || rmode == tmode)
37135 if (optimize
37136 || target == 0
37137 || GET_MODE (target) != tmode
37138 || !insn_p->operand[0].predicate (target, tmode))
37139 target = gen_reg_rtx (tmode);
37140 real_target = target;
37142 else
37144 real_target = gen_reg_rtx (tmode);
37145 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37148 for (i = 0; i < nargs; i++)
37150 tree arg = CALL_EXPR_ARG (exp, i);
37151 rtx op = expand_normal (arg);
37152 machine_mode mode = insn_p->operand[i + 1].mode;
37153 bool match = insn_p->operand[i + 1].predicate (op, mode);
37155 if (last_arg_count && (i + 1) == nargs)
37157 /* SIMD shift insns take either an 8-bit immediate or a
37158 register as the count. But the builtin functions take an int as
37159 the count. If the count doesn't match, we put it in a register. */
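/* Example (an illustrative assumption): _mm_slli_epi32 (v, n) goes through
   __builtin_ia32_pslldi128 with an int count. When n is not a literal the
   operand fails the immediate predicate, so it is narrowed to SImode and
   copied to a register here so the count-register form of the shift
   pattern can be used instead.  */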
37160 if (!match)
37162 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37163 if (!insn_p->operand[i + 1].predicate (op, mode))
37164 op = copy_to_reg (op);
37167 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37168 (!mask_pos && (nargs - i) <= nargs_constant))
37170 if (!match)
37171 switch (icode)
37173 case CODE_FOR_avx_vinsertf128v4di:
37174 case CODE_FOR_avx_vextractf128v4di:
37175 error ("the last argument must be a 1-bit immediate");
37176 return const0_rtx;
37178 case CODE_FOR_avx512f_cmpv8di3_mask:
37179 case CODE_FOR_avx512f_cmpv16si3_mask:
37180 case CODE_FOR_avx512f_ucmpv8di3_mask:
37181 case CODE_FOR_avx512f_ucmpv16si3_mask:
37182 case CODE_FOR_avx512vl_cmpv4di3_mask:
37183 case CODE_FOR_avx512vl_cmpv8si3_mask:
37184 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37185 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37186 case CODE_FOR_avx512vl_cmpv2di3_mask:
37187 case CODE_FOR_avx512vl_cmpv4si3_mask:
37188 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37189 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37190 error ("the last argument must be a 3-bit immediate");
37191 return const0_rtx;
37193 case CODE_FOR_sse4_1_roundsd:
37194 case CODE_FOR_sse4_1_roundss:
37196 case CODE_FOR_sse4_1_roundpd:
37197 case CODE_FOR_sse4_1_roundps:
37198 case CODE_FOR_avx_roundpd256:
37199 case CODE_FOR_avx_roundps256:
37201 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37202 case CODE_FOR_sse4_1_roundps_sfix:
37203 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37204 case CODE_FOR_avx_roundps_sfix256:
37206 case CODE_FOR_sse4_1_blendps:
37207 case CODE_FOR_avx_blendpd256:
37208 case CODE_FOR_avx_vpermilv4df:
37209 case CODE_FOR_avx_vpermilv4df_mask:
37210 case CODE_FOR_avx512f_getmantv8df_mask:
37211 case CODE_FOR_avx512f_getmantv16sf_mask:
37212 case CODE_FOR_avx512vl_getmantv8sf_mask:
37213 case CODE_FOR_avx512vl_getmantv4df_mask:
37214 case CODE_FOR_avx512vl_getmantv4sf_mask:
37215 case CODE_FOR_avx512vl_getmantv2df_mask:
37216 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37217 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37218 case CODE_FOR_avx512dq_rangepv4df_mask:
37219 case CODE_FOR_avx512dq_rangepv8sf_mask:
37220 case CODE_FOR_avx512dq_rangepv2df_mask:
37221 case CODE_FOR_avx512dq_rangepv4sf_mask:
37222 case CODE_FOR_avx_shufpd256_mask:
37223 error ("the last argument must be a 4-bit immediate");
37224 return const0_rtx;
37226 case CODE_FOR_sha1rnds4:
37227 case CODE_FOR_sse4_1_blendpd:
37228 case CODE_FOR_avx_vpermilv2df:
37229 case CODE_FOR_avx_vpermilv2df_mask:
37230 case CODE_FOR_xop_vpermil2v2df3:
37231 case CODE_FOR_xop_vpermil2v4sf3:
37232 case CODE_FOR_xop_vpermil2v4df3:
37233 case CODE_FOR_xop_vpermil2v8sf3:
37234 case CODE_FOR_avx512f_vinsertf32x4_mask:
37235 case CODE_FOR_avx512f_vinserti32x4_mask:
37236 case CODE_FOR_avx512f_vextractf32x4_mask:
37237 case CODE_FOR_avx512f_vextracti32x4_mask:
37238 case CODE_FOR_sse2_shufpd:
37239 case CODE_FOR_sse2_shufpd_mask:
37240 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37241 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37242 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37243 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37244 error ("the last argument must be a 2-bit immediate");
37245 return const0_rtx;
37247 case CODE_FOR_avx_vextractf128v4df:
37248 case CODE_FOR_avx_vextractf128v8sf:
37249 case CODE_FOR_avx_vextractf128v8si:
37250 case CODE_FOR_avx_vinsertf128v4df:
37251 case CODE_FOR_avx_vinsertf128v8sf:
37252 case CODE_FOR_avx_vinsertf128v8si:
37253 case CODE_FOR_avx512f_vinsertf64x4_mask:
37254 case CODE_FOR_avx512f_vinserti64x4_mask:
37255 case CODE_FOR_avx512f_vextractf64x4_mask:
37256 case CODE_FOR_avx512f_vextracti64x4_mask:
37257 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37258 case CODE_FOR_avx512dq_vinserti32x8_mask:
37259 case CODE_FOR_avx512vl_vinsertv4df:
37260 case CODE_FOR_avx512vl_vinsertv4di:
37261 case CODE_FOR_avx512vl_vinsertv8sf:
37262 case CODE_FOR_avx512vl_vinsertv8si:
37263 error ("the last argument must be a 1-bit immediate");
37264 return const0_rtx;
37266 case CODE_FOR_avx_vmcmpv2df3:
37267 case CODE_FOR_avx_vmcmpv4sf3:
37268 case CODE_FOR_avx_cmpv2df3:
37269 case CODE_FOR_avx_cmpv4sf3:
37270 case CODE_FOR_avx_cmpv4df3:
37271 case CODE_FOR_avx_cmpv8sf3:
37272 case CODE_FOR_avx512f_cmpv8df3_mask:
37273 case CODE_FOR_avx512f_cmpv16sf3_mask:
37274 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37275 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37276 error ("the last argument must be a 5-bit immediate");
37277 return const0_rtx;
37279 default:
37280 switch (nargs_constant)
37282 case 2:
37283 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37284 (!mask_pos && (nargs - i) == nargs_constant))
37286 error ("the next to last argument must be an 8-bit immediate");
37287 break;
37289 case 1:
37290 error ("the last argument must be an 8-bit immediate");
37291 break;
37292 default:
37293 gcc_unreachable ();
37295 return const0_rtx;
37298 else
37300 if (VECTOR_MODE_P (mode))
37301 op = safe_vector_operand (op, mode);
37303 /* If we aren't optimizing, only allow one memory operand to
37304 be generated. */
37305 if (memory_operand (op, mode))
37306 num_memory++;
37308 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37310 if (optimize || !match || num_memory > 1)
37311 op = copy_to_mode_reg (mode, op);
37313 else
37315 op = copy_to_reg (op);
37316 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37320 args[i].op = op;
37321 args[i].mode = mode;
37324 switch (nargs)
37326 case 1:
37327 pat = GEN_FCN (icode) (real_target, args[0].op);
37328 break;
37329 case 2:
37330 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37331 break;
37332 case 3:
37333 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37334 args[2].op);
37335 break;
37336 case 4:
37337 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37338 args[2].op, args[3].op);
37339 break;
37340 case 5:
37341 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37342 args[2].op, args[3].op, args[4].op);
break;
37343 case 6:
37344 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37345 args[2].op, args[3].op, args[4].op,
37346 args[5].op);
37347 break;
37348 default:
37349 gcc_unreachable ();
37352 if (! pat)
37353 return 0;
37355 emit_insn (pat);
37356 return target;
37359 /* Transform pattern of following layout:
37360 (parallel [
37361 set (A B)
37362 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37364 into:
37365 (set (A B))
Or:
37368 (parallel [ A B
...
37370 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37373 into:
37374 (parallel [ A B ... ]) */
37376 static rtx
37377 ix86_erase_embedded_rounding (rtx pat)
37379 if (GET_CODE (pat) == INSN)
37380 pat = PATTERN (pat);
37382 gcc_assert (GET_CODE (pat) == PARALLEL);
37384 if (XVECLEN (pat, 0) == 2)
37386 rtx p0 = XVECEXP (pat, 0, 0);
37387 rtx p1 = XVECEXP (pat, 0, 1);
37389 gcc_assert (GET_CODE (p0) == SET
37390 && GET_CODE (p1) == UNSPEC
37391 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37393 return p0;
37395 else
37397 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37398 int i = 0;
37399 int j = 0;
37401 for (; i < XVECLEN (pat, 0); ++i)
37403 rtx elem = XVECEXP (pat, 0, i);
37404 if (GET_CODE (elem) != UNSPEC
37405 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37406 res [j++] = elem;
37409 /* No more than 1 occurrence was removed. */
37410 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37412 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37416 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37417 with rounding. */
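/* Illustrative sketch (assumed usage, not part of the original file): the
   AVX-512 scalar compare intrinsic

     int r = _mm_comi_round_ss (a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC);

   is routed here; the third argument indexes the comparison tables below
   and the fourth is the SAE/rounding operand checked against the insn
   predicate.  */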
37418 static rtx
37419 ix86_expand_sse_comi_round (const struct builtin_description *d,
37420 tree exp, rtx target)
37422 rtx pat, set_dst;
37423 tree arg0 = CALL_EXPR_ARG (exp, 0);
37424 tree arg1 = CALL_EXPR_ARG (exp, 1);
37425 tree arg2 = CALL_EXPR_ARG (exp, 2);
37426 tree arg3 = CALL_EXPR_ARG (exp, 3);
37427 rtx op0 = expand_normal (arg0);
37428 rtx op1 = expand_normal (arg1);
37429 rtx op2 = expand_normal (arg2);
37430 rtx op3 = expand_normal (arg3);
37431 enum insn_code icode = d->icode;
37432 const struct insn_data_d *insn_p = &insn_data[icode];
37433 machine_mode mode0 = insn_p->operand[0].mode;
37434 machine_mode mode1 = insn_p->operand[1].mode;
37435 enum rtx_code comparison = UNEQ;
37436 bool need_ucomi = false;
37438 /* See avxintrin.h for values. */
37439 enum rtx_code comi_comparisons[32] =
37441 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37442 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37443 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37445 bool need_ucomi_values[32] =
37447 true, false, false, true, true, false, false, true,
37448 true, false, false, true, true, false, false, true,
37449 false, true, true, false, false, true, true, false,
37450 false, true, true, false, false, true, true, false
37453 if (!CONST_INT_P (op2))
37455 error ("the third argument must be a comparison constant");
37456 return const0_rtx;
37458 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37460 error ("incorrect comparison mode");
37461 return const0_rtx;
37464 if (!insn_p->operand[2].predicate (op3, SImode))
37466 error ("incorrect rounding operand");
37467 return const0_rtx;
37470 comparison = comi_comparisons[INTVAL (op2)];
37471 need_ucomi = need_ucomi_values[INTVAL (op2)];
37473 if (VECTOR_MODE_P (mode0))
37474 op0 = safe_vector_operand (op0, mode0);
37475 if (VECTOR_MODE_P (mode1))
37476 op1 = safe_vector_operand (op1, mode1);
37478 target = gen_reg_rtx (SImode);
37479 emit_move_insn (target, const0_rtx);
37480 target = gen_rtx_SUBREG (QImode, target, 0);
37482 if ((optimize && !register_operand (op0, mode0))
37483 || !insn_p->operand[0].predicate (op0, mode0))
37484 op0 = copy_to_mode_reg (mode0, op0);
37485 if ((optimize && !register_operand (op1, mode1))
37486 || !insn_p->operand[1].predicate (op1, mode1))
37487 op1 = copy_to_mode_reg (mode1, op1);
37489 if (need_ucomi)
37490 icode = icode == CODE_FOR_sse_comi_round
37491 ? CODE_FOR_sse_ucomi_round
37492 : CODE_FOR_sse2_ucomi_round;
37494 pat = GEN_FCN (icode) (op0, op1, op3);
37495 if (! pat)
37496 return 0;
37498 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37499 if (INTVAL (op3) == NO_ROUND)
37501 pat = ix86_erase_embedded_rounding (pat);
37502 if (! pat)
37503 return 0;
37505 set_dst = SET_DEST (pat);
37507 else
37509 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37510 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37513 emit_insn (pat);
37514 emit_insn (gen_rtx_SET (VOIDmode,
37515 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37516 gen_rtx_fmt_ee (comparison, QImode,
37517 set_dst,
37518 const0_rtx)));
37520 return SUBREG_REG (target);
37523 static rtx
37524 ix86_expand_round_builtin (const struct builtin_description *d,
37525 tree exp, rtx target)
37527 rtx pat;
37528 unsigned int i, nargs;
37529 struct
37531 rtx op;
37532 machine_mode mode;
37533 } args[6];
37534 enum insn_code icode = d->icode;
37535 const struct insn_data_d *insn_p = &insn_data[icode];
37536 machine_mode tmode = insn_p->operand[0].mode;
37537 unsigned int nargs_constant = 0;
37538 unsigned int redundant_embed_rnd = 0;
37540 switch ((enum ix86_builtin_func_type) d->flag)
37542 case UINT64_FTYPE_V2DF_INT:
37543 case UINT64_FTYPE_V4SF_INT:
37544 case UINT_FTYPE_V2DF_INT:
37545 case UINT_FTYPE_V4SF_INT:
37546 case INT64_FTYPE_V2DF_INT:
37547 case INT64_FTYPE_V4SF_INT:
37548 case INT_FTYPE_V2DF_INT:
37549 case INT_FTYPE_V4SF_INT:
37550 nargs = 2;
37551 break;
37552 case V4SF_FTYPE_V4SF_UINT_INT:
37553 case V4SF_FTYPE_V4SF_UINT64_INT:
37554 case V2DF_FTYPE_V2DF_UINT64_INT:
37555 case V4SF_FTYPE_V4SF_INT_INT:
37556 case V4SF_FTYPE_V4SF_INT64_INT:
37557 case V2DF_FTYPE_V2DF_INT64_INT:
37558 case V4SF_FTYPE_V4SF_V4SF_INT:
37559 case V2DF_FTYPE_V2DF_V2DF_INT:
37560 case V4SF_FTYPE_V4SF_V2DF_INT:
37561 case V2DF_FTYPE_V2DF_V4SF_INT:
37562 nargs = 3;
37563 break;
37564 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37565 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37566 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37567 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37568 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37569 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37570 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37571 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37572 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37573 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37574 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37575 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37576 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37577 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37578 nargs = 4;
37579 break;
37580 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37581 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37582 nargs_constant = 2;
37583 nargs = 4;
37584 break;
37585 case INT_FTYPE_V4SF_V4SF_INT_INT:
37586 case INT_FTYPE_V2DF_V2DF_INT_INT:
37587 return ix86_expand_sse_comi_round (d, exp, target);
37588 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37589 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37590 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37591 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37592 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37593 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37594 nargs = 5;
37595 break;
37596 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37597 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37598 nargs_constant = 4;
37599 nargs = 5;
37600 break;
37601 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37602 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37603 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37604 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37605 nargs_constant = 3;
37606 nargs = 5;
37607 break;
37608 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37609 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37610 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37611 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37612 nargs = 6;
37613 nargs_constant = 4;
37614 break;
37615 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37616 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37617 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37618 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37619 nargs = 6;
37620 nargs_constant = 3;
37621 break;
37622 default:
37623 gcc_unreachable ();
37625 gcc_assert (nargs <= ARRAY_SIZE (args));
37627 if (optimize
37628 || target == 0
37629 || GET_MODE (target) != tmode
37630 || !insn_p->operand[0].predicate (target, tmode))
37631 target = gen_reg_rtx (tmode);
37633 for (i = 0; i < nargs; i++)
37635 tree arg = CALL_EXPR_ARG (exp, i);
37636 rtx op = expand_normal (arg);
37637 machine_mode mode = insn_p->operand[i + 1].mode;
37638 bool match = insn_p->operand[i + 1].predicate (op, mode);
37640 if (i == nargs - nargs_constant)
37642 if (!match)
37644 switch (icode)
37646 case CODE_FOR_avx512f_getmantv8df_mask_round:
37647 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37648 case CODE_FOR_avx512f_vgetmantv2df_round:
37649 case CODE_FOR_avx512f_vgetmantv4sf_round:
37650 error ("the immediate argument must be a 4-bit immediate");
37651 return const0_rtx;
37652 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37653 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37654 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37655 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37656 error ("the immediate argument must be a 5-bit immediate");
37657 return const0_rtx;
37658 default:
37659 error ("the immediate argument must be an 8-bit immediate");
37660 return const0_rtx;
37664 else if (i == nargs-1)
37666 if (!insn_p->operand[nargs].predicate (op, SImode))
37668 error ("incorrect rounding operand");
37669 return const0_rtx;
37672 /* If there is no rounding, use the normal version of the pattern. */
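/* Illustrative note (an assumption about the intrinsic-level encoding):
   passing _MM_FROUND_CUR_DIRECTION as the rounding argument, as in
   _mm512_add_round_pd (x, y, _MM_FROUND_CUR_DIRECTION), requests no
   embedded rounding, so the UNSPEC_EMBEDDED_ROUNDING wrapper produced by
   the pattern is stripped again below.  */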
37673 if (INTVAL (op) == NO_ROUND)
37674 redundant_embed_rnd = 1;
37676 else
37678 if (VECTOR_MODE_P (mode))
37679 op = safe_vector_operand (op, mode);
37681 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37683 if (optimize || !match)
37684 op = copy_to_mode_reg (mode, op);
37686 else
37688 op = copy_to_reg (op);
37689 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37693 args[i].op = op;
37694 args[i].mode = mode;
37697 switch (nargs)
37699 case 1:
37700 pat = GEN_FCN (icode) (target, args[0].op);
37701 break;
37702 case 2:
37703 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37704 break;
37705 case 3:
37706 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37707 args[2].op);
37708 break;
37709 case 4:
37710 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37711 args[2].op, args[3].op);
37712 break;
37713 case 5:
37714 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37715 args[2].op, args[3].op, args[4].op);
break;
37716 case 6:
37717 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37718 args[2].op, args[3].op, args[4].op,
37719 args[5].op);
37720 break;
37721 default:
37722 gcc_unreachable ();
37725 if (!pat)
37726 return 0;
37728 if (redundant_embed_rnd)
37729 pat = ix86_erase_embedded_rounding (pat);
37731 emit_insn (pat);
37732 return target;
37735 /* Subroutine of ix86_expand_builtin to take care of special insns
37736 with variable number of operands. */
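/* Illustrative sketch (an assumed example, not part of the original file):
   a non-temporal store intrinsic such as _mm_stream_ps (p, v), i.e.
   __builtin_ia32_movntps, is a "store"-class special builtin here: the
   pointer argument becomes the MEM target, and because CODE_FOR_sse_movntv4sf
   is listed among the insns that need aligned memory, MEM_ALIGN on that MEM
   is raised to GET_MODE_ALIGNMENT below.  */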
37738 static rtx
37739 ix86_expand_special_args_builtin (const struct builtin_description *d,
37740 tree exp, rtx target)
37742 tree arg;
37743 rtx pat, op;
37744 unsigned int i, nargs, arg_adjust, memory;
37745 bool aligned_mem = false;
37746 struct
37748 rtx op;
37749 machine_mode mode;
37750 } args[3];
37751 enum insn_code icode = d->icode;
37752 bool last_arg_constant = false;
37753 const struct insn_data_d *insn_p = &insn_data[icode];
37754 machine_mode tmode = insn_p->operand[0].mode;
37755 enum { load, store } klass;
37757 switch ((enum ix86_builtin_func_type) d->flag)
37759 case VOID_FTYPE_VOID:
37760 emit_insn (GEN_FCN (icode) (target));
37761 return 0;
37762 case VOID_FTYPE_UINT64:
37763 case VOID_FTYPE_UNSIGNED:
37764 nargs = 0;
37765 klass = store;
37766 memory = 0;
37767 break;
37769 case INT_FTYPE_VOID:
37770 case USHORT_FTYPE_VOID:
37771 case UINT64_FTYPE_VOID:
37772 case UNSIGNED_FTYPE_VOID:
37773 nargs = 0;
37774 klass = load;
37775 memory = 0;
37776 break;
37777 case UINT64_FTYPE_PUNSIGNED:
37778 case V2DI_FTYPE_PV2DI:
37779 case V4DI_FTYPE_PV4DI:
37780 case V32QI_FTYPE_PCCHAR:
37781 case V16QI_FTYPE_PCCHAR:
37782 case V8SF_FTYPE_PCV4SF:
37783 case V8SF_FTYPE_PCFLOAT:
37784 case V4SF_FTYPE_PCFLOAT:
37785 case V4DF_FTYPE_PCV2DF:
37786 case V4DF_FTYPE_PCDOUBLE:
37787 case V2DF_FTYPE_PCDOUBLE:
37788 case VOID_FTYPE_PVOID:
37789 case V16SI_FTYPE_PV4SI:
37790 case V16SF_FTYPE_PV4SF:
37791 case V8DI_FTYPE_PV4DI:
37792 case V8DI_FTYPE_PV8DI:
37793 case V8DF_FTYPE_PV4DF:
37794 nargs = 1;
37795 klass = load;
37796 memory = 0;
37797 switch (icode)
37799 case CODE_FOR_sse4_1_movntdqa:
37800 case CODE_FOR_avx2_movntdqa:
37801 case CODE_FOR_avx512f_movntdqa:
37802 aligned_mem = true;
37803 break;
37804 default:
37805 break;
37807 break;
37808 case VOID_FTYPE_PV2SF_V4SF:
37809 case VOID_FTYPE_PV8DI_V8DI:
37810 case VOID_FTYPE_PV4DI_V4DI:
37811 case VOID_FTYPE_PV2DI_V2DI:
37812 case VOID_FTYPE_PCHAR_V32QI:
37813 case VOID_FTYPE_PCHAR_V16QI:
37814 case VOID_FTYPE_PFLOAT_V16SF:
37815 case VOID_FTYPE_PFLOAT_V8SF:
37816 case VOID_FTYPE_PFLOAT_V4SF:
37817 case VOID_FTYPE_PDOUBLE_V8DF:
37818 case VOID_FTYPE_PDOUBLE_V4DF:
37819 case VOID_FTYPE_PDOUBLE_V2DF:
37820 case VOID_FTYPE_PLONGLONG_LONGLONG:
37821 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37822 case VOID_FTYPE_PINT_INT:
37823 nargs = 1;
37824 klass = store;
37825 /* Reserve memory operand for target. */
37826 memory = ARRAY_SIZE (args);
37827 switch (icode)
37829 /* These builtins and instructions require the memory
37830 to be properly aligned. */
37831 case CODE_FOR_avx_movntv4di:
37832 case CODE_FOR_sse2_movntv2di:
37833 case CODE_FOR_avx_movntv8sf:
37834 case CODE_FOR_sse_movntv4sf:
37835 case CODE_FOR_sse4a_vmmovntv4sf:
37836 case CODE_FOR_avx_movntv4df:
37837 case CODE_FOR_sse2_movntv2df:
37838 case CODE_FOR_sse4a_vmmovntv2df:
37839 case CODE_FOR_sse2_movntidi:
37840 case CODE_FOR_sse_movntq:
37841 case CODE_FOR_sse2_movntisi:
37842 case CODE_FOR_avx512f_movntv16sf:
37843 case CODE_FOR_avx512f_movntv8df:
37844 case CODE_FOR_avx512f_movntv8di:
37845 aligned_mem = true;
37846 break;
37847 default:
37848 break;
37850 break;
37851 case V4SF_FTYPE_V4SF_PCV2SF:
37852 case V2DF_FTYPE_V2DF_PCDOUBLE:
37853 nargs = 2;
37854 klass = load;
37855 memory = 1;
37856 break;
37857 case V8SF_FTYPE_PCV8SF_V8SI:
37858 case V4DF_FTYPE_PCV4DF_V4DI:
37859 case V4SF_FTYPE_PCV4SF_V4SI:
37860 case V2DF_FTYPE_PCV2DF_V2DI:
37861 case V8SI_FTYPE_PCV8SI_V8SI:
37862 case V4DI_FTYPE_PCV4DI_V4DI:
37863 case V4SI_FTYPE_PCV4SI_V4SI:
37864 case V2DI_FTYPE_PCV2DI_V2DI:
37865 nargs = 2;
37866 klass = load;
37867 memory = 0;
37868 break;
37869 case VOID_FTYPE_PV8DF_V8DF_QI:
37870 case VOID_FTYPE_PV16SF_V16SF_HI:
37871 case VOID_FTYPE_PV8DI_V8DI_QI:
37872 case VOID_FTYPE_PV4DI_V4DI_QI:
37873 case VOID_FTYPE_PV2DI_V2DI_QI:
37874 case VOID_FTYPE_PV16SI_V16SI_HI:
37875 case VOID_FTYPE_PV8SI_V8SI_QI:
37876 case VOID_FTYPE_PV4SI_V4SI_QI:
37877 switch (icode)
37879 /* These builtins and instructions require the memory
37880 to be properly aligned. */
37881 case CODE_FOR_avx512f_storev16sf_mask:
37882 case CODE_FOR_avx512f_storev16si_mask:
37883 case CODE_FOR_avx512f_storev8df_mask:
37884 case CODE_FOR_avx512f_storev8di_mask:
37885 case CODE_FOR_avx512vl_storev8sf_mask:
37886 case CODE_FOR_avx512vl_storev8si_mask:
37887 case CODE_FOR_avx512vl_storev4df_mask:
37888 case CODE_FOR_avx512vl_storev4di_mask:
37889 case CODE_FOR_avx512vl_storev4sf_mask:
37890 case CODE_FOR_avx512vl_storev4si_mask:
37891 case CODE_FOR_avx512vl_storev2df_mask:
37892 case CODE_FOR_avx512vl_storev2di_mask:
37893 aligned_mem = true;
37894 break;
37895 default:
37896 break;
37898 /* FALLTHRU */
37899 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37900 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37901 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37902 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37903 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37904 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37905 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37906 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37907 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37908 case VOID_FTYPE_PFLOAT_V4SF_QI:
37909 case VOID_FTYPE_PV8SI_V8DI_QI:
37910 case VOID_FTYPE_PV8HI_V8DI_QI:
37911 case VOID_FTYPE_PV16HI_V16SI_HI:
37912 case VOID_FTYPE_PV16QI_V8DI_QI:
37913 case VOID_FTYPE_PV16QI_V16SI_HI:
37914 case VOID_FTYPE_PV4SI_V4DI_QI:
37915 case VOID_FTYPE_PV4SI_V2DI_QI:
37916 case VOID_FTYPE_PV8HI_V4DI_QI:
37917 case VOID_FTYPE_PV8HI_V2DI_QI:
37918 case VOID_FTYPE_PV8HI_V8SI_QI:
37919 case VOID_FTYPE_PV8HI_V4SI_QI:
37920 case VOID_FTYPE_PV16QI_V4DI_QI:
37921 case VOID_FTYPE_PV16QI_V2DI_QI:
37922 case VOID_FTYPE_PV16QI_V8SI_QI:
37923 case VOID_FTYPE_PV16QI_V4SI_QI:
37924 case VOID_FTYPE_PV8HI_V8HI_QI:
37925 case VOID_FTYPE_PV16HI_V16HI_HI:
37926 case VOID_FTYPE_PV32HI_V32HI_SI:
37927 case VOID_FTYPE_PV16QI_V16QI_HI:
37928 case VOID_FTYPE_PV32QI_V32QI_SI:
37929 case VOID_FTYPE_PV64QI_V64QI_DI:
37930 case VOID_FTYPE_PV4DF_V4DF_QI:
37931 case VOID_FTYPE_PV2DF_V2DF_QI:
37932 case VOID_FTYPE_PV8SF_V8SF_QI:
37933 case VOID_FTYPE_PV4SF_V4SF_QI:
37934 nargs = 2;
37935 klass = store;
37936 /* Reserve memory operand for target. */
37937 memory = ARRAY_SIZE (args);
37938 break;
37939 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37940 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37941 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37942 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37943 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37944 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37945 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37946 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37947 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37948 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37949 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37950 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37951 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37952 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37953 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37954 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37955 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37956 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37957 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37958 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37959 nargs = 3;
37960 klass = load;
37961 memory = 0;
37962 switch (icode)
37964 /* These builtins and instructions require the memory
37965 to be properly aligned. */
37966 case CODE_FOR_avx512f_loadv16sf_mask:
37967 case CODE_FOR_avx512f_loadv16si_mask:
37968 case CODE_FOR_avx512f_loadv8df_mask:
37969 case CODE_FOR_avx512f_loadv8di_mask:
37970 case CODE_FOR_avx512vl_loadv8sf_mask:
37971 case CODE_FOR_avx512vl_loadv8si_mask:
37972 case CODE_FOR_avx512vl_loadv4df_mask:
37973 case CODE_FOR_avx512vl_loadv4di_mask:
37974 case CODE_FOR_avx512vl_loadv4sf_mask:
37975 case CODE_FOR_avx512vl_loadv4si_mask:
37976 case CODE_FOR_avx512vl_loadv2df_mask:
37977 case CODE_FOR_avx512vl_loadv2di_mask:
37978 case CODE_FOR_avx512bw_loadv64qi_mask:
37979 case CODE_FOR_avx512vl_loadv32qi_mask:
37980 case CODE_FOR_avx512vl_loadv16qi_mask:
37981 case CODE_FOR_avx512bw_loadv32hi_mask:
37982 case CODE_FOR_avx512vl_loadv16hi_mask:
37983 case CODE_FOR_avx512vl_loadv8hi_mask:
37984 aligned_mem = true;
37985 break;
37986 default:
37987 break;
37989 break;
37990 case VOID_FTYPE_UINT_UINT_UINT:
37991 case VOID_FTYPE_UINT64_UINT_UINT:
37992 case UCHAR_FTYPE_UINT_UINT_UINT:
37993 case UCHAR_FTYPE_UINT64_UINT_UINT:
37994 nargs = 3;
37995 klass = load;
37996 memory = ARRAY_SIZE (args);
37997 last_arg_constant = true;
37998 break;
37999 default:
38000 gcc_unreachable ();
38003 gcc_assert (nargs <= ARRAY_SIZE (args));
38005 if (klass == store)
38007 arg = CALL_EXPR_ARG (exp, 0);
38008 op = expand_normal (arg);
38009 gcc_assert (target == 0);
38010 if (memory)
38012 op = ix86_zero_extend_to_Pmode (op);
38013 target = gen_rtx_MEM (tmode, op);
38014 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38015 on it. Try to improve it using get_pointer_alignment,
38016 and if the special builtin is one that requires strict
38017 mode alignment, also from its GET_MODE_ALIGNMENT.
38018 Failure to do so could lead to ix86_legitimate_combined_insn
38019 rejecting all changes to such insns. */
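/* For instance (an illustrative assumption): for _mm256_stream_pd (p, v)
   the MEM built above starts out with only byte alignment even though the
   store needs a 32-byte-aligned operand; get_pointer_alignment on the
   pointer argument plus the GET_MODE_ALIGNMENT floor restores that
   information.  */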
38020 unsigned int align = get_pointer_alignment (arg);
38021 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38022 align = GET_MODE_ALIGNMENT (tmode);
38023 if (MEM_ALIGN (target) < align)
38024 set_mem_align (target, align);
38026 else
38027 target = force_reg (tmode, op);
38028 arg_adjust = 1;
38030 else
38032 arg_adjust = 0;
38033 if (optimize
38034 || target == 0
38035 || !register_operand (target, tmode)
38036 || GET_MODE (target) != tmode)
38037 target = gen_reg_rtx (tmode);
38040 for (i = 0; i < nargs; i++)
38042 machine_mode mode = insn_p->operand[i + 1].mode;
38043 bool match;
38045 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38046 op = expand_normal (arg);
38047 match = insn_p->operand[i + 1].predicate (op, mode);
38049 if (last_arg_constant && (i + 1) == nargs)
38051 if (!match)
38053 if (icode == CODE_FOR_lwp_lwpvalsi3
38054 || icode == CODE_FOR_lwp_lwpinssi3
38055 || icode == CODE_FOR_lwp_lwpvaldi3
38056 || icode == CODE_FOR_lwp_lwpinsdi3)
38057 error ("the last argument must be a 32-bit immediate");
38058 else
38059 error ("the last argument must be an 8-bit immediate");
38060 return const0_rtx;
38063 else
38065 if (i == memory)
38067 /* This must be the memory operand. */
38068 op = ix86_zero_extend_to_Pmode (op);
38069 op = gen_rtx_MEM (mode, op);
38070 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38071 on it. Try to improve it using get_pointer_alignment,
38072 and if the special builtin is one that requires strict
38073 mode alignment, also from its GET_MODE_ALIGNMENT.
38074 Failure to do so could lead to ix86_legitimate_combined_insn
38075 rejecting all changes to such insns. */
38076 unsigned int align = get_pointer_alignment (arg);
38077 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38078 align = GET_MODE_ALIGNMENT (mode);
38079 if (MEM_ALIGN (op) < align)
38080 set_mem_align (op, align);
38082 else
38084 /* This must be a register. */
38085 if (VECTOR_MODE_P (mode))
38086 op = safe_vector_operand (op, mode);
38088 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38089 op = copy_to_mode_reg (mode, op);
38090 else
38092 op = copy_to_reg (op);
38093 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38098 args[i].op = op;
38099 args[i].mode = mode;
38102 switch (nargs)
38104 case 0:
38105 pat = GEN_FCN (icode) (target);
38106 break;
38107 case 1:
38108 pat = GEN_FCN (icode) (target, args[0].op);
38109 break;
38110 case 2:
38111 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38112 break;
38113 case 3:
38114 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38115 break;
38116 default:
38117 gcc_unreachable ();
38120 if (! pat)
38121 return 0;
38122 emit_insn (pat);
38123 return klass == store ? 0 : target;
38126 /* Return the integer constant in ARG. Constrain it to be in the range
38127 of the subparts of VEC_TYPE; issue an error if not. */
38129 static int
38130 get_element_number (tree vec_type, tree arg)
38132 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38134 if (!tree_fits_uhwi_p (arg)
38135 || (elt = tree_to_uhwi (arg), elt > max))
38137 error ("selector must be an integer constant in the range 0..%wi", max);
38138 return 0;
38141 return elt;
38144 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38145 ix86_expand_vector_init. We DO have language-level syntax for this, in
38146 the form of (type){ init-list }. Except that since we can't place emms
38147 instructions from inside the compiler, we can't allow the use of MMX
38148 registers unless the user explicitly asks for it. So we do *not* define
38149 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38150 we have builtins invoked by mmintrin.h that give us license to emit
38151 these sorts of instructions. */
38153 static rtx
38154 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38156 machine_mode tmode = TYPE_MODE (type);
38157 machine_mode inner_mode = GET_MODE_INNER (tmode);
38158 int i, n_elt = GET_MODE_NUNITS (tmode);
38159 rtvec v = rtvec_alloc (n_elt);
38161 gcc_assert (VECTOR_MODE_P (tmode));
38162 gcc_assert (call_expr_nargs (exp) == n_elt);
38164 for (i = 0; i < n_elt; ++i)
38166 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38167 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38170 if (!target || !register_operand (target, tmode))
38171 target = gen_reg_rtx (tmode);
38173 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38174 return target;
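/* For illustration: these vec_init builtins are normally reached through
   the mmintrin.h wrappers rather than called directly.  A sketch of such
   a wrapper (not code from this file, names as in the MMX intrinsics):

     __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   which arrives here with TYPE_MODE (type) == V2SImode and one call
   argument per vector element.  */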
38177 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38178 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38179 had a language-level syntax for referencing vector elements. */
38181 static rtx
38182 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38184 machine_mode tmode, mode0;
38185 tree arg0, arg1;
38186 int elt;
38187 rtx op0;
38189 arg0 = CALL_EXPR_ARG (exp, 0);
38190 arg1 = CALL_EXPR_ARG (exp, 1);
38192 op0 = expand_normal (arg0);
38193 elt = get_element_number (TREE_TYPE (arg0), arg1);
38195 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38196 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38197 gcc_assert (VECTOR_MODE_P (mode0));
38199 op0 = force_reg (mode0, op0);
38201 if (optimize || !target || !register_operand (target, tmode))
38202 target = gen_reg_rtx (tmode);
38204 ix86_expand_vector_extract (true, target, op0, elt);
38206 return target;
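/* For illustration: a typical path into this expander is an element
   extraction intrinsic such as the SSE2 wrapper sketched below (not
   code from this file):

     int
     _mm_extract_epi16 (__m128i __A, int __N)
     {
       return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N);
     }

   where __N must be a constant selector in the range enforced by
   get_element_number above.  */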
38209 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38210 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38211 a language-level syntax for referencing vector elements. */
38213 static rtx
38214 ix86_expand_vec_set_builtin (tree exp)
38216 machine_mode tmode, mode1;
38217 tree arg0, arg1, arg2;
38218 int elt;
38219 rtx op0, op1, target;
38221 arg0 = CALL_EXPR_ARG (exp, 0);
38222 arg1 = CALL_EXPR_ARG (exp, 1);
38223 arg2 = CALL_EXPR_ARG (exp, 2);
38225 tmode = TYPE_MODE (TREE_TYPE (arg0));
38226 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38227 gcc_assert (VECTOR_MODE_P (tmode));
38229 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38230 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38231 elt = get_element_number (TREE_TYPE (arg0), arg2);
38233 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38234 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38236 op0 = force_reg (tmode, op0);
38237 op1 = force_reg (mode1, op1);
38239 /* OP0 is the source of these builtin functions and shouldn't be
38240 modified. Create a copy, use it and return it as target. */
38241 target = gen_reg_rtx (tmode);
38242 emit_move_insn (target, op0);
38243 ix86_expand_vector_set (true, target, op1, elt);
38245 return target;
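/* For illustration: the vec_set builtins back element insertion
   intrinsics such as the wrapper sketched below (not code from this
   file):

     __m128i
     _mm_insert_epi16 (__m128i __A, int __D, int __N)
     {
       return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A, __D, __N);
     }

   Note that the expander copies __A into a fresh register first, so the
   source vector operand itself is never modified.  */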
38248 /* Emit conditional move of SRC to DST with condition
38249 OP1 CODE OP2. */
38250 static void
38251 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38253 rtx t;
38255 if (TARGET_CMOVE)
38257 t = ix86_expand_compare (code, op1, op2);
38258 emit_insn (gen_rtx_SET (VOIDmode, dst,
38259 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38260 src, dst)));
38262 else
38264 rtx nomove = gen_label_rtx ();
38265 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38266 const0_rtx, GET_MODE (op1), 1, nomove);
38267 emit_move_insn (dst, src);
38268 emit_label (nomove);
38272 /* Choose max of DST and SRC and put it in DST. */
38273 static void
38274 ix86_emit_move_max (rtx dst, rtx src)
38276 ix86_emit_cmove (dst, src, LTU, dst, src);
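/* With an unsigned-below (LTU) condition the conditional move above
   implements an unsigned maximum, roughly:

     if (dst < src)      (unsigned comparison)
       dst = src;

   which is what the bounds code below relies on, for both the lower
   bound and the one's-complement upper bound.  */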
38279 /* Expand an expression EXP that calls a built-in function,
38280 with result going to TARGET if that's convenient
38281 (and in mode MODE if that's convenient).
38282 SUBTARGET may be used as the target for computing one of EXP's operands.
38283 IGNORE is nonzero if the value is to be ignored. */
38285 static rtx
38286 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38287 machine_mode mode, int ignore)
38289 const struct builtin_description *d;
38290 size_t i;
38291 enum insn_code icode;
38292 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38293 tree arg0, arg1, arg2, arg3, arg4;
38294 rtx op0, op1, op2, op3, op4, pat, insn;
38295 machine_mode mode0, mode1, mode2, mode3, mode4;
38296 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38298 /* For CPU builtins that can be folded, fold first and expand the fold. */
38299 switch (fcode)
38301 case IX86_BUILTIN_CPU_INIT:
38303 /* Make it call __cpu_indicator_init in libgcc. */
38304 tree call_expr, fndecl, type;
38305 type = build_function_type_list (integer_type_node, NULL_TREE);
38306 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38307 call_expr = build_call_expr (fndecl, 0);
38308 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38310 case IX86_BUILTIN_CPU_IS:
38311 case IX86_BUILTIN_CPU_SUPPORTS:
38313 tree arg0 = CALL_EXPR_ARG (exp, 0);
38314 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38315 gcc_assert (fold_expr != NULL_TREE);
38316 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38320 /* Determine whether the builtin function is available under the current ISA.
38321 Originally the builtin was not created if it wasn't applicable to the
38322 current ISA based on the command line switches. With function specific
38323 options, we need to check in the context of the function making the call
38324 whether it is supported. */
38325 if (ix86_builtins_isa[fcode].isa
38326 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38328 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38329 NULL, (enum fpmath_unit) 0, false);
38331 if (!opts)
38332 error ("%qE needs unknown isa option", fndecl);
38333 else
38335 gcc_assert (opts != NULL);
38336 error ("%qE needs isa option %s", fndecl, opts);
38337 free (opts);
38339 return const0_rtx;
38342 switch (fcode)
38344 case IX86_BUILTIN_BNDMK:
38345 if (!target
38346 || GET_MODE (target) != BNDmode
38347 || !register_operand (target, BNDmode))
38348 target = gen_reg_rtx (BNDmode);
38350 arg0 = CALL_EXPR_ARG (exp, 0);
38351 arg1 = CALL_EXPR_ARG (exp, 1);
38353 op0 = expand_normal (arg0);
38354 op1 = expand_normal (arg1);
38356 if (!register_operand (op0, Pmode))
38357 op0 = ix86_zero_extend_to_Pmode (op0);
38358 if (!register_operand (op1, Pmode))
38359 op1 = ix86_zero_extend_to_Pmode (op1);
38361 /* Builtin arg1 is size of block but instruction op1 should
38362 be (size - 1). */
38363 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38364 NULL_RTX, 1, OPTAB_DIRECT);
38366 emit_insn (BNDmode == BND64mode
38367 ? gen_bnd64_mk (target, op0, op1)
38368 : gen_bnd32_mk (target, op0, op1));
38369 return target;
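/* For illustration: with -fcheck-pointer-bounds this expansion is
   typically reached from the Pointer Bounds Checker built-in behind
   __bnd_set_ptr_bounds (a sketch, not code from this file):

     int *p = (int *) __bnd_set_ptr_bounds (q, 40);

   creates bounds [q, q + 39]; the PLUS with constm1_rtx above is what
   turns the user-visible size into the (size - 1) form the bndmk
   instruction expects.  */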
38371 case IX86_BUILTIN_BNDSTX:
38372 arg0 = CALL_EXPR_ARG (exp, 0);
38373 arg1 = CALL_EXPR_ARG (exp, 1);
38374 arg2 = CALL_EXPR_ARG (exp, 2);
38376 op0 = expand_normal (arg0);
38377 op1 = expand_normal (arg1);
38378 op2 = expand_normal (arg2);
38380 if (!register_operand (op0, Pmode))
38381 op0 = ix86_zero_extend_to_Pmode (op0);
38382 if (!register_operand (op1, BNDmode))
38383 op1 = copy_to_mode_reg (BNDmode, op1);
38384 if (!register_operand (op2, Pmode))
38385 op2 = ix86_zero_extend_to_Pmode (op2);
38387 emit_insn (BNDmode == BND64mode
38388 ? gen_bnd64_stx (op2, op0, op1)
38389 : gen_bnd32_stx (op2, op0, op1));
38390 return 0;
38392 case IX86_BUILTIN_BNDLDX:
38393 if (!target
38394 || GET_MODE (target) != BNDmode
38395 || !register_operand (target, BNDmode))
38396 target = gen_reg_rtx (BNDmode);
38398 arg0 = CALL_EXPR_ARG (exp, 0);
38399 arg1 = CALL_EXPR_ARG (exp, 1);
38401 op0 = expand_normal (arg0);
38402 op1 = expand_normal (arg1);
38404 if (!register_operand (op0, Pmode))
38405 op0 = ix86_zero_extend_to_Pmode (op0);
38406 if (!register_operand (op1, Pmode))
38407 op1 = ix86_zero_extend_to_Pmode (op1);
38409 emit_insn (BNDmode == BND64mode
38410 ? gen_bnd64_ldx (target, op0, op1)
38411 : gen_bnd32_ldx (target, op0, op1));
38412 return target;
38414 case IX86_BUILTIN_BNDCL:
38415 arg0 = CALL_EXPR_ARG (exp, 0);
38416 arg1 = CALL_EXPR_ARG (exp, 1);
38418 op0 = expand_normal (arg0);
38419 op1 = expand_normal (arg1);
38421 if (!register_operand (op0, Pmode))
38422 op0 = ix86_zero_extend_to_Pmode (op0);
38423 if (!register_operand (op1, BNDmode))
38424 op1 = copy_to_mode_reg (BNDmode, op1);
38426 emit_insn (BNDmode == BND64mode
38427 ? gen_bnd64_cl (op1, op0)
38428 : gen_bnd32_cl (op1, op0));
38429 return 0;
38431 case IX86_BUILTIN_BNDCU:
38432 arg0 = CALL_EXPR_ARG (exp, 0);
38433 arg1 = CALL_EXPR_ARG (exp, 1);
38435 op0 = expand_normal (arg0);
38436 op1 = expand_normal (arg1);
38438 if (!register_operand (op0, Pmode))
38439 op0 = ix86_zero_extend_to_Pmode (op0);
38440 if (!register_operand (op1, BNDmode))
38441 op1 = copy_to_mode_reg (BNDmode, op1);
38443 emit_insn (BNDmode == BND64mode
38444 ? gen_bnd64_cu (op1, op0)
38445 : gen_bnd32_cu (op1, op0));
38446 return 0;
38448 case IX86_BUILTIN_BNDRET:
38449 arg0 = CALL_EXPR_ARG (exp, 0);
38450 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38451 target = chkp_get_rtl_bounds (arg0);
38453 /* If no bounds were specified for returned value,
38454 then use INIT bounds. It usually happens when
38455 some built-in function is expanded. */
38456 if (!target)
38458 rtx t1 = gen_reg_rtx (Pmode);
38459 rtx t2 = gen_reg_rtx (Pmode);
38460 target = gen_reg_rtx (BNDmode);
38461 emit_move_insn (t1, const0_rtx);
38462 emit_move_insn (t2, constm1_rtx);
38463 emit_insn (BNDmode == BND64mode
38464 ? gen_bnd64_mk (target, t1, t2)
38465 : gen_bnd32_mk (target, t1, t2));
38468 gcc_assert (target && REG_P (target));
38469 return target;
38471 case IX86_BUILTIN_BNDNARROW:
38473 rtx m1, m1h1, m1h2, lb, ub, t1;
38475 /* Return value and lb. */
38476 arg0 = CALL_EXPR_ARG (exp, 0);
38477 /* Bounds. */
38478 arg1 = CALL_EXPR_ARG (exp, 1);
38479 /* Size. */
38480 arg2 = CALL_EXPR_ARG (exp, 2);
38482 lb = expand_normal (arg0);
38483 op1 = expand_normal (arg1);
38484 op2 = expand_normal (arg2);
38486 /* Size was passed but we need to use (size - 1) as for bndmk. */
38487 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38488 NULL_RTX, 1, OPTAB_DIRECT);
38490 /* Add LB to size and invert to get UB. */
38491 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38492 op2, 1, OPTAB_DIRECT);
38493 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38495 if (!register_operand (lb, Pmode))
38496 lb = ix86_zero_extend_to_Pmode (lb);
38497 if (!register_operand (ub, Pmode))
38498 ub = ix86_zero_extend_to_Pmode (ub);
38500 /* We need to move bounds to memory before any computations. */
38501 if (MEM_P (op1))
38502 m1 = op1;
38503 else
38505 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38506 emit_move_insn (m1, op1);
38509 /* Generate mem expression to be used for access to LB and UB. */
38510 m1h1 = adjust_address (m1, Pmode, 0);
38511 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38513 t1 = gen_reg_rtx (Pmode);
38515 /* Compute LB. */
38516 emit_move_insn (t1, m1h1);
38517 ix86_emit_move_max (t1, lb);
38518 emit_move_insn (m1h1, t1);
38520 /* Compute UB. UB is stored in 1's complement form. Therefore
38521 we also use max here. */
38522 emit_move_insn (t1, m1h2);
38523 ix86_emit_move_max (t1, ub);
38524 emit_move_insn (m1h2, t1);
38526 op2 = gen_reg_rtx (BNDmode);
38527 emit_move_insn (op2, m1);
38529 return chkp_join_splitted_slot (lb, op2);
38532 case IX86_BUILTIN_BNDINT:
38534 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38536 if (!target
38537 || GET_MODE (target) != BNDmode
38538 || !register_operand (target, BNDmode))
38539 target = gen_reg_rtx (BNDmode);
38541 arg0 = CALL_EXPR_ARG (exp, 0);
38542 arg1 = CALL_EXPR_ARG (exp, 1);
38544 op0 = expand_normal (arg0);
38545 op1 = expand_normal (arg1);
38547 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38548 rh1 = adjust_address (res, Pmode, 0);
38549 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38551 /* Put first bounds to temporaries. */
38552 lb1 = gen_reg_rtx (Pmode);
38553 ub1 = gen_reg_rtx (Pmode);
38554 if (MEM_P (op0))
38556 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38557 emit_move_insn (ub1, adjust_address (op0, Pmode,
38558 GET_MODE_SIZE (Pmode)));
38560 else
38562 emit_move_insn (res, op0);
38563 emit_move_insn (lb1, rh1);
38564 emit_move_insn (ub1, rh2);
38567 /* Put second bounds to temporaries. */
38568 lb2 = gen_reg_rtx (Pmode);
38569 ub2 = gen_reg_rtx (Pmode);
38570 if (MEM_P (op1))
38572 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38573 emit_move_insn (ub2, adjust_address (op1, Pmode,
38574 GET_MODE_SIZE (Pmode)));
38576 else
38578 emit_move_insn (res, op1);
38579 emit_move_insn (lb2, rh1);
38580 emit_move_insn (ub2, rh2);
38583 /* Compute LB. */
38584 ix86_emit_move_max (lb1, lb2);
38585 emit_move_insn (rh1, lb1);
38587 /* Compute UB. UB is stored in 1's complement form. Therefore
38588 we also use max here. */
38589 ix86_emit_move_max (ub1, ub2);
38590 emit_move_insn (rh2, ub1);
38592 emit_move_insn (target, res);
38594 return target;
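/* Worked example of the intersection above: intersecting [0, 15] with
   [4, 31] takes max (0, 4) = 4 as the new LB, while the stored upper
   bounds are ~15 and ~31; as unsigned values ~15 > ~31, so the max
   picks ~15, i.e. an upper bound of 15, giving [4, 15] as expected.
   The one's-complement representation is what lets a single unsigned
   max handle both halves.  */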
38597 case IX86_BUILTIN_SIZEOF:
38599 tree name;
38600 rtx symbol;
38602 if (!target
38603 || GET_MODE (target) != Pmode
38604 || !register_operand (target, Pmode))
38605 target = gen_reg_rtx (Pmode);
38607 arg0 = CALL_EXPR_ARG (exp, 0);
38608 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38610 name = DECL_ASSEMBLER_NAME (arg0);
38611 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38613 emit_insn (Pmode == SImode
38614 ? gen_move_size_reloc_si (target, symbol)
38615 : gen_move_size_reloc_di (target, symbol));
38617 return target;
38620 case IX86_BUILTIN_BNDLOWER:
38622 rtx mem, hmem;
38624 if (!target
38625 || GET_MODE (target) != Pmode
38626 || !register_operand (target, Pmode))
38627 target = gen_reg_rtx (Pmode);
38629 arg0 = CALL_EXPR_ARG (exp, 0);
38630 op0 = expand_normal (arg0);
38632 /* We need to move bounds to memory first. */
38633 if (MEM_P (op0))
38634 mem = op0;
38635 else
38637 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38638 emit_move_insn (mem, op0);
38641 /* Generate mem expression to access LB and load it. */
38642 hmem = adjust_address (mem, Pmode, 0);
38643 emit_move_insn (target, hmem);
38645 return target;
38648 case IX86_BUILTIN_BNDUPPER:
38650 rtx mem, hmem, res;
38652 if (!target
38653 || GET_MODE (target) != Pmode
38654 || !register_operand (target, Pmode))
38655 target = gen_reg_rtx (Pmode);
38657 arg0 = CALL_EXPR_ARG (exp, 0);
38658 op0 = expand_normal (arg0);
38660 /* We need to move bounds to memory first. */
38661 if (MEM_P (op0))
38662 mem = op0;
38663 else
38665 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38666 emit_move_insn (mem, op0);
38669 /* Generate mem expression to access UB. */
38670 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38672 /* We need to invert all bits of UB. */
38673 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38675 if (res != target)
38676 emit_move_insn (target, res);
38678 return target;
38681 case IX86_BUILTIN_MASKMOVQ:
38682 case IX86_BUILTIN_MASKMOVDQU:
38683 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38684 ? CODE_FOR_mmx_maskmovq
38685 : CODE_FOR_sse2_maskmovdqu);
38686 /* Note the arg order is different from the operand order. */
38687 arg1 = CALL_EXPR_ARG (exp, 0);
38688 arg2 = CALL_EXPR_ARG (exp, 1);
38689 arg0 = CALL_EXPR_ARG (exp, 2);
38690 op0 = expand_normal (arg0);
38691 op1 = expand_normal (arg1);
38692 op2 = expand_normal (arg2);
38693 mode0 = insn_data[icode].operand[0].mode;
38694 mode1 = insn_data[icode].operand[1].mode;
38695 mode2 = insn_data[icode].operand[2].mode;
38697 op0 = ix86_zero_extend_to_Pmode (op0);
38698 op0 = gen_rtx_MEM (mode1, op0);
38700 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38701 op0 = copy_to_mode_reg (mode0, op0);
38702 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38703 op1 = copy_to_mode_reg (mode1, op1);
38704 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38705 op2 = copy_to_mode_reg (mode2, op2);
38706 pat = GEN_FCN (icode) (op0, op1, op2);
38707 if (! pat)
38708 return 0;
38709 emit_insn (pat);
38710 return 0;
38712 case IX86_BUILTIN_LDMXCSR:
38713 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38714 target = assign_386_stack_local (SImode, SLOT_TEMP);
38715 emit_move_insn (target, op0);
38716 emit_insn (gen_sse_ldmxcsr (target));
38717 return 0;
38719 case IX86_BUILTIN_STMXCSR:
38720 target = assign_386_stack_local (SImode, SLOT_TEMP);
38721 emit_insn (gen_sse_stmxcsr (target));
38722 return copy_to_mode_reg (SImode, target);
38724 case IX86_BUILTIN_CLFLUSH:
38725 arg0 = CALL_EXPR_ARG (exp, 0);
38726 op0 = expand_normal (arg0);
38727 icode = CODE_FOR_sse2_clflush;
38728 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38729 op0 = ix86_zero_extend_to_Pmode (op0);
38731 emit_insn (gen_sse2_clflush (op0));
38732 return 0;
38734 case IX86_BUILTIN_CLWB:
38735 arg0 = CALL_EXPR_ARG (exp, 0);
38736 op0 = expand_normal (arg0);
38737 icode = CODE_FOR_clwb;
38738 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38739 op0 = ix86_zero_extend_to_Pmode (op0);
38741 emit_insn (gen_clwb (op0));
38742 return 0;
38744 case IX86_BUILTIN_CLFLUSHOPT:
38745 arg0 = CALL_EXPR_ARG (exp, 0);
38746 op0 = expand_normal (arg0);
38747 icode = CODE_FOR_clflushopt;
38748 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38749 op0 = ix86_zero_extend_to_Pmode (op0);
38751 emit_insn (gen_clflushopt (op0));
38752 return 0;
38754 case IX86_BUILTIN_MONITOR:
38755 arg0 = CALL_EXPR_ARG (exp, 0);
38756 arg1 = CALL_EXPR_ARG (exp, 1);
38757 arg2 = CALL_EXPR_ARG (exp, 2);
38758 op0 = expand_normal (arg0);
38759 op1 = expand_normal (arg1);
38760 op2 = expand_normal (arg2);
38761 if (!REG_P (op0))
38762 op0 = ix86_zero_extend_to_Pmode (op0);
38763 if (!REG_P (op1))
38764 op1 = copy_to_mode_reg (SImode, op1);
38765 if (!REG_P (op2))
38766 op2 = copy_to_mode_reg (SImode, op2);
38767 emit_insn (ix86_gen_monitor (op0, op1, op2));
38768 return 0;
38770 case IX86_BUILTIN_MWAIT:
38771 arg0 = CALL_EXPR_ARG (exp, 0);
38772 arg1 = CALL_EXPR_ARG (exp, 1);
38773 op0 = expand_normal (arg0);
38774 op1 = expand_normal (arg1);
38775 if (!REG_P (op0))
38776 op0 = copy_to_mode_reg (SImode, op0);
38777 if (!REG_P (op1))
38778 op1 = copy_to_mode_reg (SImode, op1);
38779 emit_insn (gen_sse3_mwait (op0, op1));
38780 return 0;
38782 case IX86_BUILTIN_VEC_INIT_V2SI:
38783 case IX86_BUILTIN_VEC_INIT_V4HI:
38784 case IX86_BUILTIN_VEC_INIT_V8QI:
38785 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38787 case IX86_BUILTIN_VEC_EXT_V2DF:
38788 case IX86_BUILTIN_VEC_EXT_V2DI:
38789 case IX86_BUILTIN_VEC_EXT_V4SF:
38790 case IX86_BUILTIN_VEC_EXT_V4SI:
38791 case IX86_BUILTIN_VEC_EXT_V8HI:
38792 case IX86_BUILTIN_VEC_EXT_V2SI:
38793 case IX86_BUILTIN_VEC_EXT_V4HI:
38794 case IX86_BUILTIN_VEC_EXT_V16QI:
38795 return ix86_expand_vec_ext_builtin (exp, target);
38797 case IX86_BUILTIN_VEC_SET_V2DI:
38798 case IX86_BUILTIN_VEC_SET_V4SF:
38799 case IX86_BUILTIN_VEC_SET_V4SI:
38800 case IX86_BUILTIN_VEC_SET_V8HI:
38801 case IX86_BUILTIN_VEC_SET_V4HI:
38802 case IX86_BUILTIN_VEC_SET_V16QI:
38803 return ix86_expand_vec_set_builtin (exp);
38805 case IX86_BUILTIN_INFQ:
38806 case IX86_BUILTIN_HUGE_VALQ:
38808 REAL_VALUE_TYPE inf;
38809 rtx tmp;
38811 real_inf (&inf);
38812 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38814 tmp = validize_mem (force_const_mem (mode, tmp));
38816 if (target == 0)
38817 target = gen_reg_rtx (mode);
38819 emit_move_insn (target, tmp);
38820 return target;
38823 case IX86_BUILTIN_RDPMC:
38824 case IX86_BUILTIN_RDTSC:
38825 case IX86_BUILTIN_RDTSCP:
38827 op0 = gen_reg_rtx (DImode);
38828 op1 = gen_reg_rtx (DImode);
38830 if (fcode == IX86_BUILTIN_RDPMC)
38832 arg0 = CALL_EXPR_ARG (exp, 0);
38833 op2 = expand_normal (arg0);
38834 if (!register_operand (op2, SImode))
38835 op2 = copy_to_mode_reg (SImode, op2);
38837 insn = (TARGET_64BIT
38838 ? gen_rdpmc_rex64 (op0, op1, op2)
38839 : gen_rdpmc (op0, op2));
38840 emit_insn (insn);
38842 else if (fcode == IX86_BUILTIN_RDTSC)
38844 insn = (TARGET_64BIT
38845 ? gen_rdtsc_rex64 (op0, op1)
38846 : gen_rdtsc (op0));
38847 emit_insn (insn);
38849 else
38851 op2 = gen_reg_rtx (SImode);
38853 insn = (TARGET_64BIT
38854 ? gen_rdtscp_rex64 (op0, op1, op2)
38855 : gen_rdtscp (op0, op2));
38856 emit_insn (insn);
38858 arg0 = CALL_EXPR_ARG (exp, 0);
38859 op4 = expand_normal (arg0);
38860 if (!address_operand (op4, VOIDmode))
38862 op4 = convert_memory_address (Pmode, op4);
38863 op4 = copy_addr_to_reg (op4);
38865 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38868 if (target == 0)
38870 /* mode is VOIDmode if __builtin_rd* has been called
38871 without lhs. */
38872 if (mode == VOIDmode)
38873 return target;
38874 target = gen_reg_rtx (mode);
38877 if (TARGET_64BIT)
38879 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38880 op1, 1, OPTAB_DIRECT);
38881 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38882 op0, 1, OPTAB_DIRECT);
38885 emit_move_insn (target, op0);
38886 return target;
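/* For illustration: these builtins sit behind wrappers such as __rdtsc
   in ia32intrin.h (a sketch, not code from this file):

     unsigned long long t = __rdtsc ();

   On 64-bit targets the raw result arrives split across two registers
   (EDX:EAX), which is why the code above shifts the high part left by
   32 and ORs it into the low part before the final move.  */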
38888 case IX86_BUILTIN_FXSAVE:
38889 case IX86_BUILTIN_FXRSTOR:
38890 case IX86_BUILTIN_FXSAVE64:
38891 case IX86_BUILTIN_FXRSTOR64:
38892 case IX86_BUILTIN_FNSTENV:
38893 case IX86_BUILTIN_FLDENV:
38894 mode0 = BLKmode;
38895 switch (fcode)
38897 case IX86_BUILTIN_FXSAVE:
38898 icode = CODE_FOR_fxsave;
38899 break;
38900 case IX86_BUILTIN_FXRSTOR:
38901 icode = CODE_FOR_fxrstor;
38902 break;
38903 case IX86_BUILTIN_FXSAVE64:
38904 icode = CODE_FOR_fxsave64;
38905 break;
38906 case IX86_BUILTIN_FXRSTOR64:
38907 icode = CODE_FOR_fxrstor64;
38908 break;
38909 case IX86_BUILTIN_FNSTENV:
38910 icode = CODE_FOR_fnstenv;
38911 break;
38912 case IX86_BUILTIN_FLDENV:
38913 icode = CODE_FOR_fldenv;
38914 break;
38915 default:
38916 gcc_unreachable ();
38919 arg0 = CALL_EXPR_ARG (exp, 0);
38920 op0 = expand_normal (arg0);
38922 if (!address_operand (op0, VOIDmode))
38924 op0 = convert_memory_address (Pmode, op0);
38925 op0 = copy_addr_to_reg (op0);
38927 op0 = gen_rtx_MEM (mode0, op0);
38929 pat = GEN_FCN (icode) (op0);
38930 if (pat)
38931 emit_insn (pat);
38932 return 0;
38934 case IX86_BUILTIN_XSAVE:
38935 case IX86_BUILTIN_XRSTOR:
38936 case IX86_BUILTIN_XSAVE64:
38937 case IX86_BUILTIN_XRSTOR64:
38938 case IX86_BUILTIN_XSAVEOPT:
38939 case IX86_BUILTIN_XSAVEOPT64:
38940 case IX86_BUILTIN_XSAVES:
38941 case IX86_BUILTIN_XRSTORS:
38942 case IX86_BUILTIN_XSAVES64:
38943 case IX86_BUILTIN_XRSTORS64:
38944 case IX86_BUILTIN_XSAVEC:
38945 case IX86_BUILTIN_XSAVEC64:
38946 arg0 = CALL_EXPR_ARG (exp, 0);
38947 arg1 = CALL_EXPR_ARG (exp, 1);
38948 op0 = expand_normal (arg0);
38949 op1 = expand_normal (arg1);
38951 if (!address_operand (op0, VOIDmode))
38953 op0 = convert_memory_address (Pmode, op0);
38954 op0 = copy_addr_to_reg (op0);
38956 op0 = gen_rtx_MEM (BLKmode, op0);
38958 op1 = force_reg (DImode, op1);
38960 if (TARGET_64BIT)
38962 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
38963 NULL, 1, OPTAB_DIRECT);
38964 switch (fcode)
38966 case IX86_BUILTIN_XSAVE:
38967 icode = CODE_FOR_xsave_rex64;
38968 break;
38969 case IX86_BUILTIN_XRSTOR:
38970 icode = CODE_FOR_xrstor_rex64;
38971 break;
38972 case IX86_BUILTIN_XSAVE64:
38973 icode = CODE_FOR_xsave64;
38974 break;
38975 case IX86_BUILTIN_XRSTOR64:
38976 icode = CODE_FOR_xrstor64;
38977 break;
38978 case IX86_BUILTIN_XSAVEOPT:
38979 icode = CODE_FOR_xsaveopt_rex64;
38980 break;
38981 case IX86_BUILTIN_XSAVEOPT64:
38982 icode = CODE_FOR_xsaveopt64;
38983 break;
38984 case IX86_BUILTIN_XSAVES:
38985 icode = CODE_FOR_xsaves_rex64;
38986 break;
38987 case IX86_BUILTIN_XRSTORS:
38988 icode = CODE_FOR_xrstors_rex64;
38989 break;
38990 case IX86_BUILTIN_XSAVES64:
38991 icode = CODE_FOR_xsaves64;
38992 break;
38993 case IX86_BUILTIN_XRSTORS64:
38994 icode = CODE_FOR_xrstors64;
38995 break;
38996 case IX86_BUILTIN_XSAVEC:
38997 icode = CODE_FOR_xsavec_rex64;
38998 break;
38999 case IX86_BUILTIN_XSAVEC64:
39000 icode = CODE_FOR_xsavec64;
39001 break;
39002 default:
39003 gcc_unreachable ();
39006 op2 = gen_lowpart (SImode, op2);
39007 op1 = gen_lowpart (SImode, op1);
39008 pat = GEN_FCN (icode) (op0, op1, op2);
39010 else
39012 switch (fcode)
39014 case IX86_BUILTIN_XSAVE:
39015 icode = CODE_FOR_xsave;
39016 break;
39017 case IX86_BUILTIN_XRSTOR:
39018 icode = CODE_FOR_xrstor;
39019 break;
39020 case IX86_BUILTIN_XSAVEOPT:
39021 icode = CODE_FOR_xsaveopt;
39022 break;
39023 case IX86_BUILTIN_XSAVES:
39024 icode = CODE_FOR_xsaves;
39025 break;
39026 case IX86_BUILTIN_XRSTORS:
39027 icode = CODE_FOR_xrstors;
39028 break;
39029 case IX86_BUILTIN_XSAVEC:
39030 icode = CODE_FOR_xsavec;
39031 break;
39032 default:
39033 gcc_unreachable ();
39035 pat = GEN_FCN (icode) (op0, op1);
39038 if (pat)
39039 emit_insn (pat);
39040 return 0;
39042 case IX86_BUILTIN_LLWPCB:
39043 arg0 = CALL_EXPR_ARG (exp, 0);
39044 op0 = expand_normal (arg0);
39045 icode = CODE_FOR_lwp_llwpcb;
39046 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39047 op0 = ix86_zero_extend_to_Pmode (op0);
39048 emit_insn (gen_lwp_llwpcb (op0));
39049 return 0;
39051 case IX86_BUILTIN_SLWPCB:
39052 icode = CODE_FOR_lwp_slwpcb;
39053 if (!target
39054 || !insn_data[icode].operand[0].predicate (target, Pmode))
39055 target = gen_reg_rtx (Pmode);
39056 emit_insn (gen_lwp_slwpcb (target));
39057 return target;
39059 case IX86_BUILTIN_BEXTRI32:
39060 case IX86_BUILTIN_BEXTRI64:
39061 arg0 = CALL_EXPR_ARG (exp, 0);
39062 arg1 = CALL_EXPR_ARG (exp, 1);
39063 op0 = expand_normal (arg0);
39064 op1 = expand_normal (arg1);
39065 icode = (fcode == IX86_BUILTIN_BEXTRI32
39066 ? CODE_FOR_tbm_bextri_si
39067 : CODE_FOR_tbm_bextri_di);
39068 if (!CONST_INT_P (op1))
39070 error ("last argument must be an immediate");
39071 return const0_rtx;
39073 else
39075 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39076 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39077 op1 = GEN_INT (length);
39078 op2 = GEN_INT (lsb_index);
39079 pat = GEN_FCN (icode) (target, op0, op1, op2);
39080 if (pat)
39081 emit_insn (pat);
39082 return target;
39085 case IX86_BUILTIN_RDRAND16_STEP:
39086 icode = CODE_FOR_rdrandhi_1;
39087 mode0 = HImode;
39088 goto rdrand_step;
39090 case IX86_BUILTIN_RDRAND32_STEP:
39091 icode = CODE_FOR_rdrandsi_1;
39092 mode0 = SImode;
39093 goto rdrand_step;
39095 case IX86_BUILTIN_RDRAND64_STEP:
39096 icode = CODE_FOR_rdranddi_1;
39097 mode0 = DImode;
39099 rdrand_step:
39100 op0 = gen_reg_rtx (mode0);
39101 emit_insn (GEN_FCN (icode) (op0));
39103 arg0 = CALL_EXPR_ARG (exp, 0);
39104 op1 = expand_normal (arg0);
39105 if (!address_operand (op1, VOIDmode))
39107 op1 = convert_memory_address (Pmode, op1);
39108 op1 = copy_addr_to_reg (op1);
39110 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39112 op1 = gen_reg_rtx (SImode);
39113 emit_move_insn (op1, CONST1_RTX (SImode));
39115 /* Emit SImode conditional move. */
39116 if (mode0 == HImode)
39118 op2 = gen_reg_rtx (SImode);
39119 emit_insn (gen_zero_extendhisi2 (op2, op0));
39121 else if (mode0 == SImode)
39122 op2 = op0;
39123 else
39124 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39126 if (target == 0
39127 || !register_operand (target, SImode))
39128 target = gen_reg_rtx (SImode);
39130 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39131 const0_rtx);
39132 emit_insn (gen_rtx_SET (VOIDmode, target,
39133 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39134 return target;
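/* For illustration: a typical user-level entry point is the immintrin.h
   wrapper (a sketch, not code from this file):

     unsigned int r;
     if (_rdrand32_step (&r))
       use (r);

   The expansion stores the hardware value through the pointer argument
   and yields 1 when the instruction reported success (carry set),
   0 otherwise.  */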
39136 case IX86_BUILTIN_RDSEED16_STEP:
39137 icode = CODE_FOR_rdseedhi_1;
39138 mode0 = HImode;
39139 goto rdseed_step;
39141 case IX86_BUILTIN_RDSEED32_STEP:
39142 icode = CODE_FOR_rdseedsi_1;
39143 mode0 = SImode;
39144 goto rdseed_step;
39146 case IX86_BUILTIN_RDSEED64_STEP:
39147 icode = CODE_FOR_rdseeddi_1;
39148 mode0 = DImode;
39150 rdseed_step:
39151 op0 = gen_reg_rtx (mode0);
39152 emit_insn (GEN_FCN (icode) (op0));
39154 arg0 = CALL_EXPR_ARG (exp, 0);
39155 op1 = expand_normal (arg0);
39156 if (!address_operand (op1, VOIDmode))
39158 op1 = convert_memory_address (Pmode, op1);
39159 op1 = copy_addr_to_reg (op1);
39161 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39163 op2 = gen_reg_rtx (QImode);
39165 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39166 const0_rtx);
39167 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39169 if (target == 0
39170 || !register_operand (target, SImode))
39171 target = gen_reg_rtx (SImode);
39173 emit_insn (gen_zero_extendqisi2 (target, op2));
39174 return target;
39176 case IX86_BUILTIN_SBB32:
39177 icode = CODE_FOR_subsi3_carry;
39178 mode0 = SImode;
39179 goto addcarryx;
39181 case IX86_BUILTIN_SBB64:
39182 icode = CODE_FOR_subdi3_carry;
39183 mode0 = DImode;
39184 goto addcarryx;
39186 case IX86_BUILTIN_ADDCARRYX32:
39187 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39188 mode0 = SImode;
39189 goto addcarryx;
39191 case IX86_BUILTIN_ADDCARRYX64:
39192 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39193 mode0 = DImode;
39195 addcarryx:
39196 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39197 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39198 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39199 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39201 op0 = gen_reg_rtx (QImode);
39203 /* Generate CF from input operand. */
39204 op1 = expand_normal (arg0);
39205 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39206 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39208 /* Generate ADCX instruction to compute X+Y+CF. */
39209 op2 = expand_normal (arg1);
39210 op3 = expand_normal (arg2);
39212 if (!REG_P (op2))
39213 op2 = copy_to_mode_reg (mode0, op2);
39214 if (!REG_P (op3))
39215 op3 = copy_to_mode_reg (mode0, op3);
39217 op0 = gen_reg_rtx (mode0);
39219 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39220 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39221 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39223 /* Store the result. */
39224 op4 = expand_normal (arg3);
39225 if (!address_operand (op4, VOIDmode))
39227 op4 = convert_memory_address (Pmode, op4);
39228 op4 = copy_addr_to_reg (op4);
39230 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39232 /* Return current CF value. */
39233 if (target == 0)
39234 target = gen_reg_rtx (QImode);
39236 PUT_MODE (pat, QImode);
39237 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39238 return target;
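/* For illustration: the addcarryx builtins back the adxintrin.h
   wrappers (a sketch, not code from this file):

     unsigned int sum;
     unsigned char c_out = _addcarryx_u32 (c_in, a, b, &sum);

   The addqi3_cc insn above regenerates CF from the incoming carry byte,
   the adcx/adc pattern computes a + b + CF into *sum, and the resulting
   carry is returned as the new carry byte.  */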
39240 case IX86_BUILTIN_READ_FLAGS:
39241 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39243 if (optimize
39244 || target == NULL_RTX
39245 || !nonimmediate_operand (target, word_mode)
39246 || GET_MODE (target) != word_mode)
39247 target = gen_reg_rtx (word_mode);
39249 emit_insn (gen_pop (target));
39250 return target;
39252 case IX86_BUILTIN_WRITE_FLAGS:
39254 arg0 = CALL_EXPR_ARG (exp, 0);
39255 op0 = expand_normal (arg0);
39256 if (!general_no_elim_operand (op0, word_mode))
39257 op0 = copy_to_mode_reg (word_mode, op0);
39259 emit_insn (gen_push (op0));
39260 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39261 return 0;
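/* For illustration: READ_FLAGS and WRITE_FLAGS back the __readeflags
   and __writeeflags wrappers (assuming the ia32intrin.h names; a
   sketch, not code from this file):

     unsigned long long f = __readeflags ();
     __writeeflags (f);

   Both are implemented by pushing and popping the flags register, as
   the push/pop pairs emitted above show.  */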
39263 case IX86_BUILTIN_KORTESTC16:
39264 icode = CODE_FOR_kortestchi;
39265 mode0 = HImode;
39266 mode1 = CCCmode;
39267 goto kortest;
39269 case IX86_BUILTIN_KORTESTZ16:
39270 icode = CODE_FOR_kortestzhi;
39271 mode0 = HImode;
39272 mode1 = CCZmode;
39274 kortest:
39275 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39276 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39277 op0 = expand_normal (arg0);
39278 op1 = expand_normal (arg1);
39280 op0 = copy_to_reg (op0);
39281 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39282 op1 = copy_to_reg (op1);
39283 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39285 target = gen_reg_rtx (QImode);
39286 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39288 /* Emit kortest. */
39289 emit_insn (GEN_FCN (icode) (op0, op1));
39290 /* And use setcc to return result from flags. */
39291 ix86_expand_setcc (target, EQ,
39292 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39293 return target;
39295 case IX86_BUILTIN_GATHERSIV2DF:
39296 icode = CODE_FOR_avx2_gathersiv2df;
39297 goto gather_gen;
39298 case IX86_BUILTIN_GATHERSIV4DF:
39299 icode = CODE_FOR_avx2_gathersiv4df;
39300 goto gather_gen;
39301 case IX86_BUILTIN_GATHERDIV2DF:
39302 icode = CODE_FOR_avx2_gatherdiv2df;
39303 goto gather_gen;
39304 case IX86_BUILTIN_GATHERDIV4DF:
39305 icode = CODE_FOR_avx2_gatherdiv4df;
39306 goto gather_gen;
39307 case IX86_BUILTIN_GATHERSIV4SF:
39308 icode = CODE_FOR_avx2_gathersiv4sf;
39309 goto gather_gen;
39310 case IX86_BUILTIN_GATHERSIV8SF:
39311 icode = CODE_FOR_avx2_gathersiv8sf;
39312 goto gather_gen;
39313 case IX86_BUILTIN_GATHERDIV4SF:
39314 icode = CODE_FOR_avx2_gatherdiv4sf;
39315 goto gather_gen;
39316 case IX86_BUILTIN_GATHERDIV8SF:
39317 icode = CODE_FOR_avx2_gatherdiv8sf;
39318 goto gather_gen;
39319 case IX86_BUILTIN_GATHERSIV2DI:
39320 icode = CODE_FOR_avx2_gathersiv2di;
39321 goto gather_gen;
39322 case IX86_BUILTIN_GATHERSIV4DI:
39323 icode = CODE_FOR_avx2_gathersiv4di;
39324 goto gather_gen;
39325 case IX86_BUILTIN_GATHERDIV2DI:
39326 icode = CODE_FOR_avx2_gatherdiv2di;
39327 goto gather_gen;
39328 case IX86_BUILTIN_GATHERDIV4DI:
39329 icode = CODE_FOR_avx2_gatherdiv4di;
39330 goto gather_gen;
39331 case IX86_BUILTIN_GATHERSIV4SI:
39332 icode = CODE_FOR_avx2_gathersiv4si;
39333 goto gather_gen;
39334 case IX86_BUILTIN_GATHERSIV8SI:
39335 icode = CODE_FOR_avx2_gathersiv8si;
39336 goto gather_gen;
39337 case IX86_BUILTIN_GATHERDIV4SI:
39338 icode = CODE_FOR_avx2_gatherdiv4si;
39339 goto gather_gen;
39340 case IX86_BUILTIN_GATHERDIV8SI:
39341 icode = CODE_FOR_avx2_gatherdiv8si;
39342 goto gather_gen;
39343 case IX86_BUILTIN_GATHERALTSIV4DF:
39344 icode = CODE_FOR_avx2_gathersiv4df;
39345 goto gather_gen;
39346 case IX86_BUILTIN_GATHERALTDIV8SF:
39347 icode = CODE_FOR_avx2_gatherdiv8sf;
39348 goto gather_gen;
39349 case IX86_BUILTIN_GATHERALTSIV4DI:
39350 icode = CODE_FOR_avx2_gathersiv4di;
39351 goto gather_gen;
39352 case IX86_BUILTIN_GATHERALTDIV8SI:
39353 icode = CODE_FOR_avx2_gatherdiv8si;
39354 goto gather_gen;
39355 case IX86_BUILTIN_GATHER3SIV16SF:
39356 icode = CODE_FOR_avx512f_gathersiv16sf;
39357 goto gather_gen;
39358 case IX86_BUILTIN_GATHER3SIV8DF:
39359 icode = CODE_FOR_avx512f_gathersiv8df;
39360 goto gather_gen;
39361 case IX86_BUILTIN_GATHER3DIV16SF:
39362 icode = CODE_FOR_avx512f_gatherdiv16sf;
39363 goto gather_gen;
39364 case IX86_BUILTIN_GATHER3DIV8DF:
39365 icode = CODE_FOR_avx512f_gatherdiv8df;
39366 goto gather_gen;
39367 case IX86_BUILTIN_GATHER3SIV16SI:
39368 icode = CODE_FOR_avx512f_gathersiv16si;
39369 goto gather_gen;
39370 case IX86_BUILTIN_GATHER3SIV8DI:
39371 icode = CODE_FOR_avx512f_gathersiv8di;
39372 goto gather_gen;
39373 case IX86_BUILTIN_GATHER3DIV16SI:
39374 icode = CODE_FOR_avx512f_gatherdiv16si;
39375 goto gather_gen;
39376 case IX86_BUILTIN_GATHER3DIV8DI:
39377 icode = CODE_FOR_avx512f_gatherdiv8di;
39378 goto gather_gen;
39379 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39380 icode = CODE_FOR_avx512f_gathersiv8df;
39381 goto gather_gen;
39382 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39383 icode = CODE_FOR_avx512f_gatherdiv16sf;
39384 goto gather_gen;
39385 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39386 icode = CODE_FOR_avx512f_gathersiv8di;
39387 goto gather_gen;
39388 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39389 icode = CODE_FOR_avx512f_gatherdiv16si;
39390 goto gather_gen;
39391 case IX86_BUILTIN_GATHER3SIV2DF:
39392 icode = CODE_FOR_avx512vl_gathersiv2df;
39393 goto gather_gen;
39394 case IX86_BUILTIN_GATHER3SIV4DF:
39395 icode = CODE_FOR_avx512vl_gathersiv4df;
39396 goto gather_gen;
39397 case IX86_BUILTIN_GATHER3DIV2DF:
39398 icode = CODE_FOR_avx512vl_gatherdiv2df;
39399 goto gather_gen;
39400 case IX86_BUILTIN_GATHER3DIV4DF:
39401 icode = CODE_FOR_avx512vl_gatherdiv4df;
39402 goto gather_gen;
39403 case IX86_BUILTIN_GATHER3SIV4SF:
39404 icode = CODE_FOR_avx512vl_gathersiv4sf;
39405 goto gather_gen;
39406 case IX86_BUILTIN_GATHER3SIV8SF:
39407 icode = CODE_FOR_avx512vl_gathersiv8sf;
39408 goto gather_gen;
39409 case IX86_BUILTIN_GATHER3DIV4SF:
39410 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39411 goto gather_gen;
39412 case IX86_BUILTIN_GATHER3DIV8SF:
39413 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39414 goto gather_gen;
39415 case IX86_BUILTIN_GATHER3SIV2DI:
39416 icode = CODE_FOR_avx512vl_gathersiv2di;
39417 goto gather_gen;
39418 case IX86_BUILTIN_GATHER3SIV4DI:
39419 icode = CODE_FOR_avx512vl_gathersiv4di;
39420 goto gather_gen;
39421 case IX86_BUILTIN_GATHER3DIV2DI:
39422 icode = CODE_FOR_avx512vl_gatherdiv2di;
39423 goto gather_gen;
39424 case IX86_BUILTIN_GATHER3DIV4DI:
39425 icode = CODE_FOR_avx512vl_gatherdiv4di;
39426 goto gather_gen;
39427 case IX86_BUILTIN_GATHER3SIV4SI:
39428 icode = CODE_FOR_avx512vl_gathersiv4si;
39429 goto gather_gen;
39430 case IX86_BUILTIN_GATHER3SIV8SI:
39431 icode = CODE_FOR_avx512vl_gathersiv8si;
39432 goto gather_gen;
39433 case IX86_BUILTIN_GATHER3DIV4SI:
39434 icode = CODE_FOR_avx512vl_gatherdiv4si;
39435 goto gather_gen;
39436 case IX86_BUILTIN_GATHER3DIV8SI:
39437 icode = CODE_FOR_avx512vl_gatherdiv8si;
39438 goto gather_gen;
39439 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39440 icode = CODE_FOR_avx512vl_gathersiv4df;
39441 goto gather_gen;
39442 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39443 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39444 goto gather_gen;
39445 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39446 icode = CODE_FOR_avx512vl_gathersiv4di;
39447 goto gather_gen;
39448 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39449 icode = CODE_FOR_avx512vl_gatherdiv8si;
39450 goto gather_gen;
39451 case IX86_BUILTIN_SCATTERSIV16SF:
39452 icode = CODE_FOR_avx512f_scattersiv16sf;
39453 goto scatter_gen;
39454 case IX86_BUILTIN_SCATTERSIV8DF:
39455 icode = CODE_FOR_avx512f_scattersiv8df;
39456 goto scatter_gen;
39457 case IX86_BUILTIN_SCATTERDIV16SF:
39458 icode = CODE_FOR_avx512f_scatterdiv16sf;
39459 goto scatter_gen;
39460 case IX86_BUILTIN_SCATTERDIV8DF:
39461 icode = CODE_FOR_avx512f_scatterdiv8df;
39462 goto scatter_gen;
39463 case IX86_BUILTIN_SCATTERSIV16SI:
39464 icode = CODE_FOR_avx512f_scattersiv16si;
39465 goto scatter_gen;
39466 case IX86_BUILTIN_SCATTERSIV8DI:
39467 icode = CODE_FOR_avx512f_scattersiv8di;
39468 goto scatter_gen;
39469 case IX86_BUILTIN_SCATTERDIV16SI:
39470 icode = CODE_FOR_avx512f_scatterdiv16si;
39471 goto scatter_gen;
39472 case IX86_BUILTIN_SCATTERDIV8DI:
39473 icode = CODE_FOR_avx512f_scatterdiv8di;
39474 goto scatter_gen;
39475 case IX86_BUILTIN_SCATTERSIV8SF:
39476 icode = CODE_FOR_avx512vl_scattersiv8sf;
39477 goto scatter_gen;
39478 case IX86_BUILTIN_SCATTERSIV4SF:
39479 icode = CODE_FOR_avx512vl_scattersiv4sf;
39480 goto scatter_gen;
39481 case IX86_BUILTIN_SCATTERSIV4DF:
39482 icode = CODE_FOR_avx512vl_scattersiv4df;
39483 goto scatter_gen;
39484 case IX86_BUILTIN_SCATTERSIV2DF:
39485 icode = CODE_FOR_avx512vl_scattersiv2df;
39486 goto scatter_gen;
39487 case IX86_BUILTIN_SCATTERDIV8SF:
39488 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39489 goto scatter_gen;
39490 case IX86_BUILTIN_SCATTERDIV4SF:
39491 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39492 goto scatter_gen;
39493 case IX86_BUILTIN_SCATTERDIV4DF:
39494 icode = CODE_FOR_avx512vl_scatterdiv4df;
39495 goto scatter_gen;
39496 case IX86_BUILTIN_SCATTERDIV2DF:
39497 icode = CODE_FOR_avx512vl_scatterdiv2df;
39498 goto scatter_gen;
39499 case IX86_BUILTIN_SCATTERSIV8SI:
39500 icode = CODE_FOR_avx512vl_scattersiv8si;
39501 goto scatter_gen;
39502 case IX86_BUILTIN_SCATTERSIV4SI:
39503 icode = CODE_FOR_avx512vl_scattersiv4si;
39504 goto scatter_gen;
39505 case IX86_BUILTIN_SCATTERSIV4DI:
39506 icode = CODE_FOR_avx512vl_scattersiv4di;
39507 goto scatter_gen;
39508 case IX86_BUILTIN_SCATTERSIV2DI:
39509 icode = CODE_FOR_avx512vl_scattersiv2di;
39510 goto scatter_gen;
39511 case IX86_BUILTIN_SCATTERDIV8SI:
39512 icode = CODE_FOR_avx512vl_scatterdiv8si;
39513 goto scatter_gen;
39514 case IX86_BUILTIN_SCATTERDIV4SI:
39515 icode = CODE_FOR_avx512vl_scatterdiv4si;
39516 goto scatter_gen;
39517 case IX86_BUILTIN_SCATTERDIV4DI:
39518 icode = CODE_FOR_avx512vl_scatterdiv4di;
39519 goto scatter_gen;
39520 case IX86_BUILTIN_SCATTERDIV2DI:
39521 icode = CODE_FOR_avx512vl_scatterdiv2di;
39522 goto scatter_gen;
39523 case IX86_BUILTIN_GATHERPFDPD:
39524 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39525 goto vec_prefetch_gen;
39526 case IX86_BUILTIN_GATHERPFDPS:
39527 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39528 goto vec_prefetch_gen;
39529 case IX86_BUILTIN_GATHERPFQPD:
39530 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39531 goto vec_prefetch_gen;
39532 case IX86_BUILTIN_GATHERPFQPS:
39533 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39534 goto vec_prefetch_gen;
39535 case IX86_BUILTIN_SCATTERPFDPD:
39536 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39537 goto vec_prefetch_gen;
39538 case IX86_BUILTIN_SCATTERPFDPS:
39539 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39540 goto vec_prefetch_gen;
39541 case IX86_BUILTIN_SCATTERPFQPD:
39542 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39543 goto vec_prefetch_gen;
39544 case IX86_BUILTIN_SCATTERPFQPS:
39545 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39546 goto vec_prefetch_gen;
39548 gather_gen:
39549 rtx half;
39550 rtx (*gen) (rtx, rtx);
39552 arg0 = CALL_EXPR_ARG (exp, 0);
39553 arg1 = CALL_EXPR_ARG (exp, 1);
39554 arg2 = CALL_EXPR_ARG (exp, 2);
39555 arg3 = CALL_EXPR_ARG (exp, 3);
39556 arg4 = CALL_EXPR_ARG (exp, 4);
39557 op0 = expand_normal (arg0);
39558 op1 = expand_normal (arg1);
39559 op2 = expand_normal (arg2);
39560 op3 = expand_normal (arg3);
39561 op4 = expand_normal (arg4);
39562 /* Note the arg order is different from the operand order. */
39563 mode0 = insn_data[icode].operand[1].mode;
39564 mode2 = insn_data[icode].operand[3].mode;
39565 mode3 = insn_data[icode].operand[4].mode;
39566 mode4 = insn_data[icode].operand[5].mode;
39568 if (target == NULL_RTX
39569 || GET_MODE (target) != insn_data[icode].operand[0].mode
39570 || !insn_data[icode].operand[0].predicate (target,
39571 GET_MODE (target)))
39572 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39573 else
39574 subtarget = target;
39576 switch (fcode)
39578 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39579 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39580 half = gen_reg_rtx (V8SImode);
39581 if (!nonimmediate_operand (op2, V16SImode))
39582 op2 = copy_to_mode_reg (V16SImode, op2);
39583 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39584 op2 = half;
39585 break;
39586 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39587 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39588 case IX86_BUILTIN_GATHERALTSIV4DF:
39589 case IX86_BUILTIN_GATHERALTSIV4DI:
39590 half = gen_reg_rtx (V4SImode);
39591 if (!nonimmediate_operand (op2, V8SImode))
39592 op2 = copy_to_mode_reg (V8SImode, op2);
39593 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39594 op2 = half;
39595 break;
39596 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39597 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39598 half = gen_reg_rtx (mode0);
39599 if (mode0 == V8SFmode)
39600 gen = gen_vec_extract_lo_v16sf;
39601 else
39602 gen = gen_vec_extract_lo_v16si;
39603 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39604 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39605 emit_insn (gen (half, op0));
39606 op0 = half;
39607 if (GET_MODE (op3) != VOIDmode)
39609 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39610 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39611 emit_insn (gen (half, op3));
39612 op3 = half;
39614 break;
39615 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39616 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39617 case IX86_BUILTIN_GATHERALTDIV8SF:
39618 case IX86_BUILTIN_GATHERALTDIV8SI:
39619 half = gen_reg_rtx (mode0);
39620 if (mode0 == V4SFmode)
39621 gen = gen_vec_extract_lo_v8sf;
39622 else
39623 gen = gen_vec_extract_lo_v8si;
39624 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39625 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39626 emit_insn (gen (half, op0));
39627 op0 = half;
39628 if (GET_MODE (op3) != VOIDmode)
39630 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39631 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39632 emit_insn (gen (half, op3));
39633 op3 = half;
39635 break;
39636 default:
39637 break;
39640 /* Force memory operand only with base register here. But we
39641 don't want to do it on memory operand for other builtin
39642 functions. */
39643 op1 = ix86_zero_extend_to_Pmode (op1);
39645 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39646 op0 = copy_to_mode_reg (mode0, op0);
39647 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39648 op1 = copy_to_mode_reg (Pmode, op1);
39649 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39650 op2 = copy_to_mode_reg (mode2, op2);
39651 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39653 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39654 op3 = copy_to_mode_reg (mode3, op3);
39656 else
39658 op3 = copy_to_reg (op3);
39659 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39661 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39663 error ("the last argument must be scale 1, 2, 4, 8");
39664 return const0_rtx;
39667 /* Optimize. If mask is known to have all high bits set,
39668 replace op0 with pc_rtx to signal that the instruction
39669 overwrites the whole destination and doesn't use its
39670 previous contents. */
39671 if (optimize)
39673 if (TREE_CODE (arg3) == INTEGER_CST)
39675 if (integer_all_onesp (arg3))
39676 op0 = pc_rtx;
39678 else if (TREE_CODE (arg3) == VECTOR_CST)
39680 unsigned int negative = 0;
39681 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39683 tree cst = VECTOR_CST_ELT (arg3, i);
39684 if (TREE_CODE (cst) == INTEGER_CST
39685 && tree_int_cst_sign_bit (cst))
39686 negative++;
39687 else if (TREE_CODE (cst) == REAL_CST
39688 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39689 negative++;
39691 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39692 op0 = pc_rtx;
39694 else if (TREE_CODE (arg3) == SSA_NAME
39695 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39697 /* Recognize also when mask is like:
39698 __v2df src = _mm_setzero_pd ();
39699 __v2df mask = _mm_cmpeq_pd (src, src);
39701 __v8sf src = _mm256_setzero_ps ();
39702 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39703 as that is a cheaper way to load all ones into
39704 a register than having to load a constant from
39705 memory. */
39706 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39707 if (is_gimple_call (def_stmt))
39709 tree fndecl = gimple_call_fndecl (def_stmt);
39710 if (fndecl
39711 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39712 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39714 case IX86_BUILTIN_CMPPD:
39715 case IX86_BUILTIN_CMPPS:
39716 case IX86_BUILTIN_CMPPD256:
39717 case IX86_BUILTIN_CMPPS256:
39718 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39719 break;
39720 /* FALLTHRU */
39721 case IX86_BUILTIN_CMPEQPD:
39722 case IX86_BUILTIN_CMPEQPS:
39723 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39724 && initializer_zerop (gimple_call_arg (def_stmt,
39725 1)))
39726 op0 = pc_rtx;
39727 break;
39728 default:
39729 break;
39735 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39736 if (! pat)
39737 return const0_rtx;
39738 emit_insn (pat);
39740 switch (fcode)
39742 case IX86_BUILTIN_GATHER3DIV16SF:
39743 if (target == NULL_RTX)
39744 target = gen_reg_rtx (V8SFmode);
39745 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39746 break;
39747 case IX86_BUILTIN_GATHER3DIV16SI:
39748 if (target == NULL_RTX)
39749 target = gen_reg_rtx (V8SImode);
39750 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39751 break;
39752 case IX86_BUILTIN_GATHER3DIV8SF:
39753 case IX86_BUILTIN_GATHERDIV8SF:
39754 if (target == NULL_RTX)
39755 target = gen_reg_rtx (V4SFmode);
39756 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39757 break;
39758 case IX86_BUILTIN_GATHER3DIV8SI:
39759 case IX86_BUILTIN_GATHERDIV8SI:
39760 if (target == NULL_RTX)
39761 target = gen_reg_rtx (V4SImode);
39762 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39763 break;
39764 default:
39765 target = subtarget;
39766 break;
39768 return target;
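/* The lo-part extractions above reflect the fact that the "div"
   (qword-index) gathers of narrower elements can only fill half of the
   destination vector: e.g. the user-level _mm256_i64gather_ps intrinsic
   takes four 64-bit indices and therefore returns a __m128 rather than
   a __m256, so the insn result is produced in a wider register and the
   low half is extracted for the caller.  */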
39770 scatter_gen:
39771 arg0 = CALL_EXPR_ARG (exp, 0);
39772 arg1 = CALL_EXPR_ARG (exp, 1);
39773 arg2 = CALL_EXPR_ARG (exp, 2);
39774 arg3 = CALL_EXPR_ARG (exp, 3);
39775 arg4 = CALL_EXPR_ARG (exp, 4);
39776 op0 = expand_normal (arg0);
39777 op1 = expand_normal (arg1);
39778 op2 = expand_normal (arg2);
39779 op3 = expand_normal (arg3);
39780 op4 = expand_normal (arg4);
39781 mode1 = insn_data[icode].operand[1].mode;
39782 mode2 = insn_data[icode].operand[2].mode;
39783 mode3 = insn_data[icode].operand[3].mode;
39784 mode4 = insn_data[icode].operand[4].mode;
39786 /* Force memory operand only with base register here. But we
39787 don't want to do it on memory operand for other builtin
39788 functions. */
39789 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39791 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39792 op0 = copy_to_mode_reg (Pmode, op0);
39794 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39796 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39797 op1 = copy_to_mode_reg (mode1, op1);
39799 else
39801 op1 = copy_to_reg (op1);
39802 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39805 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39806 op2 = copy_to_mode_reg (mode2, op2);
39808 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39809 op3 = copy_to_mode_reg (mode3, op3);
39811 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39813 error ("the last argument must be scale 1, 2, 4, 8");
39814 return const0_rtx;
39817 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39818 if (! pat)
39819 return const0_rtx;
39821 emit_insn (pat);
39822 return 0;
39824 vec_prefetch_gen:
39825 arg0 = CALL_EXPR_ARG (exp, 0);
39826 arg1 = CALL_EXPR_ARG (exp, 1);
39827 arg2 = CALL_EXPR_ARG (exp, 2);
39828 arg3 = CALL_EXPR_ARG (exp, 3);
39829 arg4 = CALL_EXPR_ARG (exp, 4);
39830 op0 = expand_normal (arg0);
39831 op1 = expand_normal (arg1);
39832 op2 = expand_normal (arg2);
39833 op3 = expand_normal (arg3);
39834 op4 = expand_normal (arg4);
39835 mode0 = insn_data[icode].operand[0].mode;
39836 mode1 = insn_data[icode].operand[1].mode;
39837 mode3 = insn_data[icode].operand[3].mode;
39838 mode4 = insn_data[icode].operand[4].mode;
39840 if (GET_MODE (op0) == mode0
39841 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39843 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39844 op0 = copy_to_mode_reg (mode0, op0);
39846 else if (op0 != constm1_rtx)
39848 op0 = copy_to_reg (op0);
39849 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39852 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39853 op1 = copy_to_mode_reg (mode1, op1);
39855 /* Force memory operand only with base register here. But we
39856 don't want to do it on memory operand for other builtin
39857 functions. */
39858 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39860 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39861 op2 = copy_to_mode_reg (Pmode, op2);
39863 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39865 error ("the fourth argument must be scale 1, 2, 4, 8");
39866 return const0_rtx;
39869 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39871 error ("incorrect hint operand");
39872 return const0_rtx;
39875 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39876 if (! pat)
39877 return const0_rtx;
39879 emit_insn (pat);
39881 return 0;
39883 case IX86_BUILTIN_XABORT:
39884 icode = CODE_FOR_xabort;
39885 arg0 = CALL_EXPR_ARG (exp, 0);
39886 op0 = expand_normal (arg0);
39887 mode0 = insn_data[icode].operand[0].mode;
39888 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39890 error ("the xabort's argument must be an 8-bit immediate");
39891 return const0_rtx;
39893 emit_insn (gen_xabort (op0));
39894 return 0;
39896 default:
39897 break;
39900 for (i = 0, d = bdesc_special_args;
39901 i < ARRAY_SIZE (bdesc_special_args);
39902 i++, d++)
39903 if (d->code == fcode)
39904 return ix86_expand_special_args_builtin (d, exp, target);
39906 for (i = 0, d = bdesc_args;
39907 i < ARRAY_SIZE (bdesc_args);
39908 i++, d++)
39909 if (d->code == fcode)
39910 switch (fcode)
39912 case IX86_BUILTIN_FABSQ:
39913 case IX86_BUILTIN_COPYSIGNQ:
39914 if (!TARGET_SSE)
39915 /* Emit a normal call if SSE isn't available. */
39916 return expand_call (exp, target, ignore);
39917 default:
39918 return ix86_expand_args_builtin (d, exp, target);
39921 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39922 if (d->code == fcode)
39923 return ix86_expand_sse_comi (d, exp, target);
39925 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39926 if (d->code == fcode)
39927 return ix86_expand_round_builtin (d, exp, target);
39929 for (i = 0, d = bdesc_pcmpestr;
39930 i < ARRAY_SIZE (bdesc_pcmpestr);
39931 i++, d++)
39932 if (d->code == fcode)
39933 return ix86_expand_sse_pcmpestr (d, exp, target);
39935 for (i = 0, d = bdesc_pcmpistr;
39936 i < ARRAY_SIZE (bdesc_pcmpistr);
39937 i++, d++)
39938 if (d->code == fcode)
39939 return ix86_expand_sse_pcmpistr (d, exp, target);
39941 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39942 if (d->code == fcode)
39943 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39944 (enum ix86_builtin_func_type)
39945 d->flag, d->comparison);
39947 gcc_unreachable ();
39950 /* This returns the target-specific builtin with code CODE if
39951 current_function_decl has visibility on this builtin, which is checked
39952 using isa flags. Returns NULL_TREE otherwise. */
39954 static tree ix86_get_builtin (enum ix86_builtins code)
39956 struct cl_target_option *opts;
39957 tree target_tree = NULL_TREE;
39959 /* Determine the isa flags of current_function_decl. */
39961 if (current_function_decl)
39962 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
39964 if (target_tree == NULL)
39965 target_tree = target_option_default_node;
39967 opts = TREE_TARGET_OPTION (target_tree);
39969 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
39970 return ix86_builtin_decl (code, true);
39971 else
39972 return NULL_TREE;
39975 /* Return function decl for target specific builtin
39976 for given MPX builtin passed in FCODE. */
39977 static tree
39978 ix86_builtin_mpx_function (unsigned fcode)
39980 switch (fcode)
39982 case BUILT_IN_CHKP_BNDMK:
39983 return ix86_builtins[IX86_BUILTIN_BNDMK];
39985 case BUILT_IN_CHKP_BNDSTX:
39986 return ix86_builtins[IX86_BUILTIN_BNDSTX];
39988 case BUILT_IN_CHKP_BNDLDX:
39989 return ix86_builtins[IX86_BUILTIN_BNDLDX];
39991 case BUILT_IN_CHKP_BNDCL:
39992 return ix86_builtins[IX86_BUILTIN_BNDCL];
39994 case BUILT_IN_CHKP_BNDCU:
39995 return ix86_builtins[IX86_BUILTIN_BNDCU];
39997 case BUILT_IN_CHKP_BNDRET:
39998 return ix86_builtins[IX86_BUILTIN_BNDRET];
40000 case BUILT_IN_CHKP_INTERSECT:
40001 return ix86_builtins[IX86_BUILTIN_BNDINT];
40003 case BUILT_IN_CHKP_NARROW:
40004 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40006 case BUILT_IN_CHKP_SIZEOF:
40007 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40009 case BUILT_IN_CHKP_EXTRACT_LOWER:
40010 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40012 case BUILT_IN_CHKP_EXTRACT_UPPER:
40013 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40015 default:
40016 return NULL_TREE;
40019 gcc_unreachable ();
40022 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40024 Return an address to be used to load/store bounds for pointer
40025 passed in SLOT.
40027 SLOT_NO is an integer constant holding number of a target
40028 dependent special slot to be used in case SLOT is not a memory.
40030 SPECIAL_BASE is a pointer to be used as a base of fake address
40031 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40032 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40034 static rtx
40035 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40037 rtx addr = NULL;
40039 /* NULL slot means we pass bounds for pointer not passed to the
40040 function at all. Register slot means we pass pointer in a
40041 register. In both these cases bounds are passed via Bounds
40042 Table. Since we do not have actual pointer stored in memory,
40043 we have to use fake addresses to access Bounds Table. We
40044 start with (special_base - sizeof (void*)) and decrease this
40045 address by pointer size to get addresses for other slots. */
40046 if (!slot || REG_P (slot))
40048 gcc_assert (CONST_INT_P (slot_no));
40049 addr = plus_constant (Pmode, special_base,
40050 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40052 /* If pointer is passed in a memory then its address is used to
40053 access Bounds Table. */
40054 else if (MEM_P (slot))
40056 addr = XEXP (slot, 0);
40057 if (!register_operand (addr, Pmode))
40058 addr = copy_addr_to_reg (addr);
40060 else
40061 gcc_unreachable ();
40063 return addr;
40066 /* The expand pass uses this hook to load bounds for function parameter
40067 PTR passed in SLOT in case its bounds are not passed in a register.
40069 If SLOT is a memory, then bounds are loaded as for a regular pointer
40070 loaded from memory. PTR may be NULL in case SLOT is a memory.
40071 In that case the value of PTR (if required) may be loaded from SLOT.
40073 If SLOT is NULL or a register then SLOT_NO is an integer constant
40074 holding the number of the target dependent special slot which should be
40075 used to obtain bounds.
40077 Return loaded bounds. */
40079 static rtx
40080 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40082 rtx reg = gen_reg_rtx (BNDmode);
40083 rtx addr;
40085 /* Get address to be used to access Bounds Table. Special slots start
40086 at the location of the return address of the current function. */
40087 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40089 /* Load pointer value from a memory if we don't have it. */
40090 if (!ptr)
40092 gcc_assert (MEM_P (slot));
40093 ptr = copy_addr_to_reg (slot);
40096 emit_insn (BNDmode == BND64mode
40097 ? gen_bnd64_ldx (reg, addr, ptr)
40098 : gen_bnd32_ldx (reg, addr, ptr));
40100 return reg;
40103 /* The expand pass uses this hook to store BOUNDS for call argument PTR
40104 passed in SLOT in case BOUNDS are not passed in a register.
40106 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
40107 stored in memory. PTR may be NULL in case SLOT is a memory.
40108 In that case the value of PTR (if required) may be loaded from SLOT.
40110 If SLOT is NULL or a register then SLOT_NO is an integer constant
40111 holding the number of the target dependent special slot which should be
40112 used to store BOUNDS. */
40114 static void
40115 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40117 rtx addr;
40119 /* Get address to be used to access Bounds Table. Special slots start
40120 at the location of the return address of the called function. */
40121 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40123 /* Load pointer value from a memory if we don't have it. */
40124 if (!ptr)
40126 gcc_assert (MEM_P (slot));
40127 ptr = copy_addr_to_reg (slot);
40130 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40131 if (!register_operand (bounds, BNDmode))
40132 bounds = copy_to_mode_reg (BNDmode, bounds);
40134 emit_insn (BNDmode == BND64mode
40135 ? gen_bnd64_stx (addr, ptr, bounds)
40136 : gen_bnd32_stx (addr, ptr, bounds));
40139 /* Load and return bounds returned by function in SLOT. */
40141 static rtx
40142 ix86_load_returned_bounds (rtx slot)
40144 rtx res;
40146 gcc_assert (REG_P (slot));
40147 res = gen_reg_rtx (BNDmode);
40148 emit_move_insn (res, slot);
40150 return res;
40153 /* Store BOUNDS returned by function into SLOT. */
40155 static void
40156 ix86_store_returned_bounds (rtx slot, rtx bounds)
40158 gcc_assert (REG_P (slot));
40159 emit_move_insn (slot, bounds);
40162 /* Returns a function decl for a vectorized version of the builtin function
40163 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40164 if it is not available. */
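/* For example, vectorizing a call to sqrt with a V2DF result and argument
   maps to IX86_BUILTIN_SQRTPD in the switch below, while the V4DF and V8DF
   variants map to the AVX and AVX-512 forms.  */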
40166 static tree
40167 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40168 tree type_in)
40170 machine_mode in_mode, out_mode;
40171 int in_n, out_n;
40172 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40174 if (TREE_CODE (type_out) != VECTOR_TYPE
40175 || TREE_CODE (type_in) != VECTOR_TYPE
40176 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40177 return NULL_TREE;
40179 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40180 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40181 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40182 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40184 switch (fn)
40186 case BUILT_IN_SQRT:
40187 if (out_mode == DFmode && in_mode == DFmode)
40189 if (out_n == 2 && in_n == 2)
40190 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40191 else if (out_n == 4 && in_n == 4)
40192 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40193 else if (out_n == 8 && in_n == 8)
40194 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40196 break;
40198 case BUILT_IN_EXP2F:
40199 if (out_mode == SFmode && in_mode == SFmode)
40201 if (out_n == 16 && in_n == 16)
40202 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40204 break;
40206 case BUILT_IN_SQRTF:
40207 if (out_mode == SFmode && in_mode == SFmode)
40209 if (out_n == 4 && in_n == 4)
40210 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40211 else if (out_n == 8 && in_n == 8)
40212 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40213 else if (out_n == 16 && in_n == 16)
40214 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40216 break;
40218 case BUILT_IN_IFLOOR:
40219 case BUILT_IN_LFLOOR:
40220 case BUILT_IN_LLFLOOR:
40221 /* The round insn does not trap on denormals. */
40222 if (flag_trapping_math || !TARGET_ROUND)
40223 break;
40225 if (out_mode == SImode && in_mode == DFmode)
40227 if (out_n == 4 && in_n == 2)
40228 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40229 else if (out_n == 8 && in_n == 4)
40230 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40231 else if (out_n == 16 && in_n == 8)
40232 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40234 break;
40236 case BUILT_IN_IFLOORF:
40237 case BUILT_IN_LFLOORF:
40238 case BUILT_IN_LLFLOORF:
40239 /* The round insn does not trap on denormals. */
40240 if (flag_trapping_math || !TARGET_ROUND)
40241 break;
40243 if (out_mode == SImode && in_mode == SFmode)
40245 if (out_n == 4 && in_n == 4)
40246 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40247 else if (out_n == 8 && in_n == 8)
40248 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40250 break;
40252 case BUILT_IN_ICEIL:
40253 case BUILT_IN_LCEIL:
40254 case BUILT_IN_LLCEIL:
40255 /* The round insn does not trap on denormals. */
40256 if (flag_trapping_math || !TARGET_ROUND)
40257 break;
40259 if (out_mode == SImode && in_mode == DFmode)
40261 if (out_n == 4 && in_n == 2)
40262 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40263 else if (out_n == 8 && in_n == 4)
40264 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40265 else if (out_n == 16 && in_n == 8)
40266 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40268 break;
40270 case BUILT_IN_ICEILF:
40271 case BUILT_IN_LCEILF:
40272 case BUILT_IN_LLCEILF:
40273 /* The round insn does not trap on denormals. */
40274 if (flag_trapping_math || !TARGET_ROUND)
40275 break;
40277 if (out_mode == SImode && in_mode == SFmode)
40279 if (out_n == 4 && in_n == 4)
40280 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40281 else if (out_n == 8 && in_n == 8)
40282 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40284 break;
40286 case BUILT_IN_IRINT:
40287 case BUILT_IN_LRINT:
40288 case BUILT_IN_LLRINT:
40289 if (out_mode == SImode && in_mode == DFmode)
40291 if (out_n == 4 && in_n == 2)
40292 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40293 else if (out_n == 8 && in_n == 4)
40294 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40296 break;
40298 case BUILT_IN_IRINTF:
40299 case BUILT_IN_LRINTF:
40300 case BUILT_IN_LLRINTF:
40301 if (out_mode == SImode && in_mode == SFmode)
40303 if (out_n == 4 && in_n == 4)
40304 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40305 else if (out_n == 8 && in_n == 8)
40306 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40308 break;
40310 case BUILT_IN_IROUND:
40311 case BUILT_IN_LROUND:
40312 case BUILT_IN_LLROUND:
40313 /* The round insn does not trap on denormals. */
40314 if (flag_trapping_math || !TARGET_ROUND)
40315 break;
40317 if (out_mode == SImode && in_mode == DFmode)
40319 if (out_n == 4 && in_n == 2)
40320 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40321 else if (out_n == 8 && in_n == 4)
40322 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40323 else if (out_n == 16 && in_n == 8)
40324 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40326 break;
40328 case BUILT_IN_IROUNDF:
40329 case BUILT_IN_LROUNDF:
40330 case BUILT_IN_LLROUNDF:
40331 /* The round insn does not trap on denormals. */
40332 if (flag_trapping_math || !TARGET_ROUND)
40333 break;
40335 if (out_mode == SImode && in_mode == SFmode)
40337 if (out_n == 4 && in_n == 4)
40338 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40339 else if (out_n == 8 && in_n == 8)
40340 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40342 break;
40344 case BUILT_IN_COPYSIGN:
40345 if (out_mode == DFmode && in_mode == DFmode)
40347 if (out_n == 2 && in_n == 2)
40348 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40349 else if (out_n == 4 && in_n == 4)
40350 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40351 else if (out_n == 8 && in_n == 8)
40352 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40354 break;
40356 case BUILT_IN_COPYSIGNF:
40357 if (out_mode == SFmode && in_mode == SFmode)
40359 if (out_n == 4 && in_n == 4)
40360 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40361 else if (out_n == 8 && in_n == 8)
40362 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40363 else if (out_n == 16 && in_n == 16)
40364 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40366 break;
40368 case BUILT_IN_FLOOR:
40369 /* The round insn does not trap on denormals. */
40370 if (flag_trapping_math || !TARGET_ROUND)
40371 break;
40373 if (out_mode == DFmode && in_mode == DFmode)
40375 if (out_n == 2 && in_n == 2)
40376 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40377 else if (out_n == 4 && in_n == 4)
40378 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40380 break;
40382 case BUILT_IN_FLOORF:
40383 /* The round insn does not trap on denormals. */
40384 if (flag_trapping_math || !TARGET_ROUND)
40385 break;
40387 if (out_mode == SFmode && in_mode == SFmode)
40389 if (out_n == 4 && in_n == 4)
40390 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40391 else if (out_n == 8 && in_n == 8)
40392 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40394 break;
40396 case BUILT_IN_CEIL:
40397 /* The round insn does not trap on denormals. */
40398 if (flag_trapping_math || !TARGET_ROUND)
40399 break;
40401 if (out_mode == DFmode && in_mode == DFmode)
40403 if (out_n == 2 && in_n == 2)
40404 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40405 else if (out_n == 4 && in_n == 4)
40406 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40408 break;
40410 case BUILT_IN_CEILF:
40411 /* The round insn does not trap on denormals. */
40412 if (flag_trapping_math || !TARGET_ROUND)
40413 break;
40415 if (out_mode == SFmode && in_mode == SFmode)
40417 if (out_n == 4 && in_n == 4)
40418 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40419 else if (out_n == 8 && in_n == 8)
40420 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40422 break;
40424 case BUILT_IN_TRUNC:
40425 /* The round insn does not trap on denormals. */
40426 if (flag_trapping_math || !TARGET_ROUND)
40427 break;
40429 if (out_mode == DFmode && in_mode == DFmode)
40431 if (out_n == 2 && in_n == 2)
40432 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40433 else if (out_n == 4 && in_n == 4)
40434 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40436 break;
40438 case BUILT_IN_TRUNCF:
40439 /* The round insn does not trap on denormals. */
40440 if (flag_trapping_math || !TARGET_ROUND)
40441 break;
40443 if (out_mode == SFmode && in_mode == SFmode)
40445 if (out_n == 4 && in_n == 4)
40446 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40447 else if (out_n == 8 && in_n == 8)
40448 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40450 break;
40452 case BUILT_IN_RINT:
40453 /* The round insn does not trap on denormals. */
40454 if (flag_trapping_math || !TARGET_ROUND)
40455 break;
40457 if (out_mode == DFmode && in_mode == DFmode)
40459 if (out_n == 2 && in_n == 2)
40460 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40461 else if (out_n == 4 && in_n == 4)
40462 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40464 break;
40466 case BUILT_IN_RINTF:
40467 /* The round insn does not trap on denormals. */
40468 if (flag_trapping_math || !TARGET_ROUND)
40469 break;
40471 if (out_mode == SFmode && in_mode == SFmode)
40473 if (out_n == 4 && in_n == 4)
40474 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40475 else if (out_n == 8 && in_n == 8)
40476 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40478 break;
40480 case BUILT_IN_ROUND:
40481 /* The round insn does not trap on denormals. */
40482 if (flag_trapping_math || !TARGET_ROUND)
40483 break;
40485 if (out_mode == DFmode && in_mode == DFmode)
40487 if (out_n == 2 && in_n == 2)
40488 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40489 else if (out_n == 4 && in_n == 4)
40490 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40492 break;
40494 case BUILT_IN_ROUNDF:
40495 /* The round insn does not trap on denormals. */
40496 if (flag_trapping_math || !TARGET_ROUND)
40497 break;
40499 if (out_mode == SFmode && in_mode == SFmode)
40501 if (out_n == 4 && in_n == 4)
40502 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40503 else if (out_n == 8 && in_n == 8)
40504 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40506 break;
40508 case BUILT_IN_FMA:
40509 if (out_mode == DFmode && in_mode == DFmode)
40511 if (out_n == 2 && in_n == 2)
40512 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40513 if (out_n == 4 && in_n == 4)
40514 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40516 break;
40518 case BUILT_IN_FMAF:
40519 if (out_mode == SFmode && in_mode == SFmode)
40521 if (out_n == 4 && in_n == 4)
40522 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40523 if (out_n == 8 && in_n == 8)
40524 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40526 break;
40528 default:
40529 break;
40532 /* Dispatch to a handler for a vectorization library. */
40533 if (ix86_veclib_handler)
40534 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40535 type_in);
40537 return NULL_TREE;
40540 /* Handler for an SVML-style interface to
40541 a library with vectorized intrinsics. */
40543 static tree
40544 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40546 char name[20];
40547 tree fntype, new_fndecl, args;
40548 unsigned arity;
40549 const char *bname;
40550 machine_mode el_mode, in_mode;
40551 int n, in_n;
40553 /* The SVML is suitable for unsafe math only. */
40554 if (!flag_unsafe_math_optimizations)
40555 return NULL_TREE;
40557 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40558 n = TYPE_VECTOR_SUBPARTS (type_out);
40559 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40560 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40561 if (el_mode != in_mode
40562 || n != in_n)
40563 return NULL_TREE;
40565 switch (fn)
40567 case BUILT_IN_EXP:
40568 case BUILT_IN_LOG:
40569 case BUILT_IN_LOG10:
40570 case BUILT_IN_POW:
40571 case BUILT_IN_TANH:
40572 case BUILT_IN_TAN:
40573 case BUILT_IN_ATAN:
40574 case BUILT_IN_ATAN2:
40575 case BUILT_IN_ATANH:
40576 case BUILT_IN_CBRT:
40577 case BUILT_IN_SINH:
40578 case BUILT_IN_SIN:
40579 case BUILT_IN_ASINH:
40580 case BUILT_IN_ASIN:
40581 case BUILT_IN_COSH:
40582 case BUILT_IN_COS:
40583 case BUILT_IN_ACOSH:
40584 case BUILT_IN_ACOS:
40585 if (el_mode != DFmode || n != 2)
40586 return NULL_TREE;
40587 break;
40589 case BUILT_IN_EXPF:
40590 case BUILT_IN_LOGF:
40591 case BUILT_IN_LOG10F:
40592 case BUILT_IN_POWF:
40593 case BUILT_IN_TANHF:
40594 case BUILT_IN_TANF:
40595 case BUILT_IN_ATANF:
40596 case BUILT_IN_ATAN2F:
40597 case BUILT_IN_ATANHF:
40598 case BUILT_IN_CBRTF:
40599 case BUILT_IN_SINHF:
40600 case BUILT_IN_SINF:
40601 case BUILT_IN_ASINHF:
40602 case BUILT_IN_ASINF:
40603 case BUILT_IN_COSHF:
40604 case BUILT_IN_COSF:
40605 case BUILT_IN_ACOSHF:
40606 case BUILT_IN_ACOSF:
40607 if (el_mode != SFmode || n != 4)
40608 return NULL_TREE;
40609 break;
40611 default:
40612 return NULL_TREE;
40615 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40617 if (fn == BUILT_IN_LOGF)
40618 strcpy (name, "vmlsLn4");
40619 else if (fn == BUILT_IN_LOG)
40620 strcpy (name, "vmldLn2");
40621 else if (n == 4)
40623 sprintf (name, "vmls%s", bname+10);
40624 name[strlen (name)-1] = '4';
40626 else
40627 sprintf (name, "vmld%s2", bname+10);
40629 /* Convert to uppercase. */
40630 name[4] &= ~0x20;
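/* For example, BUILT_IN_SINF on 4-element SFmode vectors yields "vmlsSin4"
   and BUILT_IN_POW on 2-element DFmode vectors yields "vmldPow2";
   bname + 10 skips the "__builtin_" prefix of the scalar builtin's name.  */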
40632 arity = 0;
40633 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40634 args;
40635 args = TREE_CHAIN (args))
40636 arity++;
40638 if (arity == 1)
40639 fntype = build_function_type_list (type_out, type_in, NULL);
40640 else
40641 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40643 /* Build a function declaration for the vectorized function. */
40644 new_fndecl = build_decl (BUILTINS_LOCATION,
40645 FUNCTION_DECL, get_identifier (name), fntype);
40646 TREE_PUBLIC (new_fndecl) = 1;
40647 DECL_EXTERNAL (new_fndecl) = 1;
40648 DECL_IS_NOVOPS (new_fndecl) = 1;
40649 TREE_READONLY (new_fndecl) = 1;
40651 return new_fndecl;
40654 /* Handler for an ACML-style interface to
40655 a library with vectorized intrinsics. */
40657 static tree
40658 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40660 char name[20] = "__vr.._";
40661 tree fntype, new_fndecl, args;
40662 unsigned arity;
40663 const char *bname;
40664 machine_mode el_mode, in_mode;
40665 int n, in_n;
40667 /* The ACML is 64-bit only and suitable for unsafe math only, as
40668 it does not correctly support parts of IEEE arithmetic with the required
40669 precision, such as denormals. */
40670 if (!TARGET_64BIT
40671 || !flag_unsafe_math_optimizations)
40672 return NULL_TREE;
40674 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40675 n = TYPE_VECTOR_SUBPARTS (type_out);
40676 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40677 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40678 if (el_mode != in_mode
40679 || n != in_n)
40680 return NULL_TREE;
40682 switch (fn)
40684 case BUILT_IN_SIN:
40685 case BUILT_IN_COS:
40686 case BUILT_IN_EXP:
40687 case BUILT_IN_LOG:
40688 case BUILT_IN_LOG2:
40689 case BUILT_IN_LOG10:
40690 name[4] = 'd';
40691 name[5] = '2';
40692 if (el_mode != DFmode
40693 || n != 2)
40694 return NULL_TREE;
40695 break;
40697 case BUILT_IN_SINF:
40698 case BUILT_IN_COSF:
40699 case BUILT_IN_EXPF:
40700 case BUILT_IN_POWF:
40701 case BUILT_IN_LOGF:
40702 case BUILT_IN_LOG2F:
40703 case BUILT_IN_LOG10F:
40704 name[4] = 's';
40705 name[5] = '4';
40706 if (el_mode != SFmode
40707 || n != 4)
40708 return NULL_TREE;
40709 break;
40711 default:
40712 return NULL_TREE;
40715 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40716 sprintf (name + 7, "%s", bname+10);
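/* For example, BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF
   becomes "__vrs4_sinf".  */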
40718 arity = 0;
40719 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40720 args;
40721 args = TREE_CHAIN (args))
40722 arity++;
40724 if (arity == 1)
40725 fntype = build_function_type_list (type_out, type_in, NULL);
40726 else
40727 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40729 /* Build a function declaration for the vectorized function. */
40730 new_fndecl = build_decl (BUILTINS_LOCATION,
40731 FUNCTION_DECL, get_identifier (name), fntype);
40732 TREE_PUBLIC (new_fndecl) = 1;
40733 DECL_EXTERNAL (new_fndecl) = 1;
40734 DECL_IS_NOVOPS (new_fndecl) = 1;
40735 TREE_READONLY (new_fndecl) = 1;
40737 return new_fndecl;
40740 /* Returns a decl of a function that implements gather load with
40741 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40742 Return NULL_TREE if it is not available. */
40744 static tree
40745 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40746 const_tree index_type, int scale)
40748 bool si;
40749 enum ix86_builtins code;
40751 if (! TARGET_AVX2)
40752 return NULL_TREE;
40754 if ((TREE_CODE (index_type) != INTEGER_TYPE
40755 && !POINTER_TYPE_P (index_type))
40756 || (TYPE_MODE (index_type) != SImode
40757 && TYPE_MODE (index_type) != DImode))
40758 return NULL_TREE;
40760 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40761 return NULL_TREE;
40763 /* The v*gather* insns sign-extend the index to pointer mode. */
40764 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40765 && TYPE_UNSIGNED (index_type))
40766 return NULL_TREE;
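/* SCALE must be a power of two no larger than 8, i.e. 1, 2, 4 or 8,
   matching the scaled-index addressing used by the gather insns.  */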
40768 if (scale <= 0
40769 || scale > 8
40770 || (scale & (scale - 1)) != 0)
40771 return NULL_TREE;
40773 si = TYPE_MODE (index_type) == SImode;
40774 switch (TYPE_MODE (mem_vectype))
40776 case V2DFmode:
40777 if (TARGET_AVX512VL)
40778 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40779 else
40780 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40781 break;
40782 case V4DFmode:
40783 if (TARGET_AVX512VL)
40784 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40785 else
40786 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40787 break;
40788 case V2DImode:
40789 if (TARGET_AVX512VL)
40790 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40791 else
40792 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40793 break;
40794 case V4DImode:
40795 if (TARGET_AVX512VL)
40796 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40797 else
40798 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40799 break;
40800 case V4SFmode:
40801 if (TARGET_AVX512VL)
40802 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40803 else
40804 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40805 break;
40806 case V8SFmode:
40807 if (TARGET_AVX512VL)
40808 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40809 else
40810 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40811 break;
40812 case V4SImode:
40813 if (TARGET_AVX512VL)
40814 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40815 else
40816 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40817 break;
40818 case V8SImode:
40819 if (TARGET_AVX512VL)
40820 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40821 else
40822 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40823 break;
40824 case V8DFmode:
40825 if (TARGET_AVX512F)
40826 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40827 else
40828 return NULL_TREE;
40829 break;
40830 case V8DImode:
40831 if (TARGET_AVX512F)
40832 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40833 else
40834 return NULL_TREE;
40835 break;
40836 case V16SFmode:
40837 if (TARGET_AVX512F)
40838 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40839 else
40840 return NULL_TREE;
40841 break;
40842 case V16SImode:
40843 if (TARGET_AVX512F)
40844 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40845 else
40846 return NULL_TREE;
40847 break;
40848 default:
40849 return NULL_TREE;
40852 return ix86_get_builtin (code);
40855 /* Returns a code for a target-specific builtin that implements
40856 reciprocal of the function, or NULL_TREE if not available. */
40858 static tree
40859 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40861 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40862 && flag_finite_math_only && !flag_trapping_math
40863 && flag_unsafe_math_optimizations))
40864 return NULL_TREE;
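/* The flag_* conditions above all hold under -ffast-math; without them the
   reciprocal (rsqrt) approximation is not a valid substitution for sqrt.  */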
40866 if (md_fn)
40867 /* Machine dependent builtins. */
40868 switch (fn)
40870 /* Vectorized version of sqrt to rsqrt conversion. */
40871 case IX86_BUILTIN_SQRTPS_NR:
40872 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40874 case IX86_BUILTIN_SQRTPS_NR256:
40875 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40877 default:
40878 return NULL_TREE;
40880 else
40881 /* Normal builtins. */
40882 switch (fn)
40884 /* Sqrt to rsqrt conversion. */
40885 case BUILT_IN_SQRTF:
40886 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40888 default:
40889 return NULL_TREE;
40893 /* Helper for avx_vpermilps256_operand et al. This is also used by
40894 the expansion functions to turn the parallel back into a mask.
40895 The return value is 0 for no match and the imm8+1 for a match. */
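/* For example, for V4SFmode the parallel (1 0 3 2) encodes two bits per
   element, yielding imm8 0xb1, so the function returns 0xb2.  */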
40898 avx_vpermilp_parallel (rtx par, machine_mode mode)
40900 unsigned i, nelt = GET_MODE_NUNITS (mode);
40901 unsigned mask = 0;
40902 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40904 if (XVECLEN (par, 0) != (int) nelt)
40905 return 0;
40907 /* Validate that all of the elements are constants, and not totally
40908 out of range. Copy the data into an integral array to make the
40909 subsequent checks easier. */
40910 for (i = 0; i < nelt; ++i)
40912 rtx er = XVECEXP (par, 0, i);
40913 unsigned HOST_WIDE_INT ei;
40915 if (!CONST_INT_P (er))
40916 return 0;
40917 ei = INTVAL (er);
40918 if (ei >= nelt)
40919 return 0;
40920 ipar[i] = ei;
40923 switch (mode)
40925 case V8DFmode:
40926 /* In the 512-bit DFmode case, we can only move elements within
40927 a 128-bit lane. First fill the second part of the mask,
40928 then fallthru. */
40929 for (i = 4; i < 6; ++i)
40931 if (ipar[i] < 4 || ipar[i] >= 6)
40932 return 0;
40933 mask |= (ipar[i] - 4) << i;
40935 for (i = 6; i < 8; ++i)
40937 if (ipar[i] < 6)
40938 return 0;
40939 mask |= (ipar[i] - 6) << i;
40941 /* FALLTHRU */
40943 case V4DFmode:
40944 /* In the 256-bit DFmode case, we can only move elements within
40945 a 128-bit lane. */
40946 for (i = 0; i < 2; ++i)
40948 if (ipar[i] >= 2)
40949 return 0;
40950 mask |= ipar[i] << i;
40952 for (i = 2; i < 4; ++i)
40954 if (ipar[i] < 2)
40955 return 0;
40956 mask |= (ipar[i] - 2) << i;
40958 break;
40960 case V16SFmode:
40961 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
40962 must mirror the permutation in the lower 256 bits. */
40963 for (i = 0; i < 8; ++i)
40964 if (ipar[i] + 8 != ipar[i + 8])
40965 return 0;
40966 /* FALLTHRU */
40968 case V8SFmode:
40969 /* In the 256-bit SFmode case, we have full freedom of
40970 movement within the low 128-bit lane, but the high 128-bit
40971 lane must mirror the exact same pattern. */
40972 for (i = 0; i < 4; ++i)
40973 if (ipar[i] + 4 != ipar[i + 4])
40974 return 0;
40975 nelt = 4;
40976 /* FALLTHRU */
40978 case V2DFmode:
40979 case V4SFmode:
40980 /* In the 128-bit case, we have full freedom in the placement of
40981 the elements from the source operand. */
40982 for (i = 0; i < nelt; ++i)
40983 mask |= ipar[i] << (i * (nelt / 2));
40984 break;
40986 default:
40987 gcc_unreachable ();
40990 /* Make sure success has a non-zero value by adding one. */
40991 return mask + 1;
40994 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
40995 the expansion functions to turn the parallel back into a mask.
40996 The return value is 0 for no match and the imm8+1 for a match. */
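/* For example, for V4DFmode the parallel (0 1 6 7) selects the low lane of
   the first operand and the high lane of the second, yielding imm8 0x30 and
   a return value of 0x31.  */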
40999 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41001 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41002 unsigned mask = 0;
41003 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41005 if (XVECLEN (par, 0) != (int) nelt)
41006 return 0;
41008 /* Validate that all of the elements are constants, and not totally
41009 out of range. Copy the data into an integral array to make the
41010 subsequent checks easier. */
41011 for (i = 0; i < nelt; ++i)
41013 rtx er = XVECEXP (par, 0, i);
41014 unsigned HOST_WIDE_INT ei;
41016 if (!CONST_INT_P (er))
41017 return 0;
41018 ei = INTVAL (er);
41019 if (ei >= 2 * nelt)
41020 return 0;
41021 ipar[i] = ei;
41024 /* Validate that each half of the permute selects consecutive elements. */
41025 for (i = 0; i < nelt2 - 1; ++i)
41026 if (ipar[i] + 1 != ipar[i + 1])
41027 return 0;
41028 for (i = nelt2; i < nelt - 1; ++i)
41029 if (ipar[i] + 1 != ipar[i + 1])
41030 return 0;
41032 /* Reconstruct the mask. */
41033 for (i = 0; i < 2; ++i)
41035 unsigned e = ipar[i * nelt2];
41036 if (e % nelt2)
41037 return 0;
41038 e /= nelt2;
41039 mask |= e << (i * 4);
41042 /* Make sure success has a non-zero value by adding one. */
41043 return mask + 1;
41046 /* Return a register priority for hard reg REGNO. */
41047 static int
41048 ix86_register_priority (int hard_regno)
41050 /* ebp and r13 as the base always want a displacement, and r12 as the
41051 base always wants an index. So discourage their use in an
41052 address. */
41053 if (hard_regno == R12_REG || hard_regno == R13_REG)
41054 return 0;
41055 if (hard_regno == BP_REG)
41056 return 1;
41057 /* New x86-64 int registers result in bigger code size. Discourage
41058 them. */
41059 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41060 return 2;
41061 /* New x86-64 SSE registers result in bigger code size. Discourage
41062 them. */
41063 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41064 return 2;
41065 /* Use of the AX register results in smaller code. Prefer it. */
41066 if (hard_regno == 0)
41067 return 4;
41068 return 3;
41071 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41073 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41074 QImode must go into class Q_REGS.
41075 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41076 movdf to do mem-to-mem moves through integer regs. */
41078 static reg_class_t
41079 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41081 machine_mode mode = GET_MODE (x);
41083 /* We're only allowed to return a subclass of CLASS. Many of the
41084 following checks fail for NO_REGS, so eliminate that early. */
41085 if (regclass == NO_REGS)
41086 return NO_REGS;
41088 /* All classes can load zeros. */
41089 if (x == CONST0_RTX (mode))
41090 return regclass;
41092 /* Force constants into memory if we are loading a (nonzero) constant into
41093 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41094 instructions to load from a constant. */
41095 if (CONSTANT_P (x)
41096 && (MAYBE_MMX_CLASS_P (regclass)
41097 || MAYBE_SSE_CLASS_P (regclass)
41098 || MAYBE_MASK_CLASS_P (regclass)))
41099 return NO_REGS;
41101 /* Prefer SSE regs only, if we can use them for math. */
41102 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41103 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41105 /* Floating-point constants need more complex checks. */
41106 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41108 /* General regs can load everything. */
41109 if (reg_class_subset_p (regclass, GENERAL_REGS))
41110 return regclass;
41112 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41113 zero above. We only want to wind up preferring 80387 registers if
41114 we plan on doing computation with them. */
41115 if (TARGET_80387
41116 && standard_80387_constant_p (x) > 0)
41118 /* Limit class to non-sse. */
41119 if (regclass == FLOAT_SSE_REGS)
41120 return FLOAT_REGS;
41121 if (regclass == FP_TOP_SSE_REGS)
41122 return FP_TOP_REG;
41123 if (regclass == FP_SECOND_SSE_REGS)
41124 return FP_SECOND_REG;
41125 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41126 return regclass;
41129 return NO_REGS;
41132 /* Generally when we see PLUS here, it's the function invariant
41133 (plus soft-fp const_int), which can only be computed into general
41134 regs. */
41135 if (GET_CODE (x) == PLUS)
41136 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41138 /* QImode constants are easy to load, but non-constant QImode data
41139 must go into Q_REGS. */
41140 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41142 if (reg_class_subset_p (regclass, Q_REGS))
41143 return regclass;
41144 if (reg_class_subset_p (Q_REGS, regclass))
41145 return Q_REGS;
41146 return NO_REGS;
41149 return regclass;
41152 /* Discourage putting floating-point values in SSE registers unless
41153 SSE math is being used, and likewise for the 387 registers. */
41154 static reg_class_t
41155 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41157 machine_mode mode = GET_MODE (x);
41159 /* Restrict the output reload class to the register bank that we are doing
41160 math on. If we would like not to return a subset of CLASS, reject this
41161 alternative: if reload cannot do this, it will still use its choice. */
41162 mode = GET_MODE (x);
41163 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41164 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41166 if (X87_FLOAT_MODE_P (mode))
41168 if (regclass == FP_TOP_SSE_REGS)
41169 return FP_TOP_REG;
41170 else if (regclass == FP_SECOND_SSE_REGS)
41171 return FP_SECOND_REG;
41172 else
41173 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41176 return regclass;
41179 static reg_class_t
41180 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41181 machine_mode mode, secondary_reload_info *sri)
41183 /* Double-word spills from general registers to non-offsettable memory
41184 references (zero-extended addresses) require special handling. */
41185 if (TARGET_64BIT
41186 && MEM_P (x)
41187 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41188 && INTEGER_CLASS_P (rclass)
41189 && !offsettable_memref_p (x))
41191 sri->icode = (in_p
41192 ? CODE_FOR_reload_noff_load
41193 : CODE_FOR_reload_noff_store);
41194 /* Add the cost of moving address to a temporary. */
41195 sri->extra_cost = 1;
41197 return NO_REGS;
41200 /* QImode spills from non-QI registers require an
41201 intermediate register on 32-bit targets. */
41202 if (mode == QImode
41203 && (MAYBE_MASK_CLASS_P (rclass)
41204 || (!TARGET_64BIT && !in_p
41205 && INTEGER_CLASS_P (rclass)
41206 && MAYBE_NON_Q_CLASS_P (rclass))))
41208 int regno;
41210 if (REG_P (x))
41211 regno = REGNO (x);
41212 else
41213 regno = -1;
41215 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41216 regno = true_regnum (x);
41218 /* Return Q_REGS if the operand is in memory. */
41219 if (regno == -1)
41220 return Q_REGS;
41223 /* This condition handles corner case where an expression involving
41224 pointers gets vectorized. We're trying to use the address of a
41225 stack slot as a vector initializer.
41227 (set (reg:V2DI 74 [ vect_cst_.2 ])
41228 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41230 Eventually frame gets turned into sp+offset like this:
41232 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41233 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41234 (const_int 392 [0x188]))))
41236 That later gets turned into:
41238 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41239 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41240 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41242 We'll have the following reload recorded:
41244 Reload 0: reload_in (DI) =
41245 (plus:DI (reg/f:DI 7 sp)
41246 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41247 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41248 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41249 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41250 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41251 reload_reg_rtx: (reg:V2DI 22 xmm1)
41253 Which isn't going to work since SSE instructions can't handle scalar
41254 additions. Returning GENERAL_REGS forces the addition into integer
41255 register and reload can handle subsequent reloads without problems. */
41257 if (in_p && GET_CODE (x) == PLUS
41258 && SSE_CLASS_P (rclass)
41259 && SCALAR_INT_MODE_P (mode))
41260 return GENERAL_REGS;
41262 return NO_REGS;
41265 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41267 static bool
41268 ix86_class_likely_spilled_p (reg_class_t rclass)
41270 switch (rclass)
41272 case AREG:
41273 case DREG:
41274 case CREG:
41275 case BREG:
41276 case AD_REGS:
41277 case SIREG:
41278 case DIREG:
41279 case SSE_FIRST_REG:
41280 case FP_TOP_REG:
41281 case FP_SECOND_REG:
41282 case BND_REGS:
41283 return true;
41285 default:
41286 break;
41289 return false;
41292 /* If we are copying between general and FP registers, we need a memory
41293 location. The same is true for SSE and MMX registers.
41295 To optimize register_move_cost performance, allow inline variant.
41297 The macro can't work reliably when one of the CLASSES is a class containing
41298 registers from multiple units (SSE, MMX, integer). We avoid this by never
41299 combining those units in a single alternative in the machine description.
41300 Ensure that this constraint holds to avoid unexpected surprises.
41302 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41303 enforce these sanity checks. */
41305 static inline bool
41306 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41307 machine_mode mode, int strict)
41309 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41310 return false;
41311 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41312 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41313 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41314 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41315 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41316 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41318 gcc_assert (!strict || lra_in_progress);
41319 return true;
41322 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41323 return true;
41325 /* Between mask and general, we have moves no larger than word size. */
41326 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41327 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41328 return true;
41330 /* ??? This is a lie. We do have moves between mmx/general, and for
41331 mmx/sse2. But by saying we need secondary memory we discourage the
41332 register allocator from using the mmx registers unless needed. */
41333 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41334 return true;
41336 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41338 /* SSE1 doesn't have any direct moves from other classes. */
41339 if (!TARGET_SSE2)
41340 return true;
41342 /* If the target says that inter-unit moves are more expensive
41343 than moving through memory, then don't generate them. */
41344 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41345 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41346 return true;
41348 /* Between SSE and general, we have moves no larger than word size. */
41349 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41350 return true;
41353 return false;
41356 bool
41357 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41358 machine_mode mode, int strict)
41360 return inline_secondary_memory_needed (class1, class2, mode, strict);
41363 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41365 On the 80386, this is the size of MODE in words,
41366 except in the FP regs, where a single reg is always enough. */
41368 static unsigned char
41369 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41371 if (MAYBE_INTEGER_CLASS_P (rclass))
41373 if (mode == XFmode)
41374 return (TARGET_64BIT ? 2 : 3);
41375 else if (mode == XCmode)
41376 return (TARGET_64BIT ? 4 : 6);
41377 else
41378 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41380 else
41382 if (COMPLEX_MODE_P (mode))
41383 return 2;
41384 else
41385 return 1;
41389 /* Return true if the registers in CLASS cannot represent the change from
41390 modes FROM to TO. */
41392 bool
41393 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41394 enum reg_class regclass)
41396 if (from == to)
41397 return false;
41399 /* x87 registers can't do subreg at all, as all values are reformatted
41400 to extended precision. */
41401 if (MAYBE_FLOAT_CLASS_P (regclass))
41402 return true;
41404 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41406 /* Vector registers do not support QI or HImode loads. If we don't
41407 disallow a change to these modes, reload will assume it's ok to
41408 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41409 the vec_dupv4hi pattern. */
41410 if (GET_MODE_SIZE (from) < 4)
41411 return true;
41414 return false;
41417 /* Return the cost of moving data of mode M between a
41418 register and memory. A value of 2 is the default; this cost is
41419 relative to those in `REGISTER_MOVE_COST'.
41421 This function is used extensively by register_move_cost that is used to
41422 build tables at startup. Make it inline in this case.
41423 When IN is 2, return maximum of in and out move cost.
41425 If moving between registers and memory is more expensive than
41426 between two registers, you should define this macro to express the
41427 relative cost.
41429 Also model the increased cost of moving QImode registers in non
41430 Q_REGS classes. */
41432 static inline int
41433 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41434 int in)
41436 int cost;
41437 if (FLOAT_CLASS_P (regclass))
41439 int index;
41440 switch (mode)
41442 case SFmode:
41443 index = 0;
41444 break;
41445 case DFmode:
41446 index = 1;
41447 break;
41448 case XFmode:
41449 index = 2;
41450 break;
41451 default:
41452 return 100;
41454 if (in == 2)
41455 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41456 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41458 if (SSE_CLASS_P (regclass))
41460 int index;
41461 switch (GET_MODE_SIZE (mode))
41463 case 4:
41464 index = 0;
41465 break;
41466 case 8:
41467 index = 1;
41468 break;
41469 case 16:
41470 index = 2;
41471 break;
41472 default:
41473 return 100;
41475 if (in == 2)
41476 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41477 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41479 if (MMX_CLASS_P (regclass))
41481 int index;
41482 switch (GET_MODE_SIZE (mode))
41484 case 4:
41485 index = 0;
41486 break;
41487 case 8:
41488 index = 1;
41489 break;
41490 default:
41491 return 100;
41493 if (in == 2)
41494 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41495 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41497 switch (GET_MODE_SIZE (mode))
41499 case 1:
41500 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41502 if (!in)
41503 return ix86_cost->int_store[0];
41504 if (TARGET_PARTIAL_REG_DEPENDENCY
41505 && optimize_function_for_speed_p (cfun))
41506 cost = ix86_cost->movzbl_load;
41507 else
41508 cost = ix86_cost->int_load[0];
41509 if (in == 2)
41510 return MAX (cost, ix86_cost->int_store[0]);
41511 return cost;
41513 else
41515 if (in == 2)
41516 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41517 if (in)
41518 return ix86_cost->movzbl_load;
41519 else
41520 return ix86_cost->int_store[0] + 4;
41522 break;
41523 case 2:
41524 if (in == 2)
41525 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41526 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41527 default:
41528 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41529 if (mode == TFmode)
41530 mode = XFmode;
41531 if (in == 2)
41532 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41533 else if (in)
41534 cost = ix86_cost->int_load[2];
41535 else
41536 cost = ix86_cost->int_store[2];
41537 return (cost * (((int) GET_MODE_SIZE (mode)
41538 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41542 static int
41543 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41544 bool in)
41546 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41550 /* Return the cost of moving data from a register in class CLASS1 to
41551 one in class CLASS2.
41553 It is not required that the cost always equal 2 when FROM is the same as TO;
41554 on some machines it is expensive to move between registers if they are not
41555 general registers. */
41557 static int
41558 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41559 reg_class_t class2_i)
41561 enum reg_class class1 = (enum reg_class) class1_i;
41562 enum reg_class class2 = (enum reg_class) class2_i;
41564 /* In case we require secondary memory, compute cost of the store followed
41565 by load. In order to avoid bad register allocation choices, we need
41566 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41568 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41570 int cost = 1;
41572 cost += inline_memory_move_cost (mode, class1, 2);
41573 cost += inline_memory_move_cost (mode, class2, 2);
41575 /* In the case of copying from a general purpose register we may emit multiple
41576 stores followed by a single load, causing a memory size mismatch stall.
41577 Count this as an arbitrarily high cost of 20. */
41578 if (targetm.class_max_nregs (class1, mode)
41579 > targetm.class_max_nregs (class2, mode))
41580 cost += 20;
41582 /* In the case of FP/MMX moves, the registers actually overlap, and we
41583 have to switch modes in order to treat them differently. */
41584 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41585 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41586 cost += 20;
41588 return cost;
41591 /* Moves between SSE/MMX and integer unit are expensive. */
41592 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41593 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41595 /* ??? By keeping returned value relatively high, we limit the number
41596 of moves between integer and MMX/SSE registers for all targets.
41597 Additionally, a high value prevents problems with x86_modes_tieable_p(),
41598 where integer modes in MMX/SSE registers are not tieable
41599 because of missing QImode and HImode moves to, from or between
41600 MMX/SSE registers. */
41601 return MAX (8, ix86_cost->mmxsse_to_integer);
41603 if (MAYBE_FLOAT_CLASS_P (class1))
41604 return ix86_cost->fp_move;
41605 if (MAYBE_SSE_CLASS_P (class1))
41606 return ix86_cost->sse_move;
41607 if (MAYBE_MMX_CLASS_P (class1))
41608 return ix86_cost->mmx_move;
41609 return 2;
41612 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41613 MODE. */
41615 bool
41616 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41618 /* The flags register, and only the flags register, can hold CCmode values. */
41619 if (CC_REGNO_P (regno))
41620 return GET_MODE_CLASS (mode) == MODE_CC;
41621 if (GET_MODE_CLASS (mode) == MODE_CC
41622 || GET_MODE_CLASS (mode) == MODE_RANDOM
41623 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41624 return false;
41625 if (STACK_REGNO_P (regno))
41626 return VALID_FP_MODE_P (mode);
41627 if (MASK_REGNO_P (regno))
41628 return (VALID_MASK_REG_MODE (mode)
41629 || ((TARGET_AVX512BW || TARGET_AVX512VBMI)
41630 && VALID_MASK_AVX512BW_MODE (mode)));
41631 if (BND_REGNO_P (regno))
41632 return VALID_BND_REG_MODE (mode);
41633 if (SSE_REGNO_P (regno))
41635 /* We implement the move patterns for all vector modes into and
41636 out of SSE registers, even when no operation instructions
41637 are available. */
41639 /* For AVX-512 we allow, regardless of regno:
41640 - XI mode
41641 - any 512-bit wide vector mode
41642 - any scalar mode. */
41643 if (TARGET_AVX512F
41644 && (mode == XImode
41645 || VALID_AVX512F_REG_MODE (mode)
41646 || VALID_AVX512F_SCALAR_MODE (mode)))
41647 return true;
41649 /* TODO check for QI/HI scalars. */
41650 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41651 if (TARGET_AVX512VL
41652 && (mode == OImode
41653 || mode == TImode
41654 || VALID_AVX256_REG_MODE (mode)
41655 || VALID_AVX512VL_128_REG_MODE (mode)))
41656 return true;
41658 /* xmm16-xmm31 are only available for AVX-512. */
41659 if (EXT_REX_SSE_REGNO_P (regno))
41660 return false;
41662 /* OImode and AVX modes are available only when AVX is enabled. */
41663 return ((TARGET_AVX
41664 && VALID_AVX256_REG_OR_OI_MODE (mode))
41665 || VALID_SSE_REG_MODE (mode)
41666 || VALID_SSE2_REG_MODE (mode)
41667 || VALID_MMX_REG_MODE (mode)
41668 || VALID_MMX_REG_MODE_3DNOW (mode));
41670 if (MMX_REGNO_P (regno))
41672 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41673 so if the register is available at all, then we can move data of
41674 the given mode into or out of it. */
41675 return (VALID_MMX_REG_MODE (mode)
41676 || VALID_MMX_REG_MODE_3DNOW (mode));
41679 if (mode == QImode)
41681 /* Take care for QImode values - they can be in non-QI regs,
41682 but then they do cause partial register stalls. */
41683 if (ANY_QI_REGNO_P (regno))
41684 return true;
41685 if (!TARGET_PARTIAL_REG_STALL)
41686 return true;
41687 /* LRA checks if the hard register is OK for the given mode.
41688 QImode values can live in non-QI regs, so we allow all
41689 registers here. */
41690 if (lra_in_progress)
41691 return true;
41692 return !can_create_pseudo_p ();
41694 /* We handle both integer and floats in the general purpose registers. */
41695 else if (VALID_INT_MODE_P (mode))
41696 return true;
41697 else if (VALID_FP_MODE_P (mode))
41698 return true;
41699 else if (VALID_DFP_MODE_P (mode))
41700 return true;
41701 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41702 on to use that value in smaller contexts, this can easily force a
41703 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41704 supporting DImode, allow it. */
41705 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41706 return true;
41708 return false;
41711 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41712 tieable integer mode. */
41714 static bool
41715 ix86_tieable_integer_mode_p (machine_mode mode)
41717 switch (mode)
41719 case HImode:
41720 case SImode:
41721 return true;
41723 case QImode:
41724 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41726 case DImode:
41727 return TARGET_64BIT;
41729 default:
41730 return false;
41734 /* Return true if MODE1 is accessible in a register that can hold MODE2
41735 without copying. That is, all register classes that can hold MODE2
41736 can also hold MODE1. */
41738 bool
41739 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41741 if (mode1 == mode2)
41742 return true;
41744 if (ix86_tieable_integer_mode_p (mode1)
41745 && ix86_tieable_integer_mode_p (mode2))
41746 return true;
41748 /* MODE2 being XFmode implies fp stack or general regs, which means we
41749 can tie any smaller floating point modes to it. Note that we do not
41750 tie this with TFmode. */
41751 if (mode2 == XFmode)
41752 return mode1 == SFmode || mode1 == DFmode;
41754 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41755 that we can tie it with SFmode. */
41756 if (mode2 == DFmode)
41757 return mode1 == SFmode;
41759 /* If MODE2 is only appropriate for an SSE register, then tie with
41760 any other mode acceptable to SSE registers. */
41761 if (GET_MODE_SIZE (mode2) == 32
41762 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41763 return (GET_MODE_SIZE (mode1) == 32
41764 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41765 if (GET_MODE_SIZE (mode2) == 16
41766 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41767 return (GET_MODE_SIZE (mode1) == 16
41768 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41770 /* If MODE2 is appropriate for an MMX register, then tie
41771 with any other mode acceptable to MMX registers. */
41772 if (GET_MODE_SIZE (mode2) == 8
41773 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41774 return (GET_MODE_SIZE (mode1) == 8
41775 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41777 return false;
41780 /* Return the cost of moving between two registers of mode MODE. */
41782 static int
41783 ix86_set_reg_reg_cost (machine_mode mode)
41785 unsigned int units = UNITS_PER_WORD;
41787 switch (GET_MODE_CLASS (mode))
41789 default:
41790 break;
41792 case MODE_CC:
41793 units = GET_MODE_SIZE (CCmode);
41794 break;
41796 case MODE_FLOAT:
41797 if ((TARGET_SSE && mode == TFmode)
41798 || (TARGET_80387 && mode == XFmode)
41799 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41800 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41801 units = GET_MODE_SIZE (mode);
41802 break;
41804 case MODE_COMPLEX_FLOAT:
41805 if ((TARGET_SSE && mode == TCmode)
41806 || (TARGET_80387 && mode == XCmode)
41807 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41808 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41809 units = GET_MODE_SIZE (mode);
41810 break;
41812 case MODE_VECTOR_INT:
41813 case MODE_VECTOR_FLOAT:
41814 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41815 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41816 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41817 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41818 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41819 units = GET_MODE_SIZE (mode);
41822 /* Return the cost of moving between two registers of mode MODE,
41823 assuming that the move will be in pieces of at most UNITS bytes. */
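/* For example, a 16-byte TImode value is MODE_INT, so UNITS stays at the
   word size and on a 64-bit target the move is costed as two instructions.  */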
41824 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41827 /* Compute a (partial) cost for rtx X. Return true if the complete
41828 cost has been computed, and false if subexpressions should be
41829 scanned. In either case, *TOTAL contains the cost result. */
41831 static bool
41832 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41833 bool speed)
41835 rtx mask;
41836 enum rtx_code code = (enum rtx_code) code_i;
41837 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41838 machine_mode mode = GET_MODE (x);
41839 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41841 switch (code)
41843 case SET:
41844 if (register_operand (SET_DEST (x), VOIDmode)
41845 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41847 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41848 return true;
41850 return false;
41852 case CONST_INT:
41853 case CONST:
41854 case LABEL_REF:
41855 case SYMBOL_REF:
41856 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41857 *total = 3;
41858 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41859 *total = 2;
41860 else if (flag_pic && SYMBOLIC_CONST (x)
41861 && !(TARGET_64BIT
41862 && (GET_CODE (x) == LABEL_REF
41863 || (GET_CODE (x) == SYMBOL_REF
41864 && SYMBOL_REF_LOCAL_P (x)))))
41865 *total = 1;
41866 else
41867 *total = 0;
41868 return true;
41870 case CONST_DOUBLE:
41871 if (mode == VOIDmode)
41873 *total = 0;
41874 return true;
41876 switch (standard_80387_constant_p (x))
41878 case 1: /* 0.0 */
41879 *total = 1;
41880 return true;
41881 default: /* Other constants */
41882 *total = 2;
41883 return true;
41884 case 0:
41885 case -1:
41886 break;
41888 if (SSE_FLOAT_MODE_P (mode))
41890 case CONST_VECTOR:
41891 switch (standard_sse_constant_p (x))
41893 case 0:
41894 break;
41895 case 1: /* 0: xor eliminates false dependency */
41896 *total = 0;
41897 return true;
41898 default: /* -1: cmp contains false dependency */
41899 *total = 1;
41900 return true;
41903 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41904 it'll probably end up. Add a penalty for size. */
41905 *total = (COSTS_N_INSNS (1)
41906 + (flag_pic != 0 && !TARGET_64BIT)
41907 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41908 return true;
41910 case ZERO_EXTEND:
41911 /* Zero extension is often completely free on x86_64, so make
41912 it as cheap as possible. */
41913 if (TARGET_64BIT && mode == DImode
41914 && GET_MODE (XEXP (x, 0)) == SImode)
41915 *total = 1;
41916 else if (TARGET_ZERO_EXTEND_WITH_AND)
41917 *total = cost->add;
41918 else
41919 *total = cost->movzx;
41920 return false;
41922 case SIGN_EXTEND:
41923 *total = cost->movsx;
41924 return false;
41926 case ASHIFT:
41927 if (SCALAR_INT_MODE_P (mode)
41928 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41929 && CONST_INT_P (XEXP (x, 1)))
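/* A shift left by 1 can be implemented as an ADD of the register to itself,
   and shifts by 2 or 3 as an LEA with a scaled index, which is why they are
   costed below as an add or lea respectively.  */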
41931 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41932 if (value == 1)
41934 *total = cost->add;
41935 return false;
41937 if ((value == 2 || value == 3)
41938 && cost->lea <= cost->shift_const)
41940 *total = cost->lea;
41941 return false;
41944 /* FALLTHRU */
41946 case ROTATE:
41947 case ASHIFTRT:
41948 case LSHIFTRT:
41949 case ROTATERT:
41950 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41952 /* ??? Should be SSE vector operation cost. */
41953 /* At least for published AMD latencies, this really is the same
41954 as the latency for a simple fpu operation like fabs. */
41955 /* V*QImode is emulated with 1-11 insns. */
41956 if (mode == V16QImode || mode == V32QImode)
41958 int count = 11;
41959 if (TARGET_XOP && mode == V16QImode)
41961 /* For XOP we use vpshab, which requires a broadcast of the
41962 value to the variable shift insn. For constants this
41963 means a V16Q const in mem; even when we can perform the
41964 shift with one insn set the cost to prefer paddb. */
41965 if (CONSTANT_P (XEXP (x, 1)))
41967 *total = (cost->fabs
41968 + rtx_cost (XEXP (x, 0), code, 0, speed)
41969 + (speed ? 2 : COSTS_N_BYTES (16)));
41970 return true;
41972 count = 3;
41974 else if (TARGET_SSSE3)
41975 count = 7;
41976 *total = cost->fabs * count;
41978 else
41979 *total = cost->fabs;
41981 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41983 if (CONST_INT_P (XEXP (x, 1)))
41985 if (INTVAL (XEXP (x, 1)) > 32)
41986 *total = cost->shift_const + COSTS_N_INSNS (2);
41987 else
41988 *total = cost->shift_const * 2;
41990 else
41992 if (GET_CODE (XEXP (x, 1)) == AND)
41993 *total = cost->shift_var * 2;
41994 else
41995 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
41998 else
42000 if (CONST_INT_P (XEXP (x, 1)))
42001 *total = cost->shift_const;
42002 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42003 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42005 /* Return the cost after shift-and truncation. */
42006 *total = cost->shift_var;
42007 return true;
42009 else
42010 *total = cost->shift_var;
42012 return false;
42014 case FMA:
42016 rtx sub;
42018 gcc_assert (FLOAT_MODE_P (mode));
42019 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42021 /* ??? SSE scalar/vector cost should be used here. */
42022 /* ??? Bald assumption that fma has the same cost as fmul. */
42023 *total = cost->fmul;
42024 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42026 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42027 sub = XEXP (x, 0);
42028 if (GET_CODE (sub) == NEG)
42029 sub = XEXP (sub, 0);
42030 *total += rtx_cost (sub, FMA, 0, speed);
42032 sub = XEXP (x, 2);
42033 if (GET_CODE (sub) == NEG)
42034 sub = XEXP (sub, 0);
42035 *total += rtx_cost (sub, FMA, 2, speed);
42036 return true;
42039 case MULT:
42040 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42042 /* ??? SSE scalar cost should be used here. */
42043 *total = cost->fmul;
42044 return false;
42046 else if (X87_FLOAT_MODE_P (mode))
42048 *total = cost->fmul;
42049 return false;
42051 else if (FLOAT_MODE_P (mode))
42053 /* ??? SSE vector cost should be used here. */
42054 *total = cost->fmul;
42055 return false;
42057 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42059 /* V*QImode is emulated with 7-13 insns. */
42060 if (mode == V16QImode || mode == V32QImode)
42062 int extra = 11;
42063 if (TARGET_XOP && mode == V16QImode)
42064 extra = 5;
42065 else if (TARGET_SSSE3)
42066 extra = 6;
42067 *total = cost->fmul * 2 + cost->fabs * extra;
42069 /* V*DImode is emulated with 5-8 insns. */
42070 else if (mode == V2DImode || mode == V4DImode)
42072 if (TARGET_XOP && mode == V2DImode)
42073 *total = cost->fmul * 2 + cost->fabs * 3;
42074 else
42075 *total = cost->fmul * 3 + cost->fabs * 5;
42077 /* Without SSE4.1, we don't have PMULLD; it's emulated with 7
42078 insns, including two PMULUDQ. */
42079 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42080 *total = cost->fmul * 2 + cost->fabs * 5;
42081 else
42082 *total = cost->fmul;
42083 return false;
42085 else
42087 rtx op0 = XEXP (x, 0);
42088 rtx op1 = XEXP (x, 1);
42089 int nbits;
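/* For a constant multiplier the loop below counts its set bits by
   clearing the lowest one each iteration (value &= value - 1); e.g. a
   multiply by 10 (binary 1010) gives nbits == 2, and the cost becomes
   mult_init for the mode plus nbits * mult_bit plus the operand costs.  */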
42090 if (CONST_INT_P (XEXP (x, 1)))
42092 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42093 for (nbits = 0; value != 0; value &= value - 1)
42094 nbits++;
42096 else
42097 /* This is arbitrary. */
42098 nbits = 7;
42100 /* Compute costs correctly for widening multiplication. */
42101 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42102 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42103 == GET_MODE_SIZE (mode))
42105 int is_mulwiden = 0;
42106 machine_mode inner_mode = GET_MODE (op0);
42108 if (GET_CODE (op0) == GET_CODE (op1))
42109 is_mulwiden = 1, op1 = XEXP (op1, 0);
42110 else if (CONST_INT_P (op1))
42112 if (GET_CODE (op0) == SIGN_EXTEND)
42113 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42114 == INTVAL (op1);
42115 else
42116 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42119 if (is_mulwiden)
42120 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42123 *total = (cost->mult_init[MODE_INDEX (mode)]
42124 + nbits * cost->mult_bit
42125 + rtx_cost (op0, outer_code, opno, speed)
42126 + rtx_cost (op1, outer_code, opno, speed));
42128 return true;
42131 case DIV:
42132 case UDIV:
42133 case MOD:
42134 case UMOD:
42135 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42136 /* ??? SSE cost should be used here. */
42137 *total = cost->fdiv;
42138 else if (X87_FLOAT_MODE_P (mode))
42139 *total = cost->fdiv;
42140 else if (FLOAT_MODE_P (mode))
42141 /* ??? SSE vector cost should be used here. */
42142 *total = cost->fdiv;
42143 else
42144 *total = cost->divide[MODE_INDEX (mode)];
42145 return false;
42147 case PLUS:
42148 if (GET_MODE_CLASS (mode) == MODE_INT
42149 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
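/* The shapes recognized below all map onto the lea addressing form
   base + index*scale + disp; e.g. (plus (plus (mult reg 4) reg) const)
   can be emitted as a single leal such as leal 16(%ebx,%ecx,4),%eax.  */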
42151 if (GET_CODE (XEXP (x, 0)) == PLUS
42152 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42153 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42154 && CONSTANT_P (XEXP (x, 1)))
42156 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42157 if (val == 2 || val == 4 || val == 8)
42159 *total = cost->lea;
42160 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42161 outer_code, opno, speed);
42162 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42163 outer_code, opno, speed);
42164 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42165 return true;
42168 else if (GET_CODE (XEXP (x, 0)) == MULT
42169 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42171 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42172 if (val == 2 || val == 4 || val == 8)
42174 *total = cost->lea;
42175 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42176 outer_code, opno, speed);
42177 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42178 return true;
42181 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42183 *total = cost->lea;
42184 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42185 outer_code, opno, speed);
42186 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42187 outer_code, opno, speed);
42188 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42189 return true;
42192 /* FALLTHRU */
42194 case MINUS:
42195 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42197 /* ??? SSE cost should be used here. */
42198 *total = cost->fadd;
42199 return false;
42201 else if (X87_FLOAT_MODE_P (mode))
42203 *total = cost->fadd;
42204 return false;
42206 else if (FLOAT_MODE_P (mode))
42208 /* ??? SSE vector cost should be used here. */
42209 *total = cost->fadd;
42210 return false;
42212 /* FALLTHRU */
42214 case AND:
42215 case IOR:
42216 case XOR:
42217 if (GET_MODE_CLASS (mode) == MODE_INT
42218 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42220 *total = (cost->add * 2
42221 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42222 << (GET_MODE (XEXP (x, 0)) != DImode))
42223 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42224 << (GET_MODE (XEXP (x, 1)) != DImode)));
42225 return true;
42227 /* FALLTHRU */
42229 case NEG:
42230 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42232 /* ??? SSE cost should be used here. */
42233 *total = cost->fchs;
42234 return false;
42236 else if (X87_FLOAT_MODE_P (mode))
42238 *total = cost->fchs;
42239 return false;
42241 else if (FLOAT_MODE_P (mode))
42243 /* ??? SSE vector cost should be used here. */
42244 *total = cost->fchs;
42245 return false;
42247 /* FALLTHRU */
42249 case NOT:
42250 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42252 /* ??? Should be SSE vector operation cost. */
42253 /* At least for published AMD latencies, this really is the same
42254 as the latency for a simple fpu operation like fabs. */
42255 *total = cost->fabs;
42257 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42258 *total = cost->add * 2;
42259 else
42260 *total = cost->add;
42261 return false;
42263 case COMPARE:
42264 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42265 && XEXP (XEXP (x, 0), 1) == const1_rtx
42266 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42267 && XEXP (x, 1) == const0_rtx)
42269 /* This kind of construct is implemented using test[bwl].
42270 Treat it as if we had an AND. */
42271 *total = (cost->add
42272 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42273 + rtx_cost (const1_rtx, outer_code, opno, speed));
42274 return true;
42276 return false;
42278 case FLOAT_EXTEND:
42279 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42280 *total = 0;
42281 return false;
42283 case ABS:
42284 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42285 /* ??? SSE cost should be used here. */
42286 *total = cost->fabs;
42287 else if (X87_FLOAT_MODE_P (mode))
42288 *total = cost->fabs;
42289 else if (FLOAT_MODE_P (mode))
42290 /* ??? SSE vector cost should be used here. */
42291 *total = cost->fabs;
42292 return false;
42294 case SQRT:
42295 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42296 /* ??? SSE cost should be used here. */
42297 *total = cost->fsqrt;
42298 else if (X87_FLOAT_MODE_P (mode))
42299 *total = cost->fsqrt;
42300 else if (FLOAT_MODE_P (mode))
42301 /* ??? SSE vector cost should be used here. */
42302 *total = cost->fsqrt;
42303 return false;
42305 case UNSPEC:
42306 if (XINT (x, 1) == UNSPEC_TP)
42307 *total = 0;
42308 return false;
42310 case VEC_SELECT:
42311 case VEC_CONCAT:
42312 case VEC_DUPLICATE:
42313 /* ??? Assume all of these vector manipulation patterns are
42314 recognizable, in which case they all pretty much have the
42315 same cost. */
42316 *total = cost->fabs;
42317 return true;
42318 case VEC_MERGE:
42319 mask = XEXP (x, 2);
42320 /* This is a masked instruction; assume the same cost as
42321 the non-masked variant. */
42322 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42323 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42324 else
42325 *total = cost->fabs;
42326 return true;
42328 default:
42329 return false;
42333 #if TARGET_MACHO
42335 static int current_machopic_label_num;
42337 /* Given a symbol name and its associated stub, write out the
42338 definition of the stub. */
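/* For the pure PIC case the emitted stub looks roughly like:

	STUB:
		.indirect_symbol SYMBOL
		call	___<cpu>.get_pc_thunk.cx
	LPC$N:	movl	L<N>$lz-LPC$N(%ecx),%ecx
		jmp	*%ecx

   followed by the lazy binder and the lazy symbol pointer it resolves.  */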
42340 void
42341 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42343 unsigned int length;
42344 char *binder_name, *symbol_name, lazy_ptr_name[32];
42345 int label = ++current_machopic_label_num;
42347 /* For 64-bit we shouldn't get here. */
42348 gcc_assert (!TARGET_64BIT);
42350 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42351 symb = targetm.strip_name_encoding (symb);
42353 length = strlen (stub);
42354 binder_name = XALLOCAVEC (char, length + 32);
42355 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42357 length = strlen (symb);
42358 symbol_name = XALLOCAVEC (char, length + 32);
42359 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42361 sprintf (lazy_ptr_name, "L%d$lz", label);
42363 if (MACHOPIC_ATT_STUB)
42364 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42365 else if (MACHOPIC_PURE)
42366 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42367 else
42368 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42370 fprintf (file, "%s:\n", stub);
42371 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42373 if (MACHOPIC_ATT_STUB)
42375 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42377 else if (MACHOPIC_PURE)
42379 /* PIC stub. */
42380 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42381 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42382 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42383 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42384 label, lazy_ptr_name, label);
42385 fprintf (file, "\tjmp\t*%%ecx\n");
42387 else
42388 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42390 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42391 it needs no stub-binding-helper. */
42392 if (MACHOPIC_ATT_STUB)
42393 return;
42395 fprintf (file, "%s:\n", binder_name);
42397 if (MACHOPIC_PURE)
42399 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42400 fprintf (file, "\tpushl\t%%ecx\n");
42402 else
42403 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42405 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42407 /* N.B. Keep the correspondence of these
42408 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42409 old-pic/new-pic/non-pic stubs; altering this will break
42410 compatibility with existing dylibs. */
42411 if (MACHOPIC_PURE)
42413 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42414 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42416 else
42417 /* 16-byte -mdynamic-no-pic stub. */
42418 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42420 fprintf (file, "%s:\n", lazy_ptr_name);
42421 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42422 fprintf (file, ASM_LONG "%s\n", binder_name);
42424 #endif /* TARGET_MACHO */
42426 /* Order the registers for register allocator. */
42428 void
42429 x86_order_regs_for_local_alloc (void)
42431 int pos = 0;
42432 int i;
42434 /* First allocate the local general purpose registers. */
42435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42436 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42437 reg_alloc_order [pos++] = i;
42439 /* Global general purpose registers. */
42440 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42441 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42442 reg_alloc_order [pos++] = i;
42444 /* x87 registers come first in case we are doing FP math
42445 using them. */
42446 if (!TARGET_SSE_MATH)
42447 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42448 reg_alloc_order [pos++] = i;
42450 /* SSE registers. */
42451 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42452 reg_alloc_order [pos++] = i;
42453 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42454 reg_alloc_order [pos++] = i;
42456 /* Extended REX SSE registers. */
42457 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42458 reg_alloc_order [pos++] = i;
42461 /* Mask registers. */
42461 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42462 reg_alloc_order [pos++] = i;
42464 /* MPX bound registers. */
42465 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42466 reg_alloc_order [pos++] = i;
42468 /* x87 registers. */
42469 if (TARGET_SSE_MATH)
42470 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42471 reg_alloc_order [pos++] = i;
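/* MMX registers.  */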
42473 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42474 reg_alloc_order [pos++] = i;
42476 /* Initialize the rest of the array, as we do not allocate some
42477 registers at all. */
42478 while (pos < FIRST_PSEUDO_REGISTER)
42479 reg_alloc_order [pos++] = 0;
42482 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42483 in struct attribute_spec.handler. */
42484 static tree
42485 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42486 tree args,
42487 int,
42488 bool *no_add_attrs)
42490 if (TREE_CODE (*node) != FUNCTION_TYPE
42491 && TREE_CODE (*node) != METHOD_TYPE
42492 && TREE_CODE (*node) != FIELD_DECL
42493 && TREE_CODE (*node) != TYPE_DECL)
42495 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42496 name);
42497 *no_add_attrs = true;
42498 return NULL_TREE;
42500 if (TARGET_64BIT)
42502 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42503 name);
42504 *no_add_attrs = true;
42505 return NULL_TREE;
42507 if (is_attribute_p ("callee_pop_aggregate_return", name))
42509 tree cst;
42511 cst = TREE_VALUE (args);
42512 if (TREE_CODE (cst) != INTEGER_CST)
42514 warning (OPT_Wattributes,
42515 "%qE attribute requires an integer constant argument",
42516 name);
42517 *no_add_attrs = true;
42519 else if (compare_tree_int (cst, 0) != 0
42520 && compare_tree_int (cst, 1) != 0)
42522 warning (OPT_Wattributes,
42523 "argument to %qE attribute is neither zero, nor one",
42524 name);
42525 *no_add_attrs = true;
42528 return NULL_TREE;
42531 return NULL_TREE;
42534 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42535 struct attribute_spec.handler. */
42536 static tree
42537 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42538 bool *no_add_attrs)
42540 if (TREE_CODE (*node) != FUNCTION_TYPE
42541 && TREE_CODE (*node) != METHOD_TYPE
42542 && TREE_CODE (*node) != FIELD_DECL
42543 && TREE_CODE (*node) != TYPE_DECL)
42545 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42546 name);
42547 *no_add_attrs = true;
42548 return NULL_TREE;
42551 /* Can combine regparm with all attributes but fastcall. */
42552 if (is_attribute_p ("ms_abi", name))
42554 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42556 error ("ms_abi and sysv_abi attributes are not compatible");
42559 return NULL_TREE;
42561 else if (is_attribute_p ("sysv_abi", name))
42563 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42565 error ("ms_abi and sysv_abi attributes are not compatible");
42568 return NULL_TREE;
42571 return NULL_TREE;
42574 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42575 struct attribute_spec.handler. */
42576 static tree
42577 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42578 bool *no_add_attrs)
42580 tree *type = NULL;
42581 if (DECL_P (*node))
42583 if (TREE_CODE (*node) == TYPE_DECL)
42584 type = &TREE_TYPE (*node);
42586 else
42587 type = node;
42589 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42591 warning (OPT_Wattributes, "%qE attribute ignored",
42592 name);
42593 *no_add_attrs = true;
42596 else if ((is_attribute_p ("ms_struct", name)
42597 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42598 || ((is_attribute_p ("gcc_struct", name)
42599 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42601 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42602 name);
42603 *no_add_attrs = true;
42606 return NULL_TREE;
42609 static tree
42610 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42611 bool *no_add_attrs)
42613 if (TREE_CODE (*node) != FUNCTION_DECL)
42615 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42616 name);
42617 *no_add_attrs = true;
42619 return NULL_TREE;
42622 static bool
42623 ix86_ms_bitfield_layout_p (const_tree record_type)
42625 return ((TARGET_MS_BITFIELD_LAYOUT
42626 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42627 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42630 /* Returns an expression indicating where the this parameter is
42631 located on entry to the FUNCTION. */
42633 static rtx
42634 x86_this_parameter (tree function)
42636 tree type = TREE_TYPE (function);
42637 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42638 int nregs;
42640 if (TARGET_64BIT)
42642 const int *parm_regs;
42644 if (ix86_function_type_abi (type) == MS_ABI)
42645 parm_regs = x86_64_ms_abi_int_parameter_registers;
42646 else
42647 parm_regs = x86_64_int_parameter_registers;
42648 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42651 nregs = ix86_function_regparm (type, function);
42653 if (nregs > 0 && !stdarg_p (type))
42655 int regno;
42656 unsigned int ccvt = ix86_get_callcvt (type);
42658 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42659 regno = aggr ? DX_REG : CX_REG;
42660 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42662 regno = CX_REG;
42663 if (aggr)
42664 return gen_rtx_MEM (SImode,
42665 plus_constant (Pmode, stack_pointer_rtx, 4));
42667 else
42669 regno = AX_REG;
42670 if (aggr)
42672 regno = DX_REG;
42673 if (nregs == 1)
42674 return gen_rtx_MEM (SImode,
42675 plus_constant (Pmode,
42676 stack_pointer_rtx, 4));
42679 return gen_rtx_REG (SImode, regno);
42682 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42683 aggr ? 8 : 4));
42686 /* Determine whether x86_output_mi_thunk can succeed. */
42688 static bool
42689 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42690 const_tree function)
42692 /* 64-bit can handle anything. */
42693 if (TARGET_64BIT)
42694 return true;
42696 /* For 32-bit, everything's fine if we have one free register. */
42697 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42698 return true;
42700 /* Need a free register for vcall_offset. */
42701 if (vcall_offset)
42702 return false;
42704 /* Need a free register for GOT references. */
42705 if (flag_pic && !targetm.binds_local_p (function))
42706 return false;
42708 /* Otherwise ok. */
42709 return true;
42712 /* Output the assembler code for a thunk function. THUNK_DECL is the
42713 declaration for the thunk function itself, FUNCTION is the decl for
42714 the target function. DELTA is an immediate constant offset to be
42715 added to THIS. If VCALL_OFFSET is nonzero, the word at
42716 *(*this + vcall_offset) should be added to THIS. */
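/* For example, with a small DELTA, no VCALL_OFFSET and a locally bound
   FUNCTION, the 64-bit SysV thunk reduces to roughly:

	addq	$DELTA, %rdi
	jmp	FUNCTION

   with %rcx instead of %rdi under the MS ABI, and an extra indirection
   through the vtable when VCALL_OFFSET is nonzero.  */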
42718 static void
42719 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42720 HOST_WIDE_INT vcall_offset, tree function)
42722 rtx this_param = x86_this_parameter (function);
42723 rtx this_reg, tmp, fnaddr;
42724 unsigned int tmp_regno;
42725 rtx_insn *insn;
42727 if (TARGET_64BIT)
42728 tmp_regno = R10_REG;
42729 else
42731 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42732 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42733 tmp_regno = AX_REG;
42734 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42735 tmp_regno = DX_REG;
42736 else
42737 tmp_regno = CX_REG;
42740 emit_note (NOTE_INSN_PROLOGUE_END);
42742 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42743 pull it in now and let DELTA benefit. */
42744 if (REG_P (this_param))
42745 this_reg = this_param;
42746 else if (vcall_offset)
42748 /* Put the this parameter into %eax. */
42749 this_reg = gen_rtx_REG (Pmode, AX_REG);
42750 emit_move_insn (this_reg, this_param);
42752 else
42753 this_reg = NULL_RTX;
42755 /* Adjust the this parameter by a fixed constant. */
42756 if (delta)
42758 rtx delta_rtx = GEN_INT (delta);
42759 rtx delta_dst = this_reg ? this_reg : this_param;
42761 if (TARGET_64BIT)
42763 if (!x86_64_general_operand (delta_rtx, Pmode))
42765 tmp = gen_rtx_REG (Pmode, tmp_regno);
42766 emit_move_insn (tmp, delta_rtx);
42767 delta_rtx = tmp;
42771 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42774 /* Adjust the this parameter by a value stored in the vtable. */
42775 if (vcall_offset)
42777 rtx vcall_addr, vcall_mem, this_mem;
42779 tmp = gen_rtx_REG (Pmode, tmp_regno);
42781 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42782 if (Pmode != ptr_mode)
42783 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42784 emit_move_insn (tmp, this_mem);
42786 /* Adjust the this parameter. */
42787 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42788 if (TARGET_64BIT
42789 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42791 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42792 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42793 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42796 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42797 if (Pmode != ptr_mode)
42798 emit_insn (gen_addsi_1_zext (this_reg,
42799 gen_rtx_REG (ptr_mode,
42800 REGNO (this_reg)),
42801 vcall_mem));
42802 else
42803 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42806 /* If necessary, drop THIS back to its stack slot. */
42807 if (this_reg && this_reg != this_param)
42808 emit_move_insn (this_param, this_reg);
42810 fnaddr = XEXP (DECL_RTL (function), 0);
42811 if (TARGET_64BIT)
42813 if (!flag_pic || targetm.binds_local_p (function)
42814 || TARGET_PECOFF)
42816 else
42818 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42819 tmp = gen_rtx_CONST (Pmode, tmp);
42820 fnaddr = gen_const_mem (Pmode, tmp);
42823 else
42825 if (!flag_pic || targetm.binds_local_p (function))
42827 #if TARGET_MACHO
42828 else if (TARGET_MACHO)
42830 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42831 fnaddr = XEXP (fnaddr, 0);
42833 #endif /* TARGET_MACHO */
42834 else
42836 tmp = gen_rtx_REG (Pmode, CX_REG);
42837 output_set_got (tmp, NULL_RTX);
42839 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42840 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42841 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42842 fnaddr = gen_const_mem (Pmode, fnaddr);
42846 /* Our sibling call patterns do not allow memories, because we have no
42847 predicate that can distinguish between frame and non-frame memory.
42848 For our purposes here, we can get away with (ab)using a jump pattern,
42849 because we're going to do no optimization. */
42850 if (MEM_P (fnaddr))
42852 if (sibcall_insn_operand (fnaddr, word_mode))
42854 fnaddr = XEXP (DECL_RTL (function), 0);
42855 tmp = gen_rtx_MEM (QImode, fnaddr);
42856 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42857 tmp = emit_call_insn (tmp);
42858 SIBLING_CALL_P (tmp) = 1;
42860 else
42861 emit_jump_insn (gen_indirect_jump (fnaddr));
42863 else
42865 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42867 /* CM_LARGE_PIC always uses a pseudo PIC register, which is
42868 uninitialized. Since FUNCTION is local and calling it
42869 doesn't go through the PLT, we use the scratch register %r11
42870 as the PIC register and initialize it here. */
42871 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42872 ix86_init_large_pic_reg (tmp_regno);
42873 fnaddr = legitimize_pic_address (fnaddr,
42874 gen_rtx_REG (Pmode, tmp_regno));
42877 if (!sibcall_insn_operand (fnaddr, word_mode))
42879 tmp = gen_rtx_REG (word_mode, tmp_regno);
42880 if (GET_MODE (fnaddr) != word_mode)
42881 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42882 emit_move_insn (tmp, fnaddr);
42883 fnaddr = tmp;
42886 tmp = gen_rtx_MEM (QImode, fnaddr);
42887 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42888 tmp = emit_call_insn (tmp);
42889 SIBLING_CALL_P (tmp) = 1;
42891 emit_barrier ();
42893 /* Emit just enough of rest_of_compilation to get the insns emitted.
42894 Note that use_thunk calls assemble_start_function et al. */
42895 insn = get_insns ();
42896 shorten_branches (insn);
42897 final_start_function (insn, file, 1);
42898 final (insn, file, 1);
42899 final_end_function ();
42902 static void
42903 x86_file_start (void)
42905 default_file_start ();
42906 if (TARGET_16BIT)
42907 fputs ("\t.code16gcc\n", asm_out_file);
42908 #if TARGET_MACHO
42909 darwin_file_start ();
42910 #endif
42911 if (X86_FILE_START_VERSION_DIRECTIVE)
42912 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42913 if (X86_FILE_START_FLTUSED)
42914 fputs ("\t.global\t__fltused\n", asm_out_file);
42915 if (ix86_asm_dialect == ASM_INTEL)
42916 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
42919 int
42920 x86_field_alignment (tree field, int computed)
42922 machine_mode mode;
42923 tree type = TREE_TYPE (field);
42925 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
42926 return computed;
42927 mode = TYPE_MODE (strip_array_types (type));
42928 if (mode == DFmode || mode == DCmode
42929 || GET_MODE_CLASS (mode) == MODE_INT
42930 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42931 return MIN (32, computed);
42932 return computed;
42935 /* Print a call to TARGET to FILE. */
42937 static void
42938 x86_print_call_or_nop (FILE *file, const char *target)
42940 if (flag_nop_mcount)
42941 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42942 else
42943 fprintf (file, "1:\tcall\t%s\n", target);
42946 /* Output assembler code to FILE to increment profiler label # LABELNO
42947 for profiling a function entry. */
42948 void
42949 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
42951 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
42952 : MCOUNT_NAME);
42953 if (TARGET_64BIT)
42955 #ifndef NO_PROFILE_COUNTERS
42956 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
42957 #endif
42959 if (!TARGET_PECOFF && flag_pic)
42960 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
42961 else
42962 x86_print_call_or_nop (file, mcount_name);
42964 else if (flag_pic)
42966 #ifndef NO_PROFILE_COUNTERS
42967 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
42968 LPREFIX, labelno);
42969 #endif
42970 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
42972 else
42974 #ifndef NO_PROFILE_COUNTERS
42975 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
42976 LPREFIX, labelno);
42977 #endif
42978 x86_print_call_or_nop (file, mcount_name);
42981 if (flag_record_mcount)
42983 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
42984 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
42985 fprintf (file, "\t.previous\n");
42989 /* We don't have exact information about the insn sizes, but we may assume
42990 quite safely that we are informed about all 1-byte insns and memory
42991 address sizes. This is enough to eliminate unnecessary padding in
42992 99% of cases. */
42994 static int
42995 min_insn_size (rtx_insn *insn)
42997 int l = 0, len;
42999 if (!INSN_P (insn) || !active_insn_p (insn))
43000 return 0;
43003 /* Discard alignments we've emitted and jump instructions. */
43003 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43004 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43005 return 0;
43007 /* Important case - calls are always 5 bytes.
43008 It is common to have many calls in a row. */
43009 if (CALL_P (insn)
43010 && symbolic_reference_mentioned_p (PATTERN (insn))
43011 && !SIBLING_CALL_P (insn))
43012 return 5;
43013 len = get_attr_length (insn);
43014 if (len <= 1)
43015 return 1;
43017 /* For normal instructions we rely on get_attr_length being exact,
43018 with a few exceptions. */
43019 if (!JUMP_P (insn))
43021 enum attr_type type = get_attr_type (insn);
43023 switch (type)
43025 case TYPE_MULTI:
43026 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43027 || asm_noperands (PATTERN (insn)) >= 0)
43028 return 0;
43029 break;
43030 case TYPE_OTHER:
43031 case TYPE_FCMP:
43032 break;
43033 default:
43034 /* Otherwise trust get_attr_length. */
43035 return len;
43038 l = get_attr_length_address (insn);
43039 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43040 l = 4;
43042 if (l)
43043 return 1+l;
43044 else
43045 return 2;
43048 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43050 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43051 window. */
43053 static void
43054 ix86_avoid_jump_mispredicts (void)
43056 rtx_insn *insn, *start = get_insns ();
43057 int nbytes = 0, njumps = 0;
43058 int isjump = 0;
43060 /* Look for all minimal intervals of instructions containing 4 jumps.
43061 The intervals are bounded by START and INSN. NBYTES is the total
43062 size of instructions in the interval including INSN and not including
43063 START. When NBYTES is smaller than 16, it is possible
43064 that the end of START and INSN end up in the same 16-byte page.
43066 The smallest offset in the page at which INSN can start is the case where
43067 START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
43068 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
43070 Don't consider an asm goto as a jump: while it can contain a jump, it
43071 doesn't have to, since control transfer to its label(s) can be performed
43072 through other means; also, we estimate the minimum length of all asm stmts as 0. */
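/* Concretely: when INSN would be the fourth jump within fewer than 16
   bytes, a pad of 15 - NBYTES + sizeof (INSN) bytes is emitted before it
   so that at most three jumps can share any 16-byte window.  */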
43073 for (insn = start; insn; insn = NEXT_INSN (insn))
43075 int min_size;
43077 if (LABEL_P (insn))
43079 int align = label_to_alignment (insn);
43080 int max_skip = label_to_max_skip (insn);
43082 if (max_skip > 15)
43083 max_skip = 15;
43084 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43085 already in the current 16-byte page, because otherwise
43086 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43087 bytes to reach a 16-byte boundary. */
43088 if (align <= 0
43089 || (align <= 3 && max_skip != (1 << align) - 1))
43090 max_skip = 0;
43091 if (dump_file)
43092 fprintf (dump_file, "Label %i with max_skip %i\n",
43093 INSN_UID (insn), max_skip);
43094 if (max_skip)
43096 while (nbytes + max_skip >= 16)
43098 start = NEXT_INSN (start);
43099 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43100 || CALL_P (start))
43101 njumps--, isjump = 1;
43102 else
43103 isjump = 0;
43104 nbytes -= min_insn_size (start);
43107 continue;
43110 min_size = min_insn_size (insn);
43111 nbytes += min_size;
43112 if (dump_file)
43113 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43114 INSN_UID (insn), min_size);
43115 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43116 || CALL_P (insn))
43117 njumps++;
43118 else
43119 continue;
43121 while (njumps > 3)
43123 start = NEXT_INSN (start);
43124 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43125 || CALL_P (start))
43126 njumps--, isjump = 1;
43127 else
43128 isjump = 0;
43129 nbytes -= min_insn_size (start);
43131 gcc_assert (njumps >= 0);
43132 if (dump_file)
43133 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43134 INSN_UID (start), INSN_UID (insn), nbytes);
43136 if (njumps == 3 && isjump && nbytes < 16)
43138 int padsize = 15 - nbytes + min_insn_size (insn);
43140 if (dump_file)
43141 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43142 INSN_UID (insn), padsize);
43143 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43147 #endif
43149 /* AMD Athlon works faster
43150 when RET is not the destination of a conditional jump or directly preceded
43151 by another jump instruction. We avoid the penalty by inserting a NOP just
43152 before the RET instruction in such cases. */
43153 static void
43154 ix86_pad_returns (void)
43156 edge e;
43157 edge_iterator ei;
43159 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43161 basic_block bb = e->src;
43162 rtx_insn *ret = BB_END (bb);
43163 rtx_insn *prev;
43164 bool replace = false;
43166 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43167 || optimize_bb_for_size_p (bb))
43168 continue;
43169 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43170 if (active_insn_p (prev) || LABEL_P (prev))
43171 break;
43172 if (prev && LABEL_P (prev))
43174 edge e;
43175 edge_iterator ei;
43177 FOR_EACH_EDGE (e, ei, bb->preds)
43178 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43179 && !(e->flags & EDGE_FALLTHRU))
43181 replace = true;
43182 break;
43185 if (!replace)
43187 prev = prev_active_insn (ret);
43188 if (prev
43189 && ((JUMP_P (prev) && any_condjump_p (prev))
43190 || CALL_P (prev)))
43191 replace = true;
43192 /* Empty functions get a branch mispredict even when
43193 the jump destination is not visible to us. */
43194 if (!prev && !optimize_function_for_size_p (cfun))
43195 replace = true;
43197 if (replace)
43199 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43200 delete_insn (ret);
43205 /* Count the minimum number of instructions in BB. Return 4 if the
43206 number of instructions >= 4. */
43208 static int
43209 ix86_count_insn_bb (basic_block bb)
43211 rtx_insn *insn;
43212 int insn_count = 0;
43214 /* Count number of instructions in this block. Return 4 if the number
43215 of instructions >= 4. */
43216 FOR_BB_INSNS (bb, insn)
43219 /* This can only happen in exit blocks. */
43219 if (JUMP_P (insn)
43220 && ANY_RETURN_P (PATTERN (insn)))
43221 break;
43223 if (NONDEBUG_INSN_P (insn)
43224 && GET_CODE (PATTERN (insn)) != USE
43225 && GET_CODE (PATTERN (insn)) != CLOBBER)
43227 insn_count++;
43228 if (insn_count >= 4)
43229 return insn_count;
43233 return insn_count;
43237 /* Count the minimum number of instructions in a code path through BB.
43238 Return 4 if the number of instructions >= 4. */
43240 static int
43241 ix86_count_insn (basic_block bb)
43243 edge e;
43244 edge_iterator ei;
43245 int min_prev_count;
43247 /* Only bother counting instructions along paths with no
43248 more than 2 basic blocks between entry and exit. Given
43249 that BB has an edge to exit, determine if a predecessor
43250 of BB has an edge from entry. If so, compute the number
43251 of instructions in the predecessor block. If there
43252 happen to be multiple such blocks, compute the minimum. */
43253 min_prev_count = 4;
43254 FOR_EACH_EDGE (e, ei, bb->preds)
43256 edge prev_e;
43257 edge_iterator prev_ei;
43259 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43261 min_prev_count = 0;
43262 break;
43264 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43266 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43268 int count = ix86_count_insn_bb (e->src);
43269 if (count < min_prev_count)
43270 min_prev_count = count;
43271 break;
43276 if (min_prev_count < 4)
43277 min_prev_count += ix86_count_insn_bb (bb);
43279 return min_prev_count;
43282 /* Pad short function to 4 instructions. */
43284 static void
43285 ix86_pad_short_function (void)
43287 edge e;
43288 edge_iterator ei;
43290 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43292 rtx_insn *ret = BB_END (e->src);
43293 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43295 int insn_count = ix86_count_insn (e->src);
43297 /* Pad short function. */
43298 if (insn_count < 4)
43300 rtx_insn *insn = ret;
43302 /* Find epilogue. */
43303 while (insn
43304 && (!NOTE_P (insn)
43305 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43306 insn = PREV_INSN (insn);
43308 if (!insn)
43309 insn = ret;
43311 /* Two NOPs count as one instruction. */
43312 insn_count = 2 * (4 - insn_count);
43313 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43319 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43320 the epilogue, the Windows system unwinder will apply epilogue logic and
43321 produce incorrect offsets. This can be avoided by adding a nop between
43322 the last insn that can throw and the first insn of the epilogue. */
43324 static void
43325 ix86_seh_fixup_eh_fallthru (void)
43327 edge e;
43328 edge_iterator ei;
43330 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43332 rtx_insn *insn, *next;
43334 /* Find the beginning of the epilogue. */
43335 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43336 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43337 break;
43338 if (insn == NULL)
43339 continue;
43341 /* We only care about preceding insns that can throw. */
43342 insn = prev_active_insn (insn);
43343 if (insn == NULL || !can_throw_internal (insn))
43344 continue;
43346 /* Do not separate calls from their debug information. */
43347 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43348 if (NOTE_P (next)
43349 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43350 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43351 insn = next;
43352 else
43353 break;
43355 emit_insn_after (gen_nops (const1_rtx), insn);
43359 /* Implement machine specific optimizations. We implement padding of returns
43360 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43361 static void
43362 ix86_reorg (void)
43364 /* We are freeing block_for_insn in the toplev to keep compatibility
43365 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43366 compute_bb_for_insn ();
43368 if (TARGET_SEH && current_function_has_exception_handlers ())
43369 ix86_seh_fixup_eh_fallthru ();
43371 if (optimize && optimize_function_for_speed_p (cfun))
43373 if (TARGET_PAD_SHORT_FUNCTION)
43374 ix86_pad_short_function ();
43375 else if (TARGET_PAD_RETURNS)
43376 ix86_pad_returns ();
43377 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43378 if (TARGET_FOUR_JUMP_LIMIT)
43379 ix86_avoid_jump_mispredicts ();
43380 #endif
43384 /* Return nonzero when a QImode register that must be represented via a
43385 REX prefix is used. */
43386 bool
43387 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43389 int i;
43390 extract_insn_cached (insn);
43391 for (i = 0; i < recog_data.n_operands; i++)
43392 if (GENERAL_REG_P (recog_data.operand[i])
43393 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43394 return true;
43395 return false;
43398 /* Return true when INSN mentions a register that must be encoded using a
43399 REX prefix. */
43400 bool
43401 x86_extended_reg_mentioned_p (rtx insn)
43403 subrtx_iterator::array_type array;
43404 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43406 const_rtx x = *iter;
43407 if (REG_P (x)
43408 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43409 return true;
43411 return false;
43414 /* If profitable, negate (without causing overflow) the integer constant
43415 of mode MODE at location LOC. Return true in this case. */
43416 bool
43417 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43419 HOST_WIDE_INT val;
43421 if (!CONST_INT_P (*loc))
43422 return false;
43424 switch (mode)
43426 case DImode:
43427 /* DImode x86_64 constants must fit in 32 bits. */
43428 gcc_assert (x86_64_immediate_operand (*loc, mode));
43430 mode = SImode;
43431 break;
43433 case SImode:
43434 case HImode:
43435 case QImode:
43436 break;
43438 default:
43439 gcc_unreachable ();
43442 /* Avoid overflows. */
43443 if (mode_signbit_p (mode, *loc))
43444 return false;
43446 val = INTVAL (*loc);
43448 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43449 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
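/* For example, an immediate of -128 still fits in a sign-extended 8-bit
   field while +128 does not, so -128 is left alone and +128 is negated
   to -128, letting the caller use the opposite operation (e.g. sub
   instead of add).  */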
43450 if ((val < 0 && val != -128)
43451 || val == 128)
43453 *loc = GEN_INT (-val);
43454 return true;
43457 return false;
43460 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43461 optabs would emit if we didn't have TFmode patterns. */
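/* The scheme below: if the input is non-negative when viewed as signed,
   a plain signed conversion is used; otherwise the value is halved as
   (in >> 1) | (in & 1), converted, and the result doubled.  OR-ing the
   low bit back in keeps the rounding of odd values correct.  */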
43463 void
43464 x86_emit_floatuns (rtx operands[2])
43466 rtx_code_label *neglab, *donelab;
43467 rtx i0, i1, f0, in, out;
43468 machine_mode mode, inmode;
43470 inmode = GET_MODE (operands[1]);
43471 gcc_assert (inmode == SImode || inmode == DImode);
43473 out = operands[0];
43474 in = force_reg (inmode, operands[1]);
43475 mode = GET_MODE (out);
43476 neglab = gen_label_rtx ();
43477 donelab = gen_label_rtx ();
43478 f0 = gen_reg_rtx (mode);
43480 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43482 expand_float (out, in, 0);
43484 emit_jump_insn (gen_jump (donelab));
43485 emit_barrier ();
43487 emit_label (neglab);
43489 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43490 1, OPTAB_DIRECT);
43491 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43492 1, OPTAB_DIRECT);
43493 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43495 expand_float (f0, i0, 0);
43497 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43499 emit_label (donelab);
43502 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43503 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43504 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43505 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43507 /* Get a vector mode of the same size as the original but with elements
43508 twice as wide. This is only guaranteed to apply to integral vectors. */
43510 static inline machine_mode
43511 get_mode_wider_vector (machine_mode o)
43513 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43514 machine_mode n = GET_MODE_WIDER_MODE (o);
43515 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43516 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43517 return n;
43520 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43521 fill target with val via vec_duplicate. */
43523 static bool
43524 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43526 bool ok;
43527 rtx_insn *insn;
43528 rtx dup;
43530 /* First attempt to recognize VAL as-is. */
43531 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43532 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43533 if (recog_memoized (insn) < 0)
43535 rtx_insn *seq;
43536 /* If that fails, force VAL into a register. */
43538 start_sequence ();
43539 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43540 seq = get_insns ();
43541 end_sequence ();
43542 if (seq)
43543 emit_insn_before (seq, insn);
43545 ok = recog_memoized (insn) >= 0;
43546 gcc_assert (ok);
43548 return true;
43551 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43552 with all elements equal to VAR. Return true if successful. */
43554 static bool
43555 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43556 rtx target, rtx val)
43558 bool ok;
43560 switch (mode)
43562 case V2SImode:
43563 case V2SFmode:
43564 if (!mmx_ok)
43565 return false;
43566 /* FALLTHRU */
43568 case V4DFmode:
43569 case V4DImode:
43570 case V8SFmode:
43571 case V8SImode:
43572 case V2DFmode:
43573 case V2DImode:
43574 case V4SFmode:
43575 case V4SImode:
43576 case V16SImode:
43577 case V8DImode:
43578 case V16SFmode:
43579 case V8DFmode:
43580 return ix86_vector_duplicate_value (mode, target, val);
43582 case V4HImode:
43583 if (!mmx_ok)
43584 return false;
43585 if (TARGET_SSE || TARGET_3DNOW_A)
43587 rtx x;
43589 val = gen_lowpart (SImode, val);
43590 x = gen_rtx_TRUNCATE (HImode, val);
43591 x = gen_rtx_VEC_DUPLICATE (mode, x);
43592 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43593 return true;
43595 goto widen;
43597 case V8QImode:
43598 if (!mmx_ok)
43599 return false;
43600 goto widen;
43602 case V8HImode:
43603 if (TARGET_AVX2)
43604 return ix86_vector_duplicate_value (mode, target, val);
43606 if (TARGET_SSE2)
43608 struct expand_vec_perm_d dperm;
43609 rtx tmp1, tmp2;
43611 permute:
43612 memset (&dperm, 0, sizeof (dperm));
43613 dperm.target = target;
43614 dperm.vmode = mode;
43615 dperm.nelt = GET_MODE_NUNITS (mode);
43616 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43617 dperm.one_operand_p = true;
43619 /* Extend to SImode using a paradoxical SUBREG. */
43620 tmp1 = gen_reg_rtx (SImode);
43621 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43623 /* Insert the SImode value as low element of a V4SImode vector. */
43624 tmp2 = gen_reg_rtx (V4SImode);
43625 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43626 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43628 ok = (expand_vec_perm_1 (&dperm)
43629 || expand_vec_perm_broadcast_1 (&dperm));
43630 gcc_assert (ok);
43631 return ok;
43633 goto widen;
43635 case V16QImode:
43636 if (TARGET_AVX2)
43637 return ix86_vector_duplicate_value (mode, target, val);
43639 if (TARGET_SSE2)
43640 goto permute;
43641 goto widen;
43643 widen:
43644 /* Replicate the value once into the next wider mode and recurse. */
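/* E.g. a QImode value 0xab becomes the HImode value 0xabab, so
   broadcasting it across V16QImode reduces to broadcasting 0xabab
   across V8HImode.  */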
43646 machine_mode smode, wsmode, wvmode;
43647 rtx x;
43649 smode = GET_MODE_INNER (mode);
43650 wvmode = get_mode_wider_vector (mode);
43651 wsmode = GET_MODE_INNER (wvmode);
43653 val = convert_modes (wsmode, smode, val, true);
43654 x = expand_simple_binop (wsmode, ASHIFT, val,
43655 GEN_INT (GET_MODE_BITSIZE (smode)),
43656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43657 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43659 x = gen_reg_rtx (wvmode);
43660 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43661 gcc_assert (ok);
43662 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43663 return ok;
43666 case V16HImode:
43667 case V32QImode:
43668 if (TARGET_AVX2)
43669 return ix86_vector_duplicate_value (mode, target, val);
43670 else
43672 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43673 rtx x = gen_reg_rtx (hvmode);
43675 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43676 gcc_assert (ok);
43678 x = gen_rtx_VEC_CONCAT (mode, x, x);
43679 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43681 return true;
43683 case V64QImode:
43684 case V32HImode:
43685 if (TARGET_AVX512BW)
43686 return ix86_vector_duplicate_value (mode, target, val);
43687 else
43689 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43690 rtx x = gen_reg_rtx (hvmode);
43692 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43693 gcc_assert (ok);
43695 x = gen_rtx_VEC_CONCAT (mode, x, x);
43696 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43698 return true;
43700 default:
43701 return false;
43705 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43706 whose ONE_VAR element is VAR, and other elements are zero. Return true
43707 if successful. */
43709 static bool
43710 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43711 rtx target, rtx var, int one_var)
43713 machine_mode vsimode;
43714 rtx new_target;
43715 rtx x, tmp;
43716 bool use_vector_set = false;
43718 switch (mode)
43720 case V2DImode:
43721 /* For SSE4.1, we normally use vector set. But if the second
43722 element is zero and inter-unit moves are OK, we use movq
43723 instead. */
43724 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43725 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43726 && one_var == 0));
43727 break;
43728 case V16QImode:
43729 case V4SImode:
43730 case V4SFmode:
43731 use_vector_set = TARGET_SSE4_1;
43732 break;
43733 case V8HImode:
43734 use_vector_set = TARGET_SSE2;
43735 break;
43736 case V4HImode:
43737 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43738 break;
43739 case V32QImode:
43740 case V16HImode:
43741 case V8SImode:
43742 case V8SFmode:
43743 case V4DFmode:
43744 use_vector_set = TARGET_AVX;
43745 break;
43746 case V4DImode:
43747 /* Use ix86_expand_vector_set in 64bit mode only. */
43748 use_vector_set = TARGET_AVX && TARGET_64BIT;
43749 break;
43750 default:
43751 break;
43754 if (use_vector_set)
43756 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43757 var = force_reg (GET_MODE_INNER (mode), var);
43758 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43759 return true;
43762 switch (mode)
43764 case V2SFmode:
43765 case V2SImode:
43766 if (!mmx_ok)
43767 return false;
43768 /* FALLTHRU */
43770 case V2DFmode:
43771 case V2DImode:
43772 if (one_var != 0)
43773 return false;
43774 var = force_reg (GET_MODE_INNER (mode), var);
43775 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43776 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43777 return true;
43779 case V4SFmode:
43780 case V4SImode:
43781 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43782 new_target = gen_reg_rtx (mode);
43783 else
43784 new_target = target;
43785 var = force_reg (GET_MODE_INNER (mode), var);
43786 x = gen_rtx_VEC_DUPLICATE (mode, var);
43787 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43788 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43789 if (one_var != 0)
43791 /* We need to shuffle the value to the correct position, so
43792 create a new pseudo to store the intermediate result. */
43794 /* With SSE2, we can use the integer shuffle insns. */
43795 if (mode != V4SFmode && TARGET_SSE2)
43797 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43798 const1_rtx,
43799 GEN_INT (one_var == 1 ? 0 : 1),
43800 GEN_INT (one_var == 2 ? 0 : 1),
43801 GEN_INT (one_var == 3 ? 0 : 1)));
43802 if (target != new_target)
43803 emit_move_insn (target, new_target);
43804 return true;
43807 /* Otherwise convert the intermediate result to V4SFmode and
43808 use the SSE1 shuffle instructions. */
43809 if (mode != V4SFmode)
43811 tmp = gen_reg_rtx (V4SFmode);
43812 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43814 else
43815 tmp = new_target;
43817 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43818 const1_rtx,
43819 GEN_INT (one_var == 1 ? 0 : 1),
43820 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43821 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43823 if (mode != V4SFmode)
43824 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43825 else if (tmp != target)
43826 emit_move_insn (target, tmp);
43828 else if (target != new_target)
43829 emit_move_insn (target, new_target);
43830 return true;
43832 case V8HImode:
43833 case V16QImode:
43834 vsimode = V4SImode;
43835 goto widen;
43836 case V4HImode:
43837 case V8QImode:
43838 if (!mmx_ok)
43839 return false;
43840 vsimode = V2SImode;
43841 goto widen;
43842 widen:
43843 if (one_var != 0)
43844 return false;
43846 /* Zero extend the variable element to SImode and recurse. */
43847 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43849 x = gen_reg_rtx (vsimode);
43850 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43851 var, one_var))
43852 gcc_unreachable ();
43854 emit_move_insn (target, gen_lowpart (mode, x));
43855 return true;
43857 default:
43858 return false;
43862 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43863 consisting of the values in VALS. It is known that all elements
43864 except ONE_VAR are constants. Return true if successful. */
43866 static bool
43867 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43868 rtx target, rtx vals, int one_var)
43870 rtx var = XVECEXP (vals, 0, one_var);
43871 machine_mode wmode;
43872 rtx const_vec, x;
43874 const_vec = copy_rtx (vals);
43875 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43876 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43878 switch (mode)
43880 case V2DFmode:
43881 case V2DImode:
43882 case V2SFmode:
43883 case V2SImode:
43884 /* For the two element vectors, it's just as easy to use
43885 the general case. */
43886 return false;
43888 case V4DImode:
43889 /* Use ix86_expand_vector_set in 64bit mode only. */
43890 if (!TARGET_64BIT)
43891 return false;
43892 case V4DFmode:
43893 case V8SFmode:
43894 case V8SImode:
43895 case V16HImode:
43896 case V32QImode:
43897 case V4SFmode:
43898 case V4SImode:
43899 case V8HImode:
43900 case V4HImode:
43901 break;
43903 case V16QImode:
43904 if (TARGET_SSE4_1)
43905 break;
43906 wmode = V8HImode;
43907 goto widen;
43908 case V8QImode:
43909 wmode = V4HImode;
43910 goto widen;
43911 widen:
43912 /* There's no way to set one QImode entry easily. Combine
43913 the variable value with its adjacent constant value, and
43914 promote to an HImode set. */
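/* For instance, placing variable V at byte index 5 of a V16QImode vector
   merges V with the constant at byte index 4 into one 16-bit value, which
   is then inserted as element 2 of the corresponding V8HImode vector.  */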
43915 x = XVECEXP (vals, 0, one_var ^ 1);
43916 if (one_var & 1)
43918 var = convert_modes (HImode, QImode, var, true);
43919 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43920 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43921 x = GEN_INT (INTVAL (x) & 0xff);
43923 else
43925 var = convert_modes (HImode, QImode, var, true);
43926 x = gen_int_mode (INTVAL (x) << 8, HImode);
43928 if (x != const0_rtx)
43929 var = expand_simple_binop (HImode, IOR, var, x, var,
43930 1, OPTAB_LIB_WIDEN);
43932 x = gen_reg_rtx (wmode);
43933 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43934 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43936 emit_move_insn (target, gen_lowpart (mode, x));
43937 return true;
43939 default:
43940 return false;
43943 emit_move_insn (target, const_vec);
43944 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43945 return true;
43948 /* A subroutine of ix86_expand_vector_init_general. Use vector
43949 concatenate to handle the most general case: all values variable,
43950 and none identical. */
43952 static void
43953 ix86_expand_vector_init_concat (machine_mode mode,
43954 rtx target, rtx *ops, int n)
43956 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43957 rtx first[16], second[8], third[4];
43958 rtvec v;
43959 int i, j;
43961 switch (n)
43963 case 2:
43964 switch (mode)
43966 case V16SImode:
43967 cmode = V8SImode;
43968 break;
43969 case V16SFmode:
43970 cmode = V8SFmode;
43971 break;
43972 case V8DImode:
43973 cmode = V4DImode;
43974 break;
43975 case V8DFmode:
43976 cmode = V4DFmode;
43977 break;
43978 case V8SImode:
43979 cmode = V4SImode;
43980 break;
43981 case V8SFmode:
43982 cmode = V4SFmode;
43983 break;
43984 case V4DImode:
43985 cmode = V2DImode;
43986 break;
43987 case V4DFmode:
43988 cmode = V2DFmode;
43989 break;
43990 case V4SImode:
43991 cmode = V2SImode;
43992 break;
43993 case V4SFmode:
43994 cmode = V2SFmode;
43995 break;
43996 case V2DImode:
43997 cmode = DImode;
43998 break;
43999 case V2SImode:
44000 cmode = SImode;
44001 break;
44002 case V2DFmode:
44003 cmode = DFmode;
44004 break;
44005 case V2SFmode:
44006 cmode = SFmode;
44007 break;
44008 default:
44009 gcc_unreachable ();
44012 if (!register_operand (ops[1], cmode))
44013 ops[1] = force_reg (cmode, ops[1]);
44014 if (!register_operand (ops[0], cmode))
44015 ops[0] = force_reg (cmode, ops[0]);
44016 emit_insn (gen_rtx_SET (VOIDmode, target,
44017 gen_rtx_VEC_CONCAT (mode, ops[0],
44018 ops[1])));
44019 break;
44021 case 4:
44022 switch (mode)
44024 case V4DImode:
44025 cmode = V2DImode;
44026 break;
44027 case V4DFmode:
44028 cmode = V2DFmode;
44029 break;
44030 case V4SImode:
44031 cmode = V2SImode;
44032 break;
44033 case V4SFmode:
44034 cmode = V2SFmode;
44035 break;
44036 default:
44037 gcc_unreachable ();
44039 goto half;
44041 case 8:
44042 switch (mode)
44044 case V8DImode:
44045 cmode = V2DImode;
44046 hmode = V4DImode;
44047 break;
44048 case V8DFmode:
44049 cmode = V2DFmode;
44050 hmode = V4DFmode;
44051 break;
44052 case V8SImode:
44053 cmode = V2SImode;
44054 hmode = V4SImode;
44055 break;
44056 case V8SFmode:
44057 cmode = V2SFmode;
44058 hmode = V4SFmode;
44059 break;
44060 default:
44061 gcc_unreachable ();
44063 goto half;
44065 case 16:
44066 switch (mode)
44068 case V16SImode:
44069 cmode = V2SImode;
44070 hmode = V4SImode;
44071 gmode = V8SImode;
44072 break;
44073 case V16SFmode:
44074 cmode = V2SFmode;
44075 hmode = V4SFmode;
44076 gmode = V8SFmode;
44077 break;
44078 default:
44079 gcc_unreachable ();
44081 goto half;
44083 half:
44084 /* FIXME: We process inputs backward to help RA. PR 36222. */
44085 i = n - 1;
44086 j = (n >> 1) - 1;
44087 for (; i > 0; i -= 2, j--)
44089 first[j] = gen_reg_rtx (cmode);
44090 v = gen_rtvec (2, ops[i - 1], ops[i]);
44091 ix86_expand_vector_init (false, first[j],
44092 gen_rtx_PARALLEL (cmode, v));
44095 n >>= 1;
44096 if (n > 4)
44098 gcc_assert (hmode != VOIDmode);
44099 gcc_assert (gmode != VOIDmode);
44100 for (i = j = 0; i < n; i += 2, j++)
44102 second[j] = gen_reg_rtx (hmode);
44103 ix86_expand_vector_init_concat (hmode, second [j],
44104 &first [i], 2);
44106 n >>= 1;
44107 for (i = j = 0; i < n; i += 2, j++)
44109 third[j] = gen_reg_rtx (gmode);
44110 ix86_expand_vector_init_concat (gmode, third[j],
44111 &second[i], 2);
44113 n >>= 1;
44114 ix86_expand_vector_init_concat (mode, target, third, n);
44116 else if (n > 2)
44118 gcc_assert (hmode != VOIDmode);
44119 for (i = j = 0; i < n; i += 2, j++)
44121 second[j] = gen_reg_rtx (hmode);
44122 ix86_expand_vector_init_concat (hmode, second [j],
44123 &first [i], 2);
44125 n >>= 1;
44126 ix86_expand_vector_init_concat (mode, target, second, n);
44128 else
44129 ix86_expand_vector_init_concat (mode, target, first, n);
44130 break;
44132 default:
44133 gcc_unreachable ();
44137 /* A subroutine of ix86_expand_vector_init_general. Use vector
44138 interleave to handle the most general case: all values variable,
44139 and none identical. */
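/* Illustrative note (editorial, not in the original source): for a
V16QImode build the loop below packs the elements two at a time (the
first of each pair through an SImode move into element 0, the second
through a vec_set into element 1), then merges the partial vectors
with interleave-low operations at V8HImode, V4SImode and finally
V2DImode width, doubling the number of assembled elements at each
step.  */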
44141 static void
44142 ix86_expand_vector_init_interleave (machine_mode mode,
44143 rtx target, rtx *ops, int n)
44145 machine_mode first_imode, second_imode, third_imode, inner_mode;
44146 int i, j;
44147 rtx op0, op1;
44148 rtx (*gen_load_even) (rtx, rtx, rtx);
44149 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44150 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44152 switch (mode)
44154 case V8HImode:
44155 gen_load_even = gen_vec_setv8hi;
44156 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44157 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44158 inner_mode = HImode;
44159 first_imode = V4SImode;
44160 second_imode = V2DImode;
44161 third_imode = VOIDmode;
44162 break;
44163 case V16QImode:
44164 gen_load_even = gen_vec_setv16qi;
44165 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44166 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44167 inner_mode = QImode;
44168 first_imode = V8HImode;
44169 second_imode = V4SImode;
44170 third_imode = V2DImode;
44171 break;
44172 default:
44173 gcc_unreachable ();
44176 for (i = 0; i < n; i++)
44178 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44179 op0 = gen_reg_rtx (SImode);
44180 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44182 /* Insert the SImode value as low element of V4SImode vector. */
44183 op1 = gen_reg_rtx (V4SImode);
44184 op0 = gen_rtx_VEC_MERGE (V4SImode,
44185 gen_rtx_VEC_DUPLICATE (V4SImode,
44186 op0),
44187 CONST0_RTX (V4SImode),
44188 const1_rtx);
44189 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44191 /* Cast the V4SImode vector back to a vector in the original mode. */
44192 op0 = gen_reg_rtx (mode);
44193 emit_move_insn (op0, gen_lowpart (mode, op1));
44195 /* Load even elements into the second position. */
44196 emit_insn (gen_load_even (op0,
44197 force_reg (inner_mode,
44198 ops [i + i + 1]),
44199 const1_rtx));
44201 /* Cast vector to FIRST_IMODE vector. */
44202 ops[i] = gen_reg_rtx (first_imode);
44203 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44206 /* Interleave low FIRST_IMODE vectors. */
44207 for (i = j = 0; i < n; i += 2, j++)
44209 op0 = gen_reg_rtx (first_imode);
44210 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44212 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44213 ops[j] = gen_reg_rtx (second_imode);
44214 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44217 /* Interleave low SECOND_IMODE vectors. */
44218 switch (second_imode)
44220 case V4SImode:
44221 for (i = j = 0; i < n / 2; i += 2, j++)
44223 op0 = gen_reg_rtx (second_imode);
44224 emit_insn (gen_interleave_second_low (op0, ops[i],
44225 ops[i + 1]));
44227 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44228 vector. */
44229 ops[j] = gen_reg_rtx (third_imode);
44230 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44232 second_imode = V2DImode;
44233 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44234 /* FALLTHRU */
44236 case V2DImode:
44237 op0 = gen_reg_rtx (second_imode);
44238 emit_insn (gen_interleave_second_low (op0, ops[0],
44239 ops[1]));
44241 /* Cast the SECOND_IMODE vector back to a vector in the original
44242 mode. */
44243 emit_insn (gen_rtx_SET (VOIDmode, target,
44244 gen_lowpart (mode, op0)));
44245 break;
44247 default:
44248 gcc_unreachable ();
44252 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44253 all values variable, and none identical. */
44255 static void
44256 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44257 rtx target, rtx vals)
44259 rtx ops[64], op0, op1, op2, op3, op4, op5;
44260 machine_mode half_mode = VOIDmode;
44261 machine_mode quarter_mode = VOIDmode;
44262 int n, i;
44264 switch (mode)
44266 case V2SFmode:
44267 case V2SImode:
44268 if (!mmx_ok && !TARGET_SSE)
44269 break;
44270 /* FALLTHRU */
44272 case V16SImode:
44273 case V16SFmode:
44274 case V8DFmode:
44275 case V8DImode:
44276 case V8SFmode:
44277 case V8SImode:
44278 case V4DFmode:
44279 case V4DImode:
44280 case V4SFmode:
44281 case V4SImode:
44282 case V2DFmode:
44283 case V2DImode:
44284 n = GET_MODE_NUNITS (mode);
44285 for (i = 0; i < n; i++)
44286 ops[i] = XVECEXP (vals, 0, i);
44287 ix86_expand_vector_init_concat (mode, target, ops, n);
44288 return;
44290 case V32QImode:
44291 half_mode = V16QImode;
44292 goto half;
44294 case V16HImode:
44295 half_mode = V8HImode;
44296 goto half;
44298 half:
44299 n = GET_MODE_NUNITS (mode);
44300 for (i = 0; i < n; i++)
44301 ops[i] = XVECEXP (vals, 0, i);
44302 op0 = gen_reg_rtx (half_mode);
44303 op1 = gen_reg_rtx (half_mode);
44304 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44305 n >> 2);
44306 ix86_expand_vector_init_interleave (half_mode, op1,
44307 &ops [n >> 1], n >> 2);
44308 emit_insn (gen_rtx_SET (VOIDmode, target,
44309 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44310 return;
44312 case V64QImode:
44313 quarter_mode = V16QImode;
44314 half_mode = V32QImode;
44315 goto quarter;
44317 case V32HImode:
44318 quarter_mode = V8HImode;
44319 half_mode = V16HImode;
44320 goto quarter;
44322 quarter:
44323 n = GET_MODE_NUNITS (mode);
44324 for (i = 0; i < n; i++)
44325 ops[i] = XVECEXP (vals, 0, i);
44326 op0 = gen_reg_rtx (quarter_mode);
44327 op1 = gen_reg_rtx (quarter_mode);
44328 op2 = gen_reg_rtx (quarter_mode);
44329 op3 = gen_reg_rtx (quarter_mode);
44330 op4 = gen_reg_rtx (half_mode);
44331 op5 = gen_reg_rtx (half_mode);
44332 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44333 n >> 3);
44334 ix86_expand_vector_init_interleave (quarter_mode, op1,
44335 &ops [n >> 2], n >> 3);
44336 ix86_expand_vector_init_interleave (quarter_mode, op2,
44337 &ops [n >> 1], n >> 3);
44338 ix86_expand_vector_init_interleave (quarter_mode, op3,
44339 &ops [(n >> 1) | (n >> 2)], n >> 3);
44340 emit_insn (gen_rtx_SET (VOIDmode, op4,
44341 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44342 emit_insn (gen_rtx_SET (VOIDmode, op5,
44343 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44344 emit_insn (gen_rtx_SET (VOIDmode, target,
44345 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44346 return;
44348 case V16QImode:
44349 if (!TARGET_SSE4_1)
44350 break;
44351 /* FALLTHRU */
44353 case V8HImode:
44354 if (!TARGET_SSE2)
44355 break;
44357 /* Don't use ix86_expand_vector_init_interleave if we can't
44358 move from GPR to SSE register directly. */
44359 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44360 break;
44362 n = GET_MODE_NUNITS (mode);
44363 for (i = 0; i < n; i++)
44364 ops[i] = XVECEXP (vals, 0, i);
44365 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44366 return;
44368 case V4HImode:
44369 case V8QImode:
44370 break;
44372 default:
44373 gcc_unreachable ();
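/* Cases that break out of the switch above fall through to a general
word-building path: pack the elements into word_mode registers with
shifts and IORs, then transfer the word(s) into the vector register --
directly for one word, through the low/high parts for two, or through
a V4SImode build for four.  */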
44377 int i, j, n_elts, n_words, n_elt_per_word;
44378 machine_mode inner_mode;
44379 rtx words[4], shift;
44381 inner_mode = GET_MODE_INNER (mode);
44382 n_elts = GET_MODE_NUNITS (mode);
44383 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44384 n_elt_per_word = n_elts / n_words;
44385 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44387 for (i = 0; i < n_words; ++i)
44389 rtx word = NULL_RTX;
44391 for (j = 0; j < n_elt_per_word; ++j)
44393 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44394 elt = convert_modes (word_mode, inner_mode, elt, true);
44396 if (j == 0)
44397 word = elt;
44398 else
44400 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44401 word, 1, OPTAB_LIB_WIDEN);
44402 word = expand_simple_binop (word_mode, IOR, word, elt,
44403 word, 1, OPTAB_LIB_WIDEN);
44407 words[i] = word;
44410 if (n_words == 1)
44411 emit_move_insn (target, gen_lowpart (mode, words[0]));
44412 else if (n_words == 2)
44414 rtx tmp = gen_reg_rtx (mode);
44415 emit_clobber (tmp);
44416 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44417 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44418 emit_move_insn (target, tmp);
44420 else if (n_words == 4)
44422 rtx tmp = gen_reg_rtx (V4SImode);
44423 gcc_assert (word_mode == SImode);
44424 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44425 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44426 emit_move_insn (target, gen_lowpart (mode, tmp));
44428 else
44429 gcc_unreachable ();
44433 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44434 instructions unless MMX_OK is true. */
44436 void
44437 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44439 machine_mode mode = GET_MODE (target);
44440 machine_mode inner_mode = GET_MODE_INNER (mode);
44441 int n_elts = GET_MODE_NUNITS (mode);
44442 int n_var = 0, one_var = -1;
44443 bool all_same = true, all_const_zero = true;
44444 int i;
44445 rtx x;
44447 for (i = 0; i < n_elts; ++i)
44449 x = XVECEXP (vals, 0, i);
44450 if (!(CONST_INT_P (x)
44451 || GET_CODE (x) == CONST_DOUBLE
44452 || GET_CODE (x) == CONST_FIXED))
44453 n_var++, one_var = i;
44454 else if (x != CONST0_RTX (inner_mode))
44455 all_const_zero = false;
44456 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44457 all_same = false;
44460 /* Constants are best loaded from the constant pool. */
44461 if (n_var == 0)
44463 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44464 return;
44467 /* If all values are identical, broadcast the value. */
44468 if (all_same
44469 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44470 XVECEXP (vals, 0, 0)))
44471 return;
44473 /* Values where only one field is non-constant are best loaded from
44474 the pool and overwritten via move later. */
44475 if (n_var == 1)
44477 if (all_const_zero
44478 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44479 XVECEXP (vals, 0, one_var),
44480 one_var))
44481 return;
44483 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44484 return;
44487 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44490 void
44491 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44493 machine_mode mode = GET_MODE (target);
44494 machine_mode inner_mode = GET_MODE_INNER (mode);
44495 machine_mode half_mode;
44496 bool use_vec_merge = false;
44497 rtx tmp;
44498 static rtx (*gen_extract[6][2]) (rtx, rtx)
44500 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44501 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44502 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44503 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44504 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44505 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44507 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44509 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44510 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44511 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44512 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44513 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44514 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44516 int i, j, n;
44518 switch (mode)
44520 case V2SFmode:
44521 case V2SImode:
44522 if (mmx_ok)
44524 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44525 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44526 if (elt == 0)
44527 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44528 else
44529 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44530 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44531 return;
44533 break;
44535 case V2DImode:
44536 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44537 if (use_vec_merge)
44538 break;
44540 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44541 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44542 if (elt == 0)
44543 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44544 else
44545 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44546 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44547 return;
44549 case V2DFmode:
44551 rtx op0, op1;
44553 /* For the two element vectors, we implement a VEC_CONCAT with
44554 the extraction of the other element. */
44556 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44557 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44559 if (elt == 0)
44560 op0 = val, op1 = tmp;
44561 else
44562 op0 = tmp, op1 = val;
44564 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44565 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44567 return;
44569 case V4SFmode:
44570 use_vec_merge = TARGET_SSE4_1;
44571 if (use_vec_merge)
44572 break;
44574 switch (elt)
44576 case 0:
44577 use_vec_merge = true;
44578 break;
44580 case 1:
44581 /* tmp = target = A B C D */
44582 tmp = copy_to_reg (target);
44583 /* target = A A B B */
44584 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44585 /* target = X A B B */
44586 ix86_expand_vector_set (false, target, val, 0);
44587 /* target = A X C D */
44588 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44589 const1_rtx, const0_rtx,
44590 GEN_INT (2+4), GEN_INT (3+4)));
44591 return;
44593 case 2:
44594 /* tmp = target = A B C D */
44595 tmp = copy_to_reg (target);
44596 /* tmp = X B C D */
44597 ix86_expand_vector_set (false, tmp, val, 0);
44598 /* target = A B X D */
44599 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44600 const0_rtx, const1_rtx,
44601 GEN_INT (0+4), GEN_INT (3+4)));
44602 return;
44604 case 3:
44605 /* tmp = target = A B C D */
44606 tmp = copy_to_reg (target);
44607 /* tmp = X B C D */
44608 ix86_expand_vector_set (false, tmp, val, 0);
44609 /* target = A B C X */
44610 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44611 const0_rtx, const1_rtx,
44612 GEN_INT (2+4), GEN_INT (0+4)));
44613 return;
44615 default:
44616 gcc_unreachable ();
44618 break;
44620 case V4SImode:
44621 use_vec_merge = TARGET_SSE4_1;
44622 if (use_vec_merge)
44623 break;
44625 /* Element 0 handled by vec_merge below. */
44626 if (elt == 0)
44628 use_vec_merge = true;
44629 break;
44632 if (TARGET_SSE2)
44634 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44635 store into element 0, then shuffle them back. */
44637 rtx order[4];
44639 order[0] = GEN_INT (elt);
44640 order[1] = const1_rtx;
44641 order[2] = const2_rtx;
44642 order[3] = GEN_INT (3);
44643 order[elt] = const0_rtx;
44645 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44646 order[1], order[2], order[3]));
44648 ix86_expand_vector_set (false, target, val, 0);
44650 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44651 order[1], order[2], order[3]));
44653 else
44655 /* For SSE1, we have to reuse the V4SF code. */
44656 rtx t = gen_reg_rtx (V4SFmode);
44657 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44658 emit_move_insn (target, gen_lowpart (mode, t));
44660 return;
44662 case V8HImode:
44663 use_vec_merge = TARGET_SSE2;
44664 break;
44665 case V4HImode:
44666 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44667 break;
44669 case V16QImode:
44670 use_vec_merge = TARGET_SSE4_1;
44671 break;
44673 case V8QImode:
44674 break;
44676 case V32QImode:
44677 half_mode = V16QImode;
44678 j = 0;
44679 n = 16;
44680 goto half;
44682 case V16HImode:
44683 half_mode = V8HImode;
44684 j = 1;
44685 n = 8;
44686 goto half;
44688 case V8SImode:
44689 half_mode = V4SImode;
44690 j = 2;
44691 n = 4;
44692 goto half;
44694 case V4DImode:
44695 half_mode = V2DImode;
44696 j = 3;
44697 n = 2;
44698 goto half;
44700 case V8SFmode:
44701 half_mode = V4SFmode;
44702 j = 4;
44703 n = 4;
44704 goto half;
44706 case V4DFmode:
44707 half_mode = V2DFmode;
44708 j = 5;
44709 n = 2;
44710 goto half;
44712 half:
44713 /* Compute offset. */
44714 i = elt / n;
44715 elt %= n;
44717 gcc_assert (i <= 1);
44719 /* Extract the half. */
44720 tmp = gen_reg_rtx (half_mode);
44721 emit_insn (gen_extract[j][i] (tmp, target));
44723 /* Put val in tmp at elt. */
44724 ix86_expand_vector_set (false, tmp, val, elt);
44726 /* Put it back. */
44727 emit_insn (gen_insert[j][i] (target, target, tmp));
44728 return;
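/* For the 512-bit modes handled below, AVX-512 mask registers allow
setting a single element directly: VAL is broadcast into a temporary
vector and blended into TARGET under the one-hot mask 1 << ELT.  */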
44730 case V8DFmode:
44731 if (TARGET_AVX512F)
44733 tmp = gen_reg_rtx (mode);
44734 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44735 gen_rtx_VEC_DUPLICATE (mode, val)));
44736 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44737 force_reg (QImode, GEN_INT (1 << elt))));
44738 return;
44740 else
44741 break;
44742 case V8DImode:
44743 if (TARGET_AVX512F)
44745 tmp = gen_reg_rtx (mode);
44746 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44747 gen_rtx_VEC_DUPLICATE (mode, val)));
44748 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44749 force_reg (QImode, GEN_INT (1 << elt))));
44750 return;
44752 else
44753 break;
44754 case V16SFmode:
44755 if (TARGET_AVX512F)
44757 tmp = gen_reg_rtx (mode);
44758 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44759 gen_rtx_VEC_DUPLICATE (mode, val)));
44760 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44761 force_reg (HImode, GEN_INT (1 << elt))));
44762 return;
44764 else
44765 break;
44766 case V16SImode:
44767 if (TARGET_AVX512F)
44769 tmp = gen_reg_rtx (mode);
44770 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44771 gen_rtx_VEC_DUPLICATE (mode, val)));
44772 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44773 force_reg (HImode, GEN_INT (1 << elt))));
44774 return;
44776 else
44777 break;
44778 case V32HImode:
44779 if (TARGET_AVX512F && TARGET_AVX512BW)
44781 tmp = gen_reg_rtx (mode);
44782 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44783 gen_rtx_VEC_DUPLICATE (mode, val)));
44784 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44785 force_reg (SImode, GEN_INT (1 << elt))));
44786 return;
44788 else
44789 break;
44790 case V64QImode:
44791 if (TARGET_AVX512F && TARGET_AVX512BW)
44793 tmp = gen_reg_rtx (mode);
44794 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44795 gen_rtx_VEC_DUPLICATE (mode, val)));
44796 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44797 force_reg (DImode, GEN_INT (1 << elt))));
44798 return;
44800 else
44801 break;
44803 default:
44804 break;
44807 if (use_vec_merge)
44809 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44810 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44811 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44813 else
44815 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44817 emit_move_insn (mem, target);
44819 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44820 emit_move_insn (tmp, val);
44822 emit_move_insn (target, mem);
44826 void
44827 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44829 machine_mode mode = GET_MODE (vec);
44830 machine_mode inner_mode = GET_MODE_INNER (mode);
44831 bool use_vec_extr = false;
44832 rtx tmp;
44834 switch (mode)
44836 case V2SImode:
44837 case V2SFmode:
44838 if (!mmx_ok)
44839 break;
44840 /* FALLTHRU */
44842 case V2DFmode:
44843 case V2DImode:
44844 use_vec_extr = true;
44845 break;
44847 case V4SFmode:
44848 use_vec_extr = TARGET_SSE4_1;
44849 if (use_vec_extr)
44850 break;
44852 switch (elt)
44854 case 0:
44855 tmp = vec;
44856 break;
44858 case 1:
44859 case 3:
44860 tmp = gen_reg_rtx (mode);
44861 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44862 GEN_INT (elt), GEN_INT (elt),
44863 GEN_INT (elt+4), GEN_INT (elt+4)));
44864 break;
44866 case 2:
44867 tmp = gen_reg_rtx (mode);
44868 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44869 break;
44871 default:
44872 gcc_unreachable ();
44874 vec = tmp;
44875 use_vec_extr = true;
44876 elt = 0;
44877 break;
44879 case V4SImode:
44880 use_vec_extr = TARGET_SSE4_1;
44881 if (use_vec_extr)
44882 break;
44884 if (TARGET_SSE2)
44886 switch (elt)
44888 case 0:
44889 tmp = vec;
44890 break;
44892 case 1:
44893 case 3:
44894 tmp = gen_reg_rtx (mode);
44895 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44896 GEN_INT (elt), GEN_INT (elt),
44897 GEN_INT (elt), GEN_INT (elt)));
44898 break;
44900 case 2:
44901 tmp = gen_reg_rtx (mode);
44902 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44903 break;
44905 default:
44906 gcc_unreachable ();
44908 vec = tmp;
44909 use_vec_extr = true;
44910 elt = 0;
44912 else
44914 /* For SSE1, we have to reuse the V4SF code. */
44915 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44916 gen_lowpart (V4SFmode, vec), elt);
44917 return;
44919 break;
44921 case V8HImode:
44922 use_vec_extr = TARGET_SSE2;
44923 break;
44924 case V4HImode:
44925 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44926 break;
44928 case V16QImode:
44929 use_vec_extr = TARGET_SSE4_1;
44930 break;
44932 case V8SFmode:
44933 if (TARGET_AVX)
44935 tmp = gen_reg_rtx (V4SFmode);
44936 if (elt < 4)
44937 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44938 else
44939 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44940 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44941 return;
44943 break;
44945 case V4DFmode:
44946 if (TARGET_AVX)
44948 tmp = gen_reg_rtx (V2DFmode);
44949 if (elt < 2)
44950 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44951 else
44952 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44953 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44954 return;
44956 break;
44958 case V32QImode:
44959 if (TARGET_AVX)
44961 tmp = gen_reg_rtx (V16QImode);
44962 if (elt < 16)
44963 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44964 else
44965 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44966 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44967 return;
44969 break;
44971 case V16HImode:
44972 if (TARGET_AVX)
44974 tmp = gen_reg_rtx (V8HImode);
44975 if (elt < 8)
44976 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44977 else
44978 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44979 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44980 return;
44982 break;
44984 case V8SImode:
44985 if (TARGET_AVX)
44987 tmp = gen_reg_rtx (V4SImode);
44988 if (elt < 4)
44989 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44990 else
44991 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44992 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44993 return;
44995 break;
44997 case V4DImode:
44998 if (TARGET_AVX)
45000 tmp = gen_reg_rtx (V2DImode);
45001 if (elt < 2)
45002 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45003 else
45004 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45005 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45006 return;
45008 break;
45010 case V32HImode:
45011 if (TARGET_AVX512BW)
45013 tmp = gen_reg_rtx (V16HImode);
45014 if (elt < 16)
45015 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45016 else
45017 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45018 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45019 return;
45021 break;
45023 case V64QImode:
45024 if (TARGET_AVX512BW)
45026 tmp = gen_reg_rtx (V32QImode);
45027 if (elt < 32)
45028 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45029 else
45030 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45031 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45032 return;
45034 break;
45036 case V16SFmode:
45037 tmp = gen_reg_rtx (V8SFmode);
45038 if (elt < 8)
45039 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45040 else
45041 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45042 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45043 return;
45045 case V8DFmode:
45046 tmp = gen_reg_rtx (V4DFmode);
45047 if (elt < 4)
45048 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45049 else
45050 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45051 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45052 return;
45054 case V16SImode:
45055 tmp = gen_reg_rtx (V8SImode);
45056 if (elt < 8)
45057 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45058 else
45059 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45060 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45061 return;
45063 case V8DImode:
45064 tmp = gen_reg_rtx (V4DImode);
45065 if (elt < 4)
45066 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45067 else
45068 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45069 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45070 return;
45072 case V8QImode:
45073 /* ??? Could extract the appropriate HImode element and shift. */
45074 default:
45075 break;
45078 if (use_vec_extr)
45080 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45081 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45083 /* Let the rtl optimizers know about the zero extension performed. */
45084 if (inner_mode == QImode || inner_mode == HImode)
45086 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45087 target = gen_lowpart (SImode, target);
45090 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45092 else
45094 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45096 emit_move_insn (mem, vec);
45098 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45099 emit_move_insn (target, tmp);
45103 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45104 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45105 The upper bits of DEST are undefined, though they shouldn't cause
45106 exceptions (some bits from src or all zeros are ok). */
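/* Illustrative note (editorial): for a V8SImode SRC the caller invokes
this with I = 256, 128 and 64 in turn; the first call swaps the 128-bit
lanes, the later calls shift the vector right by I/2 bits, so each step
brings the upper half of the still-live elements down into the low
lanes.  */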
45108 static void
45109 emit_reduc_half (rtx dest, rtx src, int i)
45111 rtx tem, d = dest;
45112 switch (GET_MODE (src))
45114 case V4SFmode:
45115 if (i == 128)
45116 tem = gen_sse_movhlps (dest, src, src);
45117 else
45118 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45119 GEN_INT (1 + 4), GEN_INT (1 + 4));
45120 break;
45121 case V2DFmode:
45122 tem = gen_vec_interleave_highv2df (dest, src, src);
45123 break;
45124 case V16QImode:
45125 case V8HImode:
45126 case V4SImode:
45127 case V2DImode:
45128 d = gen_reg_rtx (V1TImode);
45129 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45130 GEN_INT (i / 2));
45131 break;
45132 case V8SFmode:
45133 if (i == 256)
45134 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45135 else
45136 tem = gen_avx_shufps256 (dest, src, src,
45137 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45138 break;
45139 case V4DFmode:
45140 if (i == 256)
45141 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45142 else
45143 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45144 break;
45145 case V32QImode:
45146 case V16HImode:
45147 case V8SImode:
45148 case V4DImode:
45149 if (i == 256)
45151 if (GET_MODE (dest) != V4DImode)
45152 d = gen_reg_rtx (V4DImode);
45153 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45154 gen_lowpart (V4DImode, src),
45155 const1_rtx);
45157 else
45159 d = gen_reg_rtx (V2TImode);
45160 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45161 GEN_INT (i / 2));
45163 break;
45164 case V64QImode:
45165 case V32HImode:
45166 case V16SImode:
45167 case V16SFmode:
45168 case V8DImode:
45169 case V8DFmode:
45170 if (i > 128)
45171 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45172 gen_lowpart (V16SImode, src),
45173 gen_lowpart (V16SImode, src),
45174 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45175 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45176 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45177 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45178 GEN_INT (0xC), GEN_INT (0xD),
45179 GEN_INT (0xE), GEN_INT (0xF),
45180 GEN_INT (0x10), GEN_INT (0x11),
45181 GEN_INT (0x12), GEN_INT (0x13),
45182 GEN_INT (0x14), GEN_INT (0x15),
45183 GEN_INT (0x16), GEN_INT (0x17));
45184 else
45185 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45186 gen_lowpart (V16SImode, src),
45187 GEN_INT (i == 128 ? 0x2 : 0x1),
45188 GEN_INT (0x3),
45189 GEN_INT (0x3),
45190 GEN_INT (0x3),
45191 GEN_INT (i == 128 ? 0x6 : 0x5),
45192 GEN_INT (0x7),
45193 GEN_INT (0x7),
45194 GEN_INT (0x7),
45195 GEN_INT (i == 128 ? 0xA : 0x9),
45196 GEN_INT (0xB),
45197 GEN_INT (0xB),
45198 GEN_INT (0xB),
45199 GEN_INT (i == 128 ? 0xE : 0xD),
45200 GEN_INT (0xF),
45201 GEN_INT (0xF),
45202 GEN_INT (0xF));
45203 break;
45204 default:
45205 gcc_unreachable ();
45207 emit_insn (tem);
45208 if (d != dest)
45209 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45212 /* Expand a vector reduction. FN is the binary pattern to reduce;
45213 DEST is the destination; IN is the input vector. */
45215 void
45216 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45218 rtx half, dst, vec = in;
45219 machine_mode mode = GET_MODE (in);
45220 int i;
45222 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45223 if (TARGET_SSE4_1
45224 && mode == V8HImode
45225 && fn == gen_uminv8hi3)
45227 emit_insn (gen_sse4_1_phminposuw (dest, in));
45228 return;
45231 for (i = GET_MODE_BITSIZE (mode);
45232 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45233 i >>= 1)
45235 half = gen_reg_rtx (mode);
45236 emit_reduc_half (half, vec, i);
45237 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45238 dst = dest;
45239 else
45240 dst = gen_reg_rtx (mode);
45241 emit_insn (fn (dst, half, vec));
45242 vec = dst;
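/* Editorial illustration (not part of GCC): a scalar model of the loop
above for a 4-element signed-max reduction.  Each step folds the upper
half of the still-live lanes onto the lower half, so only element 0 of
the final vector is meaningful.  */
static int
reduc_smax_v4si_model (const int v[4])
{
/* i == 128: fold elements 2,3 onto elements 0,1.  */
int a0 = v[0] > v[2] ? v[0] : v[2];
int a1 = v[1] > v[3] ? v[1] : v[3];
/* i == 64: fold element 1 onto element 0.  */
return a0 > a1 ? a0 : a1;
}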
45246 /* Target hook for scalar_mode_supported_p. */
45247 static bool
45248 ix86_scalar_mode_supported_p (machine_mode mode)
45250 if (DECIMAL_FLOAT_MODE_P (mode))
45251 return default_decimal_float_supported_p ();
45252 else if (mode == TFmode)
45253 return true;
45254 else
45255 return default_scalar_mode_supported_p (mode);
45258 /* Implements target hook vector_mode_supported_p. */
45259 static bool
45260 ix86_vector_mode_supported_p (machine_mode mode)
45262 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45263 return true;
45264 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45265 return true;
45266 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45267 return true;
45268 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45269 return true;
45270 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45271 return true;
45272 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45273 return true;
45274 return false;
45277 /* Implement target hook libgcc_floating_mode_supported_p. */
45278 static bool
45279 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45281 switch (mode)
45283 case SFmode:
45284 case DFmode:
45285 case XFmode:
45286 return true;
45288 case TFmode:
45289 #ifdef IX86_NO_LIBGCC_TFMODE
45290 return false;
45291 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45292 return TARGET_LONG_DOUBLE_128;
45293 #else
45294 return true;
45295 #endif
45297 default:
45298 return false;
45302 /* Target hook for c_mode_for_suffix. */
45303 static machine_mode
45304 ix86_c_mode_for_suffix (char suffix)
45306 if (suffix == 'q')
45307 return TFmode;
45308 if (suffix == 'w')
45309 return XFmode;
45311 return VOIDmode;
45314 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45316 We do this in the new i386 backend to maintain source compatibility
45317 with the old cc0-based compiler. */
45319 static tree
45320 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45322 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45323 clobbers);
45324 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45325 clobbers);
45326 return clobbers;
45329 /* Implements target vector targetm.asm.encode_section_info. */
45331 static void ATTRIBUTE_UNUSED
45332 ix86_encode_section_info (tree decl, rtx rtl, int first)
45334 default_encode_section_info (decl, rtl, first);
45336 if (ix86_in_large_data_p (decl))
45337 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45340 /* Worker function for REVERSE_CONDITION. */
45342 enum rtx_code
45343 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45345 return (mode != CCFPmode && mode != CCFPUmode
45346 ? reverse_condition (code)
45347 : reverse_condition_maybe_unordered (code));
45350 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45351 to OPERANDS[0]. */
45353 const char *
45354 output_387_reg_move (rtx insn, rtx *operands)
45356 if (REG_P (operands[0]))
45358 if (REG_P (operands[1])
45359 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45361 if (REGNO (operands[0]) == FIRST_STACK_REG)
45362 return output_387_ffreep (operands, 0);
45363 return "fstp\t%y0";
45365 if (STACK_TOP_P (operands[0]))
45366 return "fld%Z1\t%y1";
45367 return "fst\t%y0";
45369 else if (MEM_P (operands[0]))
45371 gcc_assert (REG_P (operands[1]));
45372 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45373 return "fstp%Z0\t%y0";
45374 else
45376 /* There is no non-popping store to memory for XFmode.
45377 So if we need one, follow the store with a load. */
45378 if (GET_MODE (operands[0]) == XFmode)
45379 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45380 else
45381 return "fst%Z0\t%y0";
45384 else
45385 gcc_unreachable ();
45388 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45389 FP status register is set. */
45391 void
45392 ix86_emit_fp_unordered_jump (rtx label)
45394 rtx reg = gen_reg_rtx (HImode);
45395 rtx temp;
45397 emit_insn (gen_x86_fnstsw_1 (reg));
45399 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45401 emit_insn (gen_x86_sahf_1 (reg));
45403 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45404 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45406 else
45408 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45410 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45411 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45414 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45415 gen_rtx_LABEL_REF (VOIDmode, label),
45416 pc_rtx);
45417 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45419 emit_jump_insn (temp);
45420 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45423 /* Output code to perform a log1p XFmode calculation. */
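/* The threshold tested below is approximately 1 - sqrt(2)/2: the x87
FYL2XP1 instruction used on the fast path is only specified for
operands with |x| below that bound, so larger inputs fall back to
computing log2 of the explicit sum 1 + x with FYL2X.  */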
45425 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45427 rtx_code_label *label1 = gen_label_rtx ();
45428 rtx_code_label *label2 = gen_label_rtx ();
45430 rtx tmp = gen_reg_rtx (XFmode);
45431 rtx tmp2 = gen_reg_rtx (XFmode);
45432 rtx test;
45434 emit_insn (gen_absxf2 (tmp, op1));
45435 test = gen_rtx_GE (VOIDmode, tmp,
45436 CONST_DOUBLE_FROM_REAL_VALUE (
45437 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45438 XFmode));
45439 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45441 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45442 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45443 emit_jump (label2);
45445 emit_label (label1);
45446 emit_move_insn (tmp, CONST1_RTX (XFmode));
45447 emit_insn (gen_addxf3 (tmp, op1, tmp));
45448 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45449 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45451 emit_label (label2);
45454 /* Output x87 code to round OP1 to the nearest integer value, with
halfway cases rounded away from zero, storing the result into OP0. */
45455 void ix86_emit_i387_round (rtx op0, rtx op1)
45457 machine_mode inmode = GET_MODE (op1);
45458 machine_mode outmode = GET_MODE (op0);
45459 rtx e1, e2, res, tmp, tmp1, half;
45460 rtx scratch = gen_reg_rtx (HImode);
45461 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45462 rtx_code_label *jump_label = gen_label_rtx ();
45463 rtx insn;
45464 rtx (*gen_abs) (rtx, rtx);
45465 rtx (*gen_neg) (rtx, rtx);
45467 switch (inmode)
45469 case SFmode:
45470 gen_abs = gen_abssf2;
45471 break;
45472 case DFmode:
45473 gen_abs = gen_absdf2;
45474 break;
45475 case XFmode:
45476 gen_abs = gen_absxf2;
45477 break;
45478 default:
45479 gcc_unreachable ();
45482 switch (outmode)
45484 case SFmode:
45485 gen_neg = gen_negsf2;
45486 break;
45487 case DFmode:
45488 gen_neg = gen_negdf2;
45489 break;
45490 case XFmode:
45491 gen_neg = gen_negxf2;
45492 break;
45493 case HImode:
45494 gen_neg = gen_neghi2;
45495 break;
45496 case SImode:
45497 gen_neg = gen_negsi2;
45498 break;
45499 case DImode:
45500 gen_neg = gen_negdi2;
45501 break;
45502 default:
45503 gcc_unreachable ();
45506 e1 = gen_reg_rtx (inmode);
45507 e2 = gen_reg_rtx (inmode);
45508 res = gen_reg_rtx (outmode);
45510 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45512 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
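/* E.g. round (2.3) = floor (2.8) = 2 and round (-2.5) = -floor (3.0) = -3,
i.e. halfway cases are rounded away from zero, matching the C round ()
semantics.  */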
45514 /* scratch = fxam(op1) */
45515 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45516 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45517 UNSPEC_FXAM)));
45518 /* e1 = fabs(op1) */
45519 emit_insn (gen_abs (e1, op1));
45521 /* e2 = e1 + 0.5 */
45522 half = force_reg (inmode, half);
45523 emit_insn (gen_rtx_SET (VOIDmode, e2,
45524 gen_rtx_PLUS (inmode, e1, half)));
45526 /* res = floor(e2) */
45527 if (inmode != XFmode)
45529 tmp1 = gen_reg_rtx (XFmode);
45531 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45532 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45534 else
45535 tmp1 = e2;
45537 switch (outmode)
45539 case SFmode:
45540 case DFmode:
45542 rtx tmp0 = gen_reg_rtx (XFmode);
45544 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45546 emit_insn (gen_rtx_SET (VOIDmode, res,
45547 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45548 UNSPEC_TRUNC_NOOP)));
45550 break;
45551 case XFmode:
45552 emit_insn (gen_frndintxf2_floor (res, tmp1));
45553 break;
45554 case HImode:
45555 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45556 break;
45557 case SImode:
45558 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45559 break;
45560 case DImode:
45561 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45562 break;
45563 default:
45564 gcc_unreachable ();
45567 /* flags = signbit(a) */
45568 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45570 /* if (flags) then res = -res */
45571 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45572 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45573 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45574 pc_rtx);
45575 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45576 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45577 JUMP_LABEL (insn) = jump_label;
45579 emit_insn (gen_neg (res, res));
45581 emit_label (jump_label);
45582 LABEL_NUSES (jump_label) = 1;
45584 emit_move_insn (op0, res);
45587 /* Output code to perform a Newton-Raphson approximation of a single precision
45588 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45590 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45592 rtx x0, x1, e0, e1;
45594 x0 = gen_reg_rtx (mode);
45595 e0 = gen_reg_rtx (mode);
45596 e1 = gen_reg_rtx (mode);
45597 x1 = gen_reg_rtx (mode);
45599 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45601 b = force_reg (mode, b);
45603 /* x0 = rcp(b) estimate */
45604 if (mode == V16SFmode || mode == V8DFmode)
45605 emit_insn (gen_rtx_SET (VOIDmode, x0,
45606 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45607 UNSPEC_RCP14)));
45608 else
45609 emit_insn (gen_rtx_SET (VOIDmode, x0,
45610 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45611 UNSPEC_RCP)));
45613 /* e0 = x0 * b */
45614 emit_insn (gen_rtx_SET (VOIDmode, e0,
45615 gen_rtx_MULT (mode, x0, b)));
45617 /* e0 = x0 * e0 */
45618 emit_insn (gen_rtx_SET (VOIDmode, e0,
45619 gen_rtx_MULT (mode, x0, e0)));
45621 /* e1 = x0 + x0 */
45622 emit_insn (gen_rtx_SET (VOIDmode, e1,
45623 gen_rtx_PLUS (mode, x0, x0)));
45625 /* x1 = e1 - e0 */
45626 emit_insn (gen_rtx_SET (VOIDmode, x1,
45627 gen_rtx_MINUS (mode, e1, e0)));
45629 /* res = a * x1 */
45630 emit_insn (gen_rtx_SET (VOIDmode, res,
45631 gen_rtx_MULT (mode, a, x1)));
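/* Editorial illustration (not part of GCC): a scalar model of the
sequence emitted above.  X0 stands in for the hardware reciprocal
estimate (RCPSS/RCP14); the arithmetic is one Newton-Raphson step
x1 = x0 * (2 - b*x0), written as (x0 + x0) - b*x0*x0 to match the
multiply/add order above, which roughly doubles the number of correct
bits in the estimate.  */
static float
swdiv_model (float a, float b, float x0)
{
float e0 = x0 * b;    /* e0 = x0 * b */
e0 = x0 * e0;         /* e0 = b * x0 * x0 */
float e1 = x0 + x0;   /* e1 = 2 * x0 */
float x1 = e1 - e0;   /* x1 = x0 * (2 - b*x0) */
return a * x1;        /* res = a * x1 */
}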
45634 /* Output code to perform a Newton-Raphson approximation of a
45635 single precision floating point [reciprocal] square root. */
45637 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45638 bool recip)
45640 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45641 REAL_VALUE_TYPE r;
45642 int unspec;
45644 x0 = gen_reg_rtx (mode);
45645 e0 = gen_reg_rtx (mode);
45646 e1 = gen_reg_rtx (mode);
45647 e2 = gen_reg_rtx (mode);
45648 e3 = gen_reg_rtx (mode);
45650 real_from_integer (&r, VOIDmode, -3, SIGNED);
45651 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45653 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45654 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45655 unspec = UNSPEC_RSQRT;
45657 if (VECTOR_MODE_P (mode))
45659 mthree = ix86_build_const_vector (mode, true, mthree);
45660 mhalf = ix86_build_const_vector (mode, true, mhalf);
45661 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45662 if (GET_MODE_SIZE (mode) == 64)
45663 unspec = UNSPEC_RSQRT14;
45666 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45667 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45669 a = force_reg (mode, a);
45671 /* x0 = rsqrt(a) estimate */
45672 emit_insn (gen_rtx_SET (VOIDmode, x0,
45673 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45674 unspec)));
45676 /* If a == 0.0, filter out the infinity in the rsqrt estimate to prevent NaN for sqrt (0.0). */
45677 if (!recip)
45679 rtx zero, mask;
45681 zero = gen_reg_rtx (mode);
45682 mask = gen_reg_rtx (mode);
45684 zero = force_reg (mode, CONST0_RTX(mode));
45686 /* Handle masked compare. */
45687 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45689 mask = gen_reg_rtx (HImode);
45690 /* Imm value 0x4 corresponds to not-equal comparison. */
45691 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45692 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45694 else
45696 emit_insn (gen_rtx_SET (VOIDmode, mask,
45697 gen_rtx_NE (mode, zero, a)));
45699 emit_insn (gen_rtx_SET (VOIDmode, x0,
45700 gen_rtx_AND (mode, x0, mask)));
45704 /* e0 = x0 * a */
45705 emit_insn (gen_rtx_SET (VOIDmode, e0,
45706 gen_rtx_MULT (mode, x0, a)));
45707 /* e1 = e0 * x0 */
45708 emit_insn (gen_rtx_SET (VOIDmode, e1,
45709 gen_rtx_MULT (mode, e0, x0)));
45711 /* e2 = e1 - 3. */
45712 mthree = force_reg (mode, mthree);
45713 emit_insn (gen_rtx_SET (VOIDmode, e2,
45714 gen_rtx_PLUS (mode, e1, mthree)));
45716 mhalf = force_reg (mode, mhalf);
45717 if (recip)
45718 /* e3 = -.5 * x0 */
45719 emit_insn (gen_rtx_SET (VOIDmode, e3,
45720 gen_rtx_MULT (mode, x0, mhalf)));
45721 else
45722 /* e3 = -.5 * e0 */
45723 emit_insn (gen_rtx_SET (VOIDmode, e3,
45724 gen_rtx_MULT (mode, e0, mhalf)));
45725 /* ret = e2 * e3 */
45726 emit_insn (gen_rtx_SET (VOIDmode, res,
45727 gen_rtx_MULT (mode, e2, e3)));
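/* Editorial illustration (not part of GCC): a scalar model of the
sequence emitted above.  X0 stands in for the hardware rsqrt estimate;
one Newton-Raphson step refines it as 0.5 * x0 * (3 - a*x0*x0), with
the signs folded into the -3.0 and -0.5 constants.  Multiplying by
e0 = a*x0 instead of x0 in the last step turns the refined 1/sqrt(a)
into sqrt(a).  */
static float
swsqrt_model (float a, float x0, int recip)
{
float e0 = x0 * a;                     /* e0 = a * x0 */
float e1 = e0 * x0;                    /* e1 = a * x0 * x0 */
float e2 = e1 - 3.0f;                  /* e2 = a*x0*x0 - 3 */
float e3 = (recip ? x0 : e0) * -0.5f;  /* e3 = -.5 * x0 or -.5 * a*x0 */
return e2 * e3;                        /* res = e2 * e3 */
}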
45730 #ifdef TARGET_SOLARIS
45731 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45733 static void
45734 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45735 tree decl)
45737 /* With Binutils 2.15, the "@unwind" marker must be specified on
45738 every occurrence of the ".eh_frame" section, not just the first
45739 one. */
45740 if (TARGET_64BIT
45741 && strcmp (name, ".eh_frame") == 0)
45743 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45744 flags & SECTION_WRITE ? "aw" : "a");
45745 return;
45748 #ifndef USE_GAS
45749 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45751 solaris_elf_asm_comdat_section (name, flags, decl);
45752 return;
45754 #endif
45756 default_elf_asm_named_section (name, flags, decl);
45758 #endif /* TARGET_SOLARIS */
45760 /* Return the mangling of TYPE if it is an extended fundamental type. */
45762 static const char *
45763 ix86_mangle_type (const_tree type)
45765 type = TYPE_MAIN_VARIANT (type);
45767 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45768 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45769 return NULL;
45771 switch (TYPE_MODE (type))
45773 case TFmode:
45774 /* __float128 is "g". */
45775 return "g";
45776 case XFmode:
45777 /* "long double" or __float80 is "e". */
45778 return "e";
45779 default:
45780 return NULL;
45784 /* For 32-bit code we can save PIC register setup by using
45785 the __stack_chk_fail_local hidden function instead of calling
45786 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45787 register, so it is better to call __stack_chk_fail directly. */
45789 static tree ATTRIBUTE_UNUSED
45790 ix86_stack_protect_fail (void)
45792 return TARGET_64BIT
45793 ? default_external_stack_protect_fail ()
45794 : default_hidden_stack_protect_fail ();
45797 /* Select a format to encode pointers in exception handling data. CODE
45798 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45799 true if the symbol may be affected by dynamic relocations.
45801 ??? All x86 object file formats are capable of representing this.
45802 After all, the relocation needed is the same as for the call insn.
45803 Whether or not a particular assembler allows us to enter such, I
45804 guess we'll have to see. */
45806 asm_preferred_eh_data_format (int code, int global)
45808 if (flag_pic)
45810 int type = DW_EH_PE_sdata8;
45811 if (!TARGET_64BIT
45812 || ix86_cmodel == CM_SMALL_PIC
45813 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45814 type = DW_EH_PE_sdata4;
45815 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45817 if (ix86_cmodel == CM_SMALL
45818 || (ix86_cmodel == CM_MEDIUM && code))
45819 return DW_EH_PE_udata4;
45820 return DW_EH_PE_absptr;
45823 /* Expand copysign from SIGN to the positive value ABS_VALUE
45824 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45825 the sign-bit. */
45826 static void
45827 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45829 machine_mode mode = GET_MODE (sign);
45830 rtx sgn = gen_reg_rtx (mode);
45831 if (mask == NULL_RTX)
45833 machine_mode vmode;
45835 if (mode == SFmode)
45836 vmode = V4SFmode;
45837 else if (mode == DFmode)
45838 vmode = V2DFmode;
45839 else
45840 vmode = mode;
45842 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45843 if (!VECTOR_MODE_P (mode))
45845 /* We need to generate a scalar mode mask in this case. */
45846 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45847 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45848 mask = gen_reg_rtx (mode);
45849 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45852 else
45853 mask = gen_rtx_NOT (mode, mask);
45854 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45855 gen_rtx_AND (mode, mask, sign)));
45856 emit_insn (gen_rtx_SET (VOIDmode, result,
45857 gen_rtx_IOR (mode, abs_value, sgn)));
45860 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45861 mask for masking out the sign-bit is stored in *SMASK, if that is
45862 non-null. */
45863 static rtx
45864 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45866 machine_mode vmode, mode = GET_MODE (op0);
45867 rtx xa, mask;
45869 xa = gen_reg_rtx (mode);
45870 if (mode == SFmode)
45871 vmode = V4SFmode;
45872 else if (mode == DFmode)
45873 vmode = V2DFmode;
45874 else
45875 vmode = mode;
45876 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45877 if (!VECTOR_MODE_P (mode))
45879 /* We need to generate a scalar mode mask in this case. */
45880 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45881 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45882 mask = gen_reg_rtx (mode);
45883 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45885 emit_insn (gen_rtx_SET (VOIDmode, xa,
45886 gen_rtx_AND (mode, op0, mask)));
45888 if (smask)
45889 *smask = mask;
45891 return xa;
45894 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45895 swapping the operands if SWAP_OPERANDS is true. The expanded
45896 code is a forward jump to a newly created label in case the
45897 comparison is true. The generated label rtx is returned. */
45898 static rtx_code_label *
45899 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45900 bool swap_operands)
45902 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45903 rtx_code_label *label;
45904 rtx tmp;
45906 if (swap_operands)
45907 std::swap (op0, op1);
45909 label = gen_label_rtx ();
45910 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45911 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45912 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45913 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45914 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45915 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45916 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45917 JUMP_LABEL (tmp) = label;
45919 return label;
45922 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45923 using comparison code CODE. Operands are swapped for the comparison if
45924 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45925 static rtx
45926 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45927 bool swap_operands)
45929 rtx (*insn)(rtx, rtx, rtx, rtx);
45930 machine_mode mode = GET_MODE (op0);
45931 rtx mask = gen_reg_rtx (mode);
45933 if (swap_operands)
45934 std::swap (op0, op1);
45936 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45938 emit_insn (insn (mask, op0, op1,
45939 gen_rtx_fmt_ee (code, mode, op0, op1)));
45940 return mask;
45943 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45944 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
45945 static rtx
45946 ix86_gen_TWO52 (machine_mode mode)
45948 REAL_VALUE_TYPE TWO52r;
45949 rtx TWO52;
45951 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45952 TWO52 = const_double_from_real_value (TWO52r, mode);
45953 TWO52 = force_reg (mode, TWO52);
45955 return TWO52;
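/* Editorial note: the expanders below rely on the fact that, for
|x| < 2**52 (DFmode) or |x| < 2**23 (SFmode), the sum x + TWO52 has no
fraction bits left, so (x + TWO52) - TWO52 is x rounded to an integer
under the current (round-to-nearest) mode; e.g. 3.7 + 2**52 rounds to
4 + 2**52 and the subtraction yields 4.0.  */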
45958 /* Expand SSE sequence for computing lround from OP1 storing
45959 into OP0. */
45960 void
45961 ix86_expand_lround (rtx op0, rtx op1)
45963 /* C code for the stuff we're doing below:
45964 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45965 return (long)tmp;
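(Editorial note: nextafter (0.5, 0.0) -- the largest value below 0.5 --
is used instead of 0.5 itself so that inputs just under .5, such as
0.49999999999999994, are not pushed up to 1.0 by the rounding of the
addition.)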
45967 machine_mode mode = GET_MODE (op1);
45968 const struct real_format *fmt;
45969 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45970 rtx adj;
45972 /* load nextafter (0.5, 0.0) */
45973 fmt = REAL_MODE_FORMAT (mode);
45974 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45975 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45977 /* adj = copysign (0.5, op1) */
45978 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45979 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45981 /* adj = op1 + adj */
45982 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45984 /* op0 = (imode)adj */
45985 expand_fix (op0, adj, 0);
45988 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
45989 into OPERAND0. */
45990 void
45991 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45993 /* C code for the stuff we're doing below (for do_floor):
45994 xi = (long)op1;
45995 xi -= (double)xi > op1 ? 1 : 0;
45996 return xi;
45998 machine_mode fmode = GET_MODE (op1);
45999 machine_mode imode = GET_MODE (op0);
46000 rtx ireg, freg, tmp;
46001 rtx_code_label *label;
46003 /* reg = (long)op1 */
46004 ireg = gen_reg_rtx (imode);
46005 expand_fix (ireg, op1, 0);
46007 /* freg = (double)reg */
46008 freg = gen_reg_rtx (fmode);
46009 expand_float (freg, ireg, 0);
46011 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46012 label = ix86_expand_sse_compare_and_jump (UNLE,
46013 freg, op1, !do_floor);
46014 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46015 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46016 emit_move_insn (ireg, tmp);
46018 emit_label (label);
46019 LABEL_NUSES (label) = 1;
46021 emit_move_insn (op0, ireg);
46024 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46025 result in OPERAND0. */
46026 void
46027 ix86_expand_rint (rtx operand0, rtx operand1)
46029 /* C code for the stuff we're doing below:
46030 xa = fabs (operand1);
46031 if (!isless (xa, 2**52))
46032 return operand1;
46033 xa = xa + 2**52 - 2**52;
46034 return copysign (xa, operand1);
46036 machine_mode mode = GET_MODE (operand0);
46037 rtx res, xa, TWO52, mask;
46038 rtx_code_label *label;
46040 res = gen_reg_rtx (mode);
46041 emit_move_insn (res, operand1);
46043 /* xa = abs (operand1) */
46044 xa = ix86_expand_sse_fabs (res, &mask);
46046 /* if (!isless (xa, TWO52)) goto label; */
46047 TWO52 = ix86_gen_TWO52 (mode);
46048 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46050 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46051 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46053 ix86_sse_copysign_to_positive (res, xa, res, mask);
46055 emit_label (label);
46056 LABEL_NUSES (label) = 1;
46058 emit_move_insn (operand0, res);
46061 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46062 into OPERAND0. */
46063 void
46064 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46066 /* C code for the stuff we expand below.
46067 double xa = fabs (x), x2;
46068 if (!isless (xa, TWO52))
46069 return x;
46070 xa = xa + TWO52 - TWO52;
46071 x2 = copysign (xa, x);
46072 Compensate. Floor:
46073 if (x2 > x)
46074 x2 -= 1;
46075 Compensate. Ceil:
46076 if (x2 < x)
46077 x2 -= -1;
46078 return x2;
46080 machine_mode mode = GET_MODE (operand0);
46081 rtx xa, TWO52, tmp, one, res, mask;
46082 rtx_code_label *label;
46084 TWO52 = ix86_gen_TWO52 (mode);
46086 /* Temporary for holding the result, initialized to the input
46087 operand to ease control flow. */
46088 res = gen_reg_rtx (mode);
46089 emit_move_insn (res, operand1);
46091 /* xa = abs (operand1) */
46092 xa = ix86_expand_sse_fabs (res, &mask);
46094 /* if (!isless (xa, TWO52)) goto label; */
46095 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46097 /* xa = xa + TWO52 - TWO52; */
46098 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46099 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46101 /* xa = copysign (xa, operand1) */
46102 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46104 /* generate 1.0 or -1.0 */
46105 one = force_reg (mode,
46106 const_double_from_real_value (do_floor
46107 ? dconst1 : dconstm1, mode));
46109 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46110 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46111 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46112 gen_rtx_AND (mode, one, tmp)));
46113 /* We always need to subtract here to preserve signed zero. */
46114 tmp = expand_simple_binop (mode, MINUS,
46115 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46116 emit_move_insn (res, tmp);
46118 emit_label (label);
46119 LABEL_NUSES (label) = 1;
46121 emit_move_insn (operand0, res);
46124 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46125 into OPERAND0. */
46126 void
46127 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46129 /* C code for the stuff we expand below.
46130 double xa = fabs (x), x2;
46131 if (!isless (xa, TWO52))
46132 return x;
46133 x2 = (double)(long)x;
46134 Compensate. Floor:
46135 if (x2 > x)
46136 x2 -= 1;
46137 Compensate. Ceil:
46138 if (x2 < x)
46139 x2 += 1;
46140 if (HONOR_SIGNED_ZEROS (mode))
46141 return copysign (x2, x);
46142 return x2;
46144 machine_mode mode = GET_MODE (operand0);
46145 rtx xa, xi, TWO52, tmp, one, res, mask;
46146 rtx_code_label *label;
46148 TWO52 = ix86_gen_TWO52 (mode);
46150 /* Temporary for holding the result, initialized to the input
46151 operand to ease control flow. */
46152 res = gen_reg_rtx (mode);
46153 emit_move_insn (res, operand1);
46155 /* xa = abs (operand1) */
46156 xa = ix86_expand_sse_fabs (res, &mask);
46158 /* if (!isless (xa, TWO52)) goto label; */
46159 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46161 /* xa = (double)(long)x */
46162 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46163 expand_fix (xi, res, 0);
46164 expand_float (xa, xi, 0);
46166 /* generate 1.0 */
46167 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46169 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46170 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46171 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46172 gen_rtx_AND (mode, one, tmp)));
46173 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46174 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46175 emit_move_insn (res, tmp);
46177 if (HONOR_SIGNED_ZEROS (mode))
46178 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46180 emit_label (label);
46181 LABEL_NUSES (label) = 1;
46183 emit_move_insn (operand0, res);
46186 /* Expand SSE sequence for computing round from OPERAND1 storing
46187 into OPERAND0. A sequence that works without relying on DImode truncation
46188 via cvttsd2siq, which is only available on 64-bit targets. */
46189 void
46190 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46192 /* C code for the stuff we expand below.
46193 double xa = fabs (x), xa2, x2;
46194 if (!isless (xa, TWO52))
46195 return x;
46196 Using the absolute value and copying back sign makes
46197 -0.0 -> -0.0 correct.
46198 xa2 = xa + TWO52 - TWO52;
46199 Compensate.
46200 dxa = xa2 - xa;
46201 if (dxa <= -0.5)
46202 xa2 += 1;
46203 else if (dxa > 0.5)
46204 xa2 -= 1;
46205 x2 = copysign (xa2, x);
46206 return x2;
46208 machine_mode mode = GET_MODE (operand0);
46209 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46210 rtx_code_label *label;
46212 TWO52 = ix86_gen_TWO52 (mode);
46214 /* Temporary for holding the result, initialized to the input
46215 operand to ease control flow. */
46216 res = gen_reg_rtx (mode);
46217 emit_move_insn (res, operand1);
46219 /* xa = abs (operand1) */
46220 xa = ix86_expand_sse_fabs (res, &mask);
46222 /* if (!isless (xa, TWO52)) goto label; */
46223 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46225 /* xa2 = xa + TWO52 - TWO52; */
46226 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46227 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46229 /* dxa = xa2 - xa; */
46230 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46232 /* generate 0.5, 1.0 and -0.5 */
46233 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46234 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46235 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46236 0, OPTAB_DIRECT);
46238 /* Compensate. */
46239 tmp = gen_reg_rtx (mode);
46240 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46241 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46242 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46243 gen_rtx_AND (mode, one, tmp)));
46244 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46245 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46246 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46247 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46248 gen_rtx_AND (mode, one, tmp)));
46249 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46251 /* res = copysign (xa2, operand1) */
46252 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46254 emit_label (label);
46255 LABEL_NUSES (label) = 1;
46257 emit_move_insn (operand0, res);
46260 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46261 into OPERAND0. */
46262 void
46263 ix86_expand_trunc (rtx operand0, rtx operand1)
46265 /* C code for SSE variant we expand below.
46266 double xa = fabs (x), x2;
46267 if (!isless (xa, TWO52))
46268 return x;
46269 x2 = (double)(long)x;
46270 if (HONOR_SIGNED_ZEROS (mode))
46271 return copysign (x2, x);
46272 return x2;
46274 machine_mode mode = GET_MODE (operand0);
46275 rtx xa, xi, TWO52, res, mask;
46276 rtx_code_label *label;
46278 TWO52 = ix86_gen_TWO52 (mode);
46280 /* Temporary for holding the result, initialized to the input
46281 operand to ease control flow. */
46282 res = gen_reg_rtx (mode);
46283 emit_move_insn (res, operand1);
46285 /* xa = abs (operand1) */
46286 xa = ix86_expand_sse_fabs (res, &mask);
46288 /* if (!isless (xa, TWO52)) goto label; */
46289 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46291 /* x = (double)(long)x */
46292 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46293 expand_fix (xi, res, 0);
46294 expand_float (res, xi, 0);
46296 if (HONOR_SIGNED_ZEROS (mode))
46297 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46299 emit_label (label);
46300 LABEL_NUSES (label) = 1;
46302 emit_move_insn (operand0, res);
46305 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46306 into OPERAND0 without relying on 64-bit-only DImode truncation. */
46307 void
46308 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46310 machine_mode mode = GET_MODE (operand0);
46311 rtx xa, mask, TWO52, one, res, smask, tmp;
46312 rtx_code_label *label;
46314 /* C code for SSE variant we expand below.
46315 double xa = fabs (x), xa2, x2;
46316 if (!isless (xa, TWO52))
46317 return x;
46318 xa2 = xa + TWO52 - TWO52;
46319 Compensate:
46320 if (xa2 > xa)
46321 xa2 -= 1.0;
46322 x2 = copysign (xa2, x);
46323 return x2;
46326 TWO52 = ix86_gen_TWO52 (mode);
46328 /* Temporary for holding the result, initialized to the input
46329 operand to ease control flow. */
46330 res = gen_reg_rtx (mode);
46331 emit_move_insn (res, operand1);
46333 /* xa = abs (operand1) */
46334 xa = ix86_expand_sse_fabs (res, &smask);
46336 /* if (!isless (xa, TWO52)) goto label; */
46337 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46339 /* res = xa + TWO52 - TWO52; */
46340 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46341 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46342 emit_move_insn (res, tmp);
46344 /* generate 1.0 */
46345 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46347 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46348 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46349 emit_insn (gen_rtx_SET (VOIDmode, mask,
46350 gen_rtx_AND (mode, mask, one)));
46351 tmp = expand_simple_binop (mode, MINUS,
46352 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46353 emit_move_insn (res, tmp);
46355 /* res = copysign (res, operand1) */
46356 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46358 emit_label (label);
46359 LABEL_NUSES (label) = 1;
46361 emit_move_insn (operand0, res);
46364 /* Expand SSE sequence for computing round from OPERAND1 storing
46365 into OPERAND0. */
46366 void
46367 ix86_expand_round (rtx operand0, rtx operand1)
46369 /* C code for the stuff we're doing below:
46370 double xa = fabs (x);
46371 if (!isless (xa, TWO52))
46372 return x;
46373 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46374 return copysign (xa, x);
46376 machine_mode mode = GET_MODE (operand0);
46377 rtx res, TWO52, xa, xi, half, mask;
46378 rtx_code_label *label;
46379 const struct real_format *fmt;
46380 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46382 /* Temporary for holding the result, initialized to the input
46383 operand to ease control flow. */
46384 res = gen_reg_rtx (mode);
46385 emit_move_insn (res, operand1);
46387 TWO52 = ix86_gen_TWO52 (mode);
46388 xa = ix86_expand_sse_fabs (res, &mask);
46389 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46391 /* load nextafter (0.5, 0.0) */
46392 fmt = REAL_MODE_FORMAT (mode);
46393 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46394 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
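  /* Illustration for DFmode (p == 53), assuming IEEE round-to-nearest-even:
     pred_half = 0.5 - 2^-54, the largest double below 0.5. Then
     (0.5 - 2^-54) + pred_half == 1 - 2^-53, which truncates to 0 as
     desired, while 0.5 + pred_half still rounds up to 1.0, so exact
     halves keep rounding away from zero. Adding plain 0.5 instead would
     mis-round the first case up to 1.0. */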
46396 /* xa = xa + 0.5 */
46397 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46398 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46400 /* xa = (double)(int64_t)xa */
46401 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46402 expand_fix (xi, xa, 0);
46403 expand_float (xa, xi, 0);
46405 /* res = copysign (xa, operand1) */
46406 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46408 emit_label (label);
46409 LABEL_NUSES (label) = 1;
46411 emit_move_insn (operand0, res);
46414 /* Expand SSE sequence for computing round
46415 from OP1 storing into OP0 using sse4 round insn. */
46416 void
46417 ix86_expand_round_sse4 (rtx op0, rtx op1)
46419 machine_mode mode = GET_MODE (op0);
46420 rtx e1, e2, res, half;
46421 const struct real_format *fmt;
46422 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46423 rtx (*gen_copysign) (rtx, rtx, rtx);
46424 rtx (*gen_round) (rtx, rtx, rtx);
46426 switch (mode)
46428 case SFmode:
46429 gen_copysign = gen_copysignsf3;
46430 gen_round = gen_sse4_1_roundsf2;
46431 break;
46432 case DFmode:
46433 gen_copysign = gen_copysigndf3;
46434 gen_round = gen_sse4_1_rounddf2;
46435 break;
46436 default:
46437 gcc_unreachable ();
46440 /* round (a) = trunc (a + copysign (0.5, a)) */
46442 /* load nextafter (0.5, 0.0) */
46443 fmt = REAL_MODE_FORMAT (mode);
46444 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46445 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46446 half = const_double_from_real_value (pred_half, mode);
46448 /* e1 = copysign (0.5, op1) */
46449 e1 = gen_reg_rtx (mode);
46450 emit_insn (gen_copysign (e1, half, op1));
46452 /* e2 = op1 + e1 */
46453 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46455 /* res = trunc (e2) */
46456 res = gen_reg_rtx (mode);
46457 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46459 emit_move_insn (op0, res);
46463 /* Table of valid machine attributes. */
46464 static const struct attribute_spec ix86_attribute_table[] =
46466 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46467 affects_type_identity } */
46468 /* Stdcall attribute says callee is responsible for popping arguments
46469 if they are not variable. */
46470 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46471 true },
46472 /* Fastcall attribute says callee is responsible for popping arguments
46473 if they are not variable. */
46474 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46475 true },
46476 /* Thiscall attribute says callee is responsible for popping arguments
46477 if they are not variable. */
46478 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46479 true },
46480 /* Cdecl attribute says the callee is a normal C declaration */
46481 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46482 true },
46483 /* Regparm attribute specifies how many integer arguments are to be
46484 passed in registers. */
46485 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46486 true },
46487 /* Sseregparm attribute says we are using x86_64 calling conventions
46488 for FP arguments. */
46489 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46490 true },
46491 /* The transactional memory builtins are implicitly regparm or fastcall
46492 depending on the ABI. Override the generic do-nothing attribute that
46493 these builtins were declared with. */
46494 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46495 true },
46496 /* force_align_arg_pointer says this function realigns the stack at entry. */
46497 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46498 false, true, true, ix86_handle_cconv_attribute, false },
46499 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46500 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46501 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46502 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46503 false },
46504 #endif
46505 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46506 false },
46507 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46508 false },
46509 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46510 SUBTARGET_ATTRIBUTE_TABLE,
46511 #endif
46512 /* ms_abi and sysv_abi calling convention function attributes. */
46513 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46514 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46515 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46516 false },
46517 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46518 ix86_handle_callee_pop_aggregate_return, true },
46519 /* End element. */
46520 { NULL, 0, 0, false, false, false, NULL, false }
46523 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46524 static int
46525 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46526 tree vectype, int)
46528 unsigned elements;
46530 switch (type_of_cost)
46532 case scalar_stmt:
46533 return ix86_cost->scalar_stmt_cost;
46535 case scalar_load:
46536 return ix86_cost->scalar_load_cost;
46538 case scalar_store:
46539 return ix86_cost->scalar_store_cost;
46541 case vector_stmt:
46542 return ix86_cost->vec_stmt_cost;
46544 case vector_load:
46545 return ix86_cost->vec_align_load_cost;
46547 case vector_store:
46548 return ix86_cost->vec_store_cost;
46550 case vec_to_scalar:
46551 return ix86_cost->vec_to_scalar_cost;
46553 case scalar_to_vec:
46554 return ix86_cost->scalar_to_vec_cost;
46556 case unaligned_load:
46557 case unaligned_store:
46558 return ix86_cost->vec_unalign_load_cost;
46560 case cond_branch_taken:
46561 return ix86_cost->cond_taken_branch_cost;
46563 case cond_branch_not_taken:
46564 return ix86_cost->cond_not_taken_branch_cost;
46566 case vec_perm:
46567 case vec_promote_demote:
46568 return ix86_cost->vec_stmt_cost;
46570 case vec_construct:
46571 elements = TYPE_VECTOR_SUBPARTS (vectype);
46572 return elements / 2 + 1;
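      /* For instance, a V8SImode vec_construct (8 elements) is costed
         as 8 / 2 + 1 == 5. */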
46574 default:
46575 gcc_unreachable ();
46579 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46580 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46581 insn every time. */
46583 static GTY(()) rtx_insn *vselect_insn;
46585 /* Initialize vselect_insn. */
46587 static void
46588 init_vselect_insn (void)
46590 unsigned i;
46591 rtx x;
46593 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46594 for (i = 0; i < MAX_VECT_LEN; ++i)
46595 XVECEXP (x, 0, i) = const0_rtx;
46596 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46597 const0_rtx), x);
46598 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46599 start_sequence ();
46600 vselect_insn = emit_insn (x);
46601 end_sequence ();
46604 /* Construct (set target (vec_select op0 (parallel perm))) and
46605 return true if that's a valid instruction in the active ISA. */
46607 static bool
46608 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46609 unsigned nelt, bool testing_p)
46611 unsigned int i;
46612 rtx x, save_vconcat;
46613 int icode;
46615 if (vselect_insn == NULL_RTX)
46616 init_vselect_insn ();
46618 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46619 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46620 for (i = 0; i < nelt; ++i)
46621 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46622 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46623 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46624 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46625 SET_DEST (PATTERN (vselect_insn)) = target;
46626 icode = recog_memoized (vselect_insn);
46628 if (icode >= 0 && !testing_p)
46629 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46631 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46632 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46633 INSN_CODE (vselect_insn) = -1;
46635 return icode >= 0;
46638 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46640 static bool
46641 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46642 const unsigned char *perm, unsigned nelt,
46643 bool testing_p)
46645 machine_mode v2mode;
46646 rtx x;
46647 bool ok;
46649 if (vselect_insn == NULL_RTX)
46650 init_vselect_insn ();
46652 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46653 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46654 PUT_MODE (x, v2mode);
46655 XEXP (x, 0) = op0;
46656 XEXP (x, 1) = op1;
46657 ok = expand_vselect (target, x, perm, nelt, testing_p);
46658 XEXP (x, 0) = const0_rtx;
46659 XEXP (x, 1) = const0_rtx;
46660 return ok;
46663 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46664 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46666 static bool
46667 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46669 machine_mode vmode = d->vmode;
46670 unsigned i, mask, nelt = d->nelt;
46671 rtx target, op0, op1, x;
46672 rtx rperm[32], vperm;
46674 if (d->one_operand_p)
46675 return false;
46676 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46677 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46679 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46681 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46683 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46685 else
46686 return false;
46688 /* This is a blend, not a permute. Elements must stay in their
46689 respective lanes. */
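  /* For example, for V4SFmode the permutation {0, 5, 2, 7} is a valid
     blend (each element i is either i or i + 4), while {1, 5, 2, 7} is
     not, since element 0 would have to move. */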
46690 for (i = 0; i < nelt; ++i)
46692 unsigned e = d->perm[i];
46693 if (!(e == i || e == i + nelt))
46694 return false;
46697 if (d->testing_p)
46698 return true;
46700 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46701 decision should be extracted elsewhere, so that we only try that
46702 sequence once all budget==3 options have been tried. */
46703 target = d->target;
46704 op0 = d->op0;
46705 op1 = d->op1;
46706 mask = 0;
46708 switch (vmode)
46710 case V8DFmode:
46711 case V16SFmode:
46712 case V4DFmode:
46713 case V8SFmode:
46714 case V2DFmode:
46715 case V4SFmode:
46716 case V8HImode:
46717 case V8SImode:
46718 case V32HImode:
46719 case V64QImode:
46720 case V16SImode:
46721 case V8DImode:
46722 for (i = 0; i < nelt; ++i)
46723 mask |= (d->perm[i] >= nelt) << i;
46724 break;
46726 case V2DImode:
46727 for (i = 0; i < 2; ++i)
46728 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46729 vmode = V8HImode;
46730 goto do_subreg;
46732 case V4SImode:
46733 for (i = 0; i < 4; ++i)
46734 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46735 vmode = V8HImode;
46736 goto do_subreg;
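      /* Concrete example: a V4SImode blend taking elements 1 and 3 from
         op1 ({0, 5, 2, 7}) is recast as a V8HImode pblendw with mask
         (3 << 2) | (3 << 6) == 0xcc, i.e. words 2, 3, 6 and 7 come
         from op1. */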
46738 case V16QImode:
46739 /* See if bytes move in pairs so we can use pblendw with
46740 an immediate argument, rather than pblendvb with a vector
46741 argument. */
46742 for (i = 0; i < 16; i += 2)
46743 if (d->perm[i] + 1 != d->perm[i + 1])
46745 use_pblendvb:
46746 for (i = 0; i < nelt; ++i)
46747 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46749 finish_pblendvb:
46750 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46751 vperm = force_reg (vmode, vperm);
46753 if (GET_MODE_SIZE (vmode) == 16)
46754 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46755 else
46756 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46757 if (target != d->target)
46758 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46759 return true;
46762 for (i = 0; i < 8; ++i)
46763 mask |= (d->perm[i * 2] >= 16) << i;
46764 vmode = V8HImode;
46765 /* FALLTHRU */
46767 do_subreg:
46768 target = gen_reg_rtx (vmode);
46769 op0 = gen_lowpart (vmode, op0);
46770 op1 = gen_lowpart (vmode, op1);
46771 break;
46773 case V32QImode:
46774 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46775 for (i = 0; i < 32; i += 2)
46776 if (d->perm[i] + 1 != d->perm[i + 1])
46777 goto use_pblendvb;
46778 /* See if bytes move in quadruplets. If yes, vpblendd
46779 with immediate can be used. */
46780 for (i = 0; i < 32; i += 4)
46781 if (d->perm[i] + 2 != d->perm[i + 2])
46782 break;
46783 if (i < 32)
46785 /* See if bytes move the same in both lanes. If yes,
46786 vpblendw with immediate can be used. */
46787 for (i = 0; i < 16; i += 2)
46788 if (d->perm[i] + 16 != d->perm[i + 16])
46789 goto use_pblendvb;
46791 /* Use vpblendw. */
46792 for (i = 0; i < 16; ++i)
46793 mask |= (d->perm[i * 2] >= 32) << i;
46794 vmode = V16HImode;
46795 goto do_subreg;
46798 /* Use vpblendd. */
46799 for (i = 0; i < 8; ++i)
46800 mask |= (d->perm[i * 4] >= 32) << i;
46801 vmode = V8SImode;
46802 goto do_subreg;
46804 case V16HImode:
46805 /* See if words move in pairs. If yes, vpblendd can be used. */
46806 for (i = 0; i < 16; i += 2)
46807 if (d->perm[i] + 1 != d->perm[i + 1])
46808 break;
46809 if (i < 16)
46811 /* See if words move the same in both lanes. If not,
46812 vpblendvb must be used. */
46813 for (i = 0; i < 8; i++)
46814 if (d->perm[i] + 8 != d->perm[i + 8])
46816 /* Use vpblendvb. */
46817 for (i = 0; i < 32; ++i)
46818 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46820 vmode = V32QImode;
46821 nelt = 32;
46822 target = gen_reg_rtx (vmode);
46823 op0 = gen_lowpart (vmode, op0);
46824 op1 = gen_lowpart (vmode, op1);
46825 goto finish_pblendvb;
46828 /* Use vpblendw. */
46829 for (i = 0; i < 16; ++i)
46830 mask |= (d->perm[i] >= 16) << i;
46831 break;
46834 /* Use vpblendd. */
46835 for (i = 0; i < 8; ++i)
46836 mask |= (d->perm[i * 2] >= 16) << i;
46837 vmode = V8SImode;
46838 goto do_subreg;
46840 case V4DImode:
46841 /* Use vpblendd. */
46842 for (i = 0; i < 4; ++i)
46843 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46844 vmode = V8SImode;
46845 goto do_subreg;
46847 default:
46848 gcc_unreachable ();
46851 /* This matches five different patterns with the different modes. */
46852 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46853 x = gen_rtx_SET (VOIDmode, target, x);
46854 emit_insn (x);
46855 if (target != d->target)
46856 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46858 return true;
46861 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46862 in terms of the variable form of vpermilps.
46864 Note that we will have already failed the immediate input vpermilps,
46865 which requires that the high and low part shuffle be identical; the
46866 variable form doesn't require that. */
46868 static bool
46869 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46871 rtx rperm[8], vperm;
46872 unsigned i;
46874 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46875 return false;
46877 /* We can only permute within the 128-bit lane. */
46878 for (i = 0; i < 8; ++i)
46880 unsigned e = d->perm[i];
46881 if (i < 4 ? e >= 4 : e < 4)
46882 return false;
46885 if (d->testing_p)
46886 return true;
46888 for (i = 0; i < 8; ++i)
46890 unsigned e = d->perm[i];
46892 /* Within each 128-bit lane, the elements of op0 are numbered
46893 from 0 and the elements of op1 are numbered from 4. */
46894 if (e >= 8 + 4)
46895 e -= 8;
46896 else if (e >= 4)
46897 e -= 4;
46899 rperm[i] = GEN_INT (e);
46902 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46903 vperm = force_reg (V8SImode, vperm);
46904 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46906 return true;
46909 /* Return true if the permutation D can instead be performed as a
46910 VMODE permutation. */
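/* For instance, the V16QImode permutation
   {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} moves whole
   aligned groups of four bytes, so it is also expressible as the
   V4SImode permutation {1, 0, 3, 2}. */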
46912 static bool
46913 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46915 unsigned int i, j, chunk;
46917 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46918 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46919 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46920 return false;
46922 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46923 return true;
46925 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46926 for (i = 0; i < d->nelt; i += chunk)
46927 if (d->perm[i] & (chunk - 1))
46928 return false;
46929 else
46930 for (j = 1; j < chunk; ++j)
46931 if (d->perm[i] + j != d->perm[i + j])
46932 return false;
46934 return true;
46937 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46938 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46940 static bool
46941 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46943 unsigned i, nelt, eltsz, mask;
46944 unsigned char perm[64];
46945 machine_mode vmode = V16QImode;
46946 rtx rperm[64], vperm, target, op0, op1;
46948 nelt = d->nelt;
46950 if (!d->one_operand_p)
46952 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46954 if (TARGET_AVX2
46955 && valid_perm_using_mode_p (V2TImode, d))
46957 if (d->testing_p)
46958 return true;
46960 /* Use vperm2i128 insn. The pattern uses
46961 V4DImode instead of V2TImode. */
46962 target = d->target;
46963 if (d->vmode != V4DImode)
46964 target = gen_reg_rtx (V4DImode);
46965 op0 = gen_lowpart (V4DImode, d->op0);
46966 op1 = gen_lowpart (V4DImode, d->op1);
46967 rperm[0]
46968 = GEN_INT ((d->perm[0] / (nelt / 2))
46969 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
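          /* E.g. for a V4DImode permutation {2, 3, 4, 5}: the low result
             half comes from 128-bit chunk 2 / 2 == 1 (the high lane of
             op0) and the high half from chunk 4 / 2 == 2 (the low lane
             of op1), giving the immediate 1 | (2 * 16) == 0x21. */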
46970 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46971 if (target != d->target)
46972 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46973 return true;
46975 return false;
46978 else
46980 if (GET_MODE_SIZE (d->vmode) == 16)
46982 if (!TARGET_SSSE3)
46983 return false;
46985 else if (GET_MODE_SIZE (d->vmode) == 32)
46987 if (!TARGET_AVX2)
46988 return false;
46990 /* V4DImode should already be handled through
46991 expand_vselect by the vpermq instruction. */
46992 gcc_assert (d->vmode != V4DImode);
46994 vmode = V32QImode;
46995 if (d->vmode == V8SImode
46996 || d->vmode == V16HImode
46997 || d->vmode == V32QImode)
46999 /* First see if vpermq can be used for
47000 V8SImode/V16HImode/V32QImode. */
47001 if (valid_perm_using_mode_p (V4DImode, d))
47003 for (i = 0; i < 4; i++)
47004 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47005 if (d->testing_p)
47006 return true;
47007 target = gen_reg_rtx (V4DImode);
47008 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47009 perm, 4, false))
47011 emit_move_insn (d->target,
47012 gen_lowpart (d->vmode, target));
47013 return true;
47015 return false;
47018 /* Next see if vpermd can be used. */
47019 if (valid_perm_using_mode_p (V8SImode, d))
47020 vmode = V8SImode;
47022 /* Or if vpermps can be used. */
47023 else if (d->vmode == V8SFmode)
47024 vmode = V8SImode;
47026 if (vmode == V32QImode)
47028 /* vpshufb only works intra-lane; it is not
47029 possible to shuffle bytes between the lanes. */
47030 for (i = 0; i < nelt; ++i)
47031 if ((d->perm[i] ^ i) & (nelt / 2))
47032 return false;
47035 else if (GET_MODE_SIZE (d->vmode) == 64)
47037 if (!TARGET_AVX512BW)
47038 return false;
47040 /* If vpermq didn't work, vpshufb won't work either. */
47041 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47042 return false;
47044 vmode = V64QImode;
47045 if (d->vmode == V16SImode
47046 || d->vmode == V32HImode
47047 || d->vmode == V64QImode)
47049 /* First see if vpermq can be used for
47050 V16SImode/V32HImode/V64QImode. */
47051 if (valid_perm_using_mode_p (V8DImode, d))
47053 for (i = 0; i < 8; i++)
47054 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47055 if (d->testing_p)
47056 return true;
47057 target = gen_reg_rtx (V8DImode);
47058 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47059 perm, 8, false))
47061 emit_move_insn (d->target,
47062 gen_lowpart (d->vmode, target));
47063 return true;
47065 return false;
47068 /* Next see if vpermd can be used. */
47069 if (valid_perm_using_mode_p (V16SImode, d))
47070 vmode = V16SImode;
47072 /* Or if vpermps can be used. */
47073 else if (d->vmode == V16SFmode)
47074 vmode = V16SImode;
47075 if (vmode == V64QImode)
47077 /* vpshufb only works intra-lane; it is not
47078 possible to shuffle bytes between the lanes. */
47079 for (i = 0; i < nelt; ++i)
47080 if ((d->perm[i] ^ i) & (nelt / 4))
47081 return false;
47084 else
47085 return false;
47088 if (d->testing_p)
47089 return true;
47091 if (vmode == V8SImode)
47092 for (i = 0; i < 8; ++i)
47093 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47094 else if (vmode == V16SImode)
47095 for (i = 0; i < 16; ++i)
47096 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47097 else
47099 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47100 if (!d->one_operand_p)
47101 mask = 2 * nelt - 1;
47102 else if (vmode == V16QImode)
47103 mask = nelt - 1;
47104 else if (vmode == V64QImode)
47105 mask = nelt / 4 - 1;
47106 else
47107 mask = nelt / 2 - 1;
47109 for (i = 0; i < nelt; ++i)
47111 unsigned j, e = d->perm[i] & mask;
47112 for (j = 0; j < eltsz; ++j)
47113 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47117 vperm = gen_rtx_CONST_VECTOR (vmode,
47118 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47119 vperm = force_reg (vmode, vperm);
47121 target = d->target;
47122 if (d->vmode != vmode)
47123 target = gen_reg_rtx (vmode);
47124 op0 = gen_lowpart (vmode, d->op0);
47125 if (d->one_operand_p)
47127 if (vmode == V16QImode)
47128 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47129 else if (vmode == V32QImode)
47130 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47131 else if (vmode == V64QImode)
47132 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47133 else if (vmode == V8SFmode)
47134 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47135 else if (vmode == V8SImode)
47136 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47137 else if (vmode == V16SFmode)
47138 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47139 else if (vmode == V16SImode)
47140 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47141 else
47142 gcc_unreachable ();
47144 else
47146 op1 = gen_lowpart (vmode, d->op1);
47147 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47149 if (target != d->target)
47150 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47152 return true;
47155 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47156 in a single instruction. */
47158 static bool
47159 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47161 unsigned i, nelt = d->nelt;
47162 unsigned char perm2[MAX_VECT_LEN];
47164 /* Check plain VEC_SELECT first, because AVX has instructions that could
47165 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47166 input where SEL+CONCAT may not. */
47167 if (d->one_operand_p)
47169 int mask = nelt - 1;
47170 bool identity_perm = true;
47171 bool broadcast_perm = true;
47173 for (i = 0; i < nelt; i++)
47175 perm2[i] = d->perm[i] & mask;
47176 if (perm2[i] != i)
47177 identity_perm = false;
47178 if (perm2[i])
47179 broadcast_perm = false;
47182 if (identity_perm)
47184 if (!d->testing_p)
47185 emit_move_insn (d->target, d->op0);
47186 return true;
47188 else if (broadcast_perm && TARGET_AVX2)
47190 /* Use vpbroadcast{b,w,d}. */
47191 rtx (*gen) (rtx, rtx) = NULL;
47192 switch (d->vmode)
47194 case V64QImode:
47195 if (TARGET_AVX512BW)
47196 gen = gen_avx512bw_vec_dupv64qi_1;
47197 break;
47198 case V32QImode:
47199 gen = gen_avx2_pbroadcastv32qi_1;
47200 break;
47201 case V32HImode:
47202 if (TARGET_AVX512BW)
47203 gen = gen_avx512bw_vec_dupv32hi_1;
47204 break;
47205 case V16HImode:
47206 gen = gen_avx2_pbroadcastv16hi_1;
47207 break;
47208 case V16SImode:
47209 if (TARGET_AVX512F)
47210 gen = gen_avx512f_vec_dupv16si_1;
47211 break;
47212 case V8SImode:
47213 gen = gen_avx2_pbroadcastv8si_1;
47214 break;
47215 case V16QImode:
47216 gen = gen_avx2_pbroadcastv16qi;
47217 break;
47218 case V8HImode:
47219 gen = gen_avx2_pbroadcastv8hi;
47220 break;
47221 case V16SFmode:
47222 if (TARGET_AVX512F)
47223 gen = gen_avx512f_vec_dupv16sf_1;
47224 break;
47225 case V8SFmode:
47226 gen = gen_avx2_vec_dupv8sf_1;
47227 break;
47228 case V8DFmode:
47229 if (TARGET_AVX512F)
47230 gen = gen_avx512f_vec_dupv8df_1;
47231 break;
47232 case V8DImode:
47233 if (TARGET_AVX512F)
47234 gen = gen_avx512f_vec_dupv8di_1;
47235 break;
47236 /* For other modes prefer other shuffles this function creates. */
47237 default: break;
47239 if (gen != NULL)
47241 if (!d->testing_p)
47242 emit_insn (gen (d->target, d->op0));
47243 return true;
47247 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47248 return true;
47250 /* There are plenty of patterns in sse.md that are written for
47251 SEL+CONCAT and are not replicated for a single op. Perhaps
47252 that should be changed, to avoid the nastiness here. */
47254 /* Recognize interleave style patterns, which means incrementing
47255 every other permutation operand. */
47256 for (i = 0; i < nelt; i += 2)
47258 perm2[i] = d->perm[i] & mask;
47259 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47261 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47262 d->testing_p))
47263 return true;
47265 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47266 if (nelt >= 4)
47268 for (i = 0; i < nelt; i += 4)
47270 perm2[i + 0] = d->perm[i + 0] & mask;
47271 perm2[i + 1] = d->perm[i + 1] & mask;
47272 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47273 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47276 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47277 d->testing_p))
47278 return true;
47282 /* Finally, try the fully general two operand permute. */
47283 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47284 d->testing_p))
47285 return true;
47287 /* Recognize interleave style patterns with reversed operands. */
47288 if (!d->one_operand_p)
47290 for (i = 0; i < nelt; ++i)
47292 unsigned e = d->perm[i];
47293 if (e >= nelt)
47294 e -= nelt;
47295 else
47296 e += nelt;
47297 perm2[i] = e;
47300 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47301 d->testing_p))
47302 return true;
47305 /* Try the SSE4.1 blend variable merge instructions. */
47306 if (expand_vec_perm_blend (d))
47307 return true;
47309 /* Try one of the AVX vpermil variable permutations. */
47310 if (expand_vec_perm_vpermil (d))
47311 return true;
47313 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47314 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47315 if (expand_vec_perm_pshufb (d))
47316 return true;
47318 /* Try the AVX2 vpalignr instruction. */
47319 if (expand_vec_perm_palignr (d, true))
47320 return true;
47322 /* Try the AVX512F vpermi2 instructions. */
47323 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47324 return true;
47326 return false;
47329 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47330 in terms of a pair of pshuflw + pshufhw instructions. */
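/* For example, the V8HImode permutation {2, 0, 3, 1, 5, 7, 4, 6} is done
   as a pshuflw with selector {2, 0, 3, 1} (high words left in place)
   followed by a pshufhw with selector {5, 7, 4, 6} (low words left in
   place). */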
47332 static bool
47333 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47335 unsigned char perm2[MAX_VECT_LEN];
47336 unsigned i;
47337 bool ok;
47339 if (d->vmode != V8HImode || !d->one_operand_p)
47340 return false;
47342 /* The two permutations only operate in 64-bit lanes. */
47343 for (i = 0; i < 4; ++i)
47344 if (d->perm[i] >= 4)
47345 return false;
47346 for (i = 4; i < 8; ++i)
47347 if (d->perm[i] < 4)
47348 return false;
47350 if (d->testing_p)
47351 return true;
47353 /* Emit the pshuflw. */
47354 memcpy (perm2, d->perm, 4);
47355 for (i = 4; i < 8; ++i)
47356 perm2[i] = i;
47357 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47358 gcc_assert (ok);
47360 /* Emit the pshufhw. */
47361 memcpy (perm2 + 4, d->perm + 4, 4);
47362 for (i = 0; i < 4; ++i)
47363 perm2[i] = i;
47364 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47365 gcc_assert (ok);
47367 return true;
47370 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47371 the permutation using the SSSE3 palignr instruction. This succeeds
47372 when all of the elements in PERM fit within one vector and we merely
47373 need to shift them down so that a single vector permutation has a
47374 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47375 the vpalignr instruction itself can perform the requested permutation. */
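/* For instance, the two-operand V8HImode permutation
   {3, 4, 5, 6, 7, 8, 9, 10} has min == 3; a palignr of the concatenated
   operands by 3 elements (48 bits) leaves the identity permutation
   {0 ... 7}, so the palignr by itself implements the shuffle. */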
47377 static bool
47378 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47380 unsigned i, nelt = d->nelt;
47381 unsigned min, max, minswap, maxswap;
47382 bool in_order, ok, swap = false;
47383 rtx shift, target;
47384 struct expand_vec_perm_d dcopy;
47386 /* Even with AVX, palignr only operates on 128-bit vectors;
47387 with AVX2, palignr operates on both 128-bit lanes. */
47388 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47389 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47390 return false;
47392 min = 2 * nelt;
47393 max = 0;
47394 minswap = 2 * nelt;
47395 maxswap = 0;
47396 for (i = 0; i < nelt; ++i)
47398 unsigned e = d->perm[i];
47399 unsigned eswap = d->perm[i] ^ nelt;
47400 if (GET_MODE_SIZE (d->vmode) == 32)
47402 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47403 eswap = e ^ (nelt / 2);
47405 if (e < min)
47406 min = e;
47407 if (e > max)
47408 max = e;
47409 if (eswap < minswap)
47410 minswap = eswap;
47411 if (eswap > maxswap)
47412 maxswap = eswap;
47414 if (min == 0
47415 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47417 if (d->one_operand_p
47418 || minswap == 0
47419 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47420 ? nelt / 2 : nelt))
47421 return false;
47422 swap = true;
47423 min = minswap;
47424 max = maxswap;
47427 /* Given that we have SSSE3, we know we'll be able to implement the
47428 single operand permutation after the palignr with pshufb for
47429 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47430 first. */
47431 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47432 return true;
47434 dcopy = *d;
47435 if (swap)
47437 dcopy.op0 = d->op1;
47438 dcopy.op1 = d->op0;
47439 for (i = 0; i < nelt; ++i)
47440 dcopy.perm[i] ^= nelt;
47443 in_order = true;
47444 for (i = 0; i < nelt; ++i)
47446 unsigned e = dcopy.perm[i];
47447 if (GET_MODE_SIZE (d->vmode) == 32
47448 && e >= nelt
47449 && (e & (nelt / 2 - 1)) < min)
47450 e = e - min - (nelt / 2);
47451 else
47452 e = e - min;
47453 if (e != i)
47454 in_order = false;
47455 dcopy.perm[i] = e;
47457 dcopy.one_operand_p = true;
47459 if (single_insn_only_p && !in_order)
47460 return false;
47462 /* For AVX2, test whether we can permute the result in one instruction. */
47463 if (d->testing_p)
47465 if (in_order)
47466 return true;
47467 dcopy.op1 = dcopy.op0;
47468 return expand_vec_perm_1 (&dcopy);
47471 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47472 if (GET_MODE_SIZE (d->vmode) == 16)
47474 target = gen_reg_rtx (TImode);
47475 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47476 gen_lowpart (TImode, dcopy.op0), shift));
47478 else
47480 target = gen_reg_rtx (V2TImode);
47481 emit_insn (gen_avx2_palignrv2ti (target,
47482 gen_lowpart (V2TImode, dcopy.op1),
47483 gen_lowpart (V2TImode, dcopy.op0),
47484 shift));
47487 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47489 /* Test for the degenerate case where the alignment by itself
47490 produces the desired permutation. */
47491 if (in_order)
47493 emit_move_insn (d->target, dcopy.op0);
47494 return true;
47497 ok = expand_vec_perm_1 (&dcopy);
47498 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47500 return ok;
47503 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47504 the permutation using the SSE4_1 pblendv instruction. Potentially
47505 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
47507 static bool
47508 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47510 unsigned i, which, nelt = d->nelt;
47511 struct expand_vec_perm_d dcopy, dcopy1;
47512 machine_mode vmode = d->vmode;
47513 bool ok;
47515 /* Use the same checks as in expand_vec_perm_blend. */
47516 if (d->one_operand_p)
47517 return false;
47518 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47520 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47522 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47524 else
47525 return false;
47527 /* Figure out which permutation elements do not stay in their
47528 respective lanes. */
47529 for (i = 0, which = 0; i < nelt; ++i)
47531 unsigned e = d->perm[i];
47532 if (e != i)
47533 which |= (e < nelt ? 1 : 2);
47535 /* We can pblend the part where elements do not stay in their
47536 respective lanes only when these elements all come from one
47537 half of the permutation.
47538 {0 1 8 3 4 5 9 7} is ok, as 8 and 9 are not in their respective
47539 lanes but both 8 and 9 are >= 8.
47540 {0 1 8 3 4 5 2 7} is not ok, as 2 and 8 are not in their
47541 respective lanes and 8 is >= 8 but 2 is not. */
47542 if (which != 1 && which != 2)
47543 return false;
47544 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47545 return true;
47547 /* First we apply one operand permutation to the part where
47548 elements stay not in their respective lanes. */
47549 dcopy = *d;
47550 if (which == 2)
47551 dcopy.op0 = dcopy.op1 = d->op1;
47552 else
47553 dcopy.op0 = dcopy.op1 = d->op0;
47554 dcopy.one_operand_p = true;
47556 for (i = 0; i < nelt; ++i)
47557 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47559 ok = expand_vec_perm_1 (&dcopy);
47560 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47561 return false;
47562 else
47563 gcc_assert (ok);
47564 if (d->testing_p)
47565 return true;
47567 /* Next we put permuted elements into their positions. */
47568 dcopy1 = *d;
47569 if (which == 2)
47570 dcopy1.op1 = dcopy.target;
47571 else
47572 dcopy1.op0 = dcopy.target;
47574 for (i = 0; i < nelt; ++i)
47575 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47577 ok = expand_vec_perm_blend (&dcopy1);
47578 gcc_assert (ok);
47580 return true;
47583 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47585 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47586 a two vector permutation into a single vector permutation by using
47587 an interleave operation to merge the vectors. */
47589 static bool
47590 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47592 struct expand_vec_perm_d dremap, dfinal;
47593 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47594 unsigned HOST_WIDE_INT contents;
47595 unsigned char remap[2 * MAX_VECT_LEN];
47596 rtx_insn *seq;
47597 bool ok, same_halves = false;
47599 if (GET_MODE_SIZE (d->vmode) == 16)
47601 if (d->one_operand_p)
47602 return false;
47604 else if (GET_MODE_SIZE (d->vmode) == 32)
47606 if (!TARGET_AVX)
47607 return false;
47608 /* For 32-byte modes allow even d->one_operand_p.
47609 The lack of cross-lane shuffling in some instructions
47610 might prevent a single insn shuffle. */
47611 dfinal = *d;
47612 dfinal.testing_p = true;
47613 /* If expand_vec_perm_interleave3 can expand this into
47614 a 3 insn sequence, give up and let it be expanded as
47615 a 3 insn sequence. While that is one insn longer,
47616 it doesn't need a memory operand, and in the common
47617 case where both the interleave low and interleave high
47618 permutations with the same operands are adjacent, the
47619 pair needs only 4 insns after CSE. */
47620 if (expand_vec_perm_interleave3 (&dfinal))
47621 return false;
47623 else
47624 return false;
47626 /* Examine from whence the elements come. */
47627 contents = 0;
47628 for (i = 0; i < nelt; ++i)
47629 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47631 memset (remap, 0xff, sizeof (remap));
47632 dremap = *d;
47634 if (GET_MODE_SIZE (d->vmode) == 16)
47636 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47638 /* Split the two input vectors into 4 halves. */
47639 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47640 h2 = h1 << nelt2;
47641 h3 = h2 << nelt2;
47642 h4 = h3 << nelt2;
47644 /* If the elements all come from the low halves, use interleave low;
47645 similarly for interleave high. If the elements come from mis-matched
47646 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
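      /* E.g. for V8HImode: h1 and h2 cover the low and high half of op0,
         h3 and h4 the low and high half of op1; a permutation that only
         uses elements {0-3, 8-11} (h1 | h3) can be fed through
         punpcklwd first. */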
47647 if ((contents & (h1 | h3)) == contents)
47649 /* punpckl* */
47650 for (i = 0; i < nelt2; ++i)
47652 remap[i] = i * 2;
47653 remap[i + nelt] = i * 2 + 1;
47654 dremap.perm[i * 2] = i;
47655 dremap.perm[i * 2 + 1] = i + nelt;
47657 if (!TARGET_SSE2 && d->vmode == V4SImode)
47658 dremap.vmode = V4SFmode;
47660 else if ((contents & (h2 | h4)) == contents)
47662 /* punpckh* */
47663 for (i = 0; i < nelt2; ++i)
47665 remap[i + nelt2] = i * 2;
47666 remap[i + nelt + nelt2] = i * 2 + 1;
47667 dremap.perm[i * 2] = i + nelt2;
47668 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47670 if (!TARGET_SSE2 && d->vmode == V4SImode)
47671 dremap.vmode = V4SFmode;
47673 else if ((contents & (h1 | h4)) == contents)
47675 /* shufps */
47676 for (i = 0; i < nelt2; ++i)
47678 remap[i] = i;
47679 remap[i + nelt + nelt2] = i + nelt2;
47680 dremap.perm[i] = i;
47681 dremap.perm[i + nelt2] = i + nelt + nelt2;
47683 if (nelt != 4)
47685 /* shufpd */
47686 dremap.vmode = V2DImode;
47687 dremap.nelt = 2;
47688 dremap.perm[0] = 0;
47689 dremap.perm[1] = 3;
47692 else if ((contents & (h2 | h3)) == contents)
47694 /* shufps */
47695 for (i = 0; i < nelt2; ++i)
47697 remap[i + nelt2] = i;
47698 remap[i + nelt] = i + nelt2;
47699 dremap.perm[i] = i + nelt2;
47700 dremap.perm[i + nelt2] = i + nelt;
47702 if (nelt != 4)
47704 /* shufpd */
47705 dremap.vmode = V2DImode;
47706 dremap.nelt = 2;
47707 dremap.perm[0] = 1;
47708 dremap.perm[1] = 2;
47711 else
47712 return false;
47714 else
47716 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47717 unsigned HOST_WIDE_INT q[8];
47718 unsigned int nonzero_halves[4];
47720 /* Split the two input vectors into 8 quarters. */
47721 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47722 for (i = 1; i < 8; ++i)
47723 q[i] = q[0] << (nelt4 * i);
47724 for (i = 0; i < 4; ++i)
47725 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47727 nonzero_halves[nzcnt] = i;
47728 ++nzcnt;
47731 if (nzcnt == 1)
47733 gcc_assert (d->one_operand_p);
47734 nonzero_halves[1] = nonzero_halves[0];
47735 same_halves = true;
47737 else if (d->one_operand_p)
47739 gcc_assert (nonzero_halves[0] == 0);
47740 gcc_assert (nonzero_halves[1] == 1);
47743 if (nzcnt <= 2)
47745 if (d->perm[0] / nelt2 == nonzero_halves[1])
47747 /* Attempt to increase the likelihood that dfinal
47748 shuffle will be intra-lane. */
47749 char tmph = nonzero_halves[0];
47750 nonzero_halves[0] = nonzero_halves[1];
47751 nonzero_halves[1] = tmph;
47754 /* vperm2f128 or vperm2i128. */
47755 for (i = 0; i < nelt2; ++i)
47757 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47758 remap[i + nonzero_halves[0] * nelt2] = i;
47759 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47760 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47763 if (d->vmode != V8SFmode
47764 && d->vmode != V4DFmode
47765 && d->vmode != V8SImode)
47767 dremap.vmode = V8SImode;
47768 dremap.nelt = 8;
47769 for (i = 0; i < 4; ++i)
47771 dremap.perm[i] = i + nonzero_halves[0] * 4;
47772 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47776 else if (d->one_operand_p)
47777 return false;
47778 else if (TARGET_AVX2
47779 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47781 /* vpunpckl* */
47782 for (i = 0; i < nelt4; ++i)
47784 remap[i] = i * 2;
47785 remap[i + nelt] = i * 2 + 1;
47786 remap[i + nelt2] = i * 2 + nelt2;
47787 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47788 dremap.perm[i * 2] = i;
47789 dremap.perm[i * 2 + 1] = i + nelt;
47790 dremap.perm[i * 2 + nelt2] = i + nelt2;
47791 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47794 else if (TARGET_AVX2
47795 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47797 /* vpunpckh* */
47798 for (i = 0; i < nelt4; ++i)
47800 remap[i + nelt4] = i * 2;
47801 remap[i + nelt + nelt4] = i * 2 + 1;
47802 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47803 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47804 dremap.perm[i * 2] = i + nelt4;
47805 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47806 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47807 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47810 else
47811 return false;
47814 /* Use the remapping array set up above to move the elements from their
47815 swizzled locations into their final destinations. */
47816 dfinal = *d;
47817 for (i = 0; i < nelt; ++i)
47819 unsigned e = remap[d->perm[i]];
47820 gcc_assert (e < nelt);
47821 /* If same_halves is true, both halves of the remapped vector are the
47822 same. Avoid cross-lane accesses if possible. */
47823 if (same_halves && i >= nelt2)
47825 gcc_assert (e < nelt2);
47826 dfinal.perm[i] = e + nelt2;
47828 else
47829 dfinal.perm[i] = e;
47831 if (!d->testing_p)
47833 dremap.target = gen_reg_rtx (dremap.vmode);
47834 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47836 dfinal.op1 = dfinal.op0;
47837 dfinal.one_operand_p = true;
47839 /* Test if the final remap can be done with a single insn. For V4SFmode or
47840 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47841 start_sequence ();
47842 ok = expand_vec_perm_1 (&dfinal);
47843 seq = get_insns ();
47844 end_sequence ();
47846 if (!ok)
47847 return false;
47849 if (d->testing_p)
47850 return true;
47852 if (dremap.vmode != dfinal.vmode)
47854 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47855 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47858 ok = expand_vec_perm_1 (&dremap);
47859 gcc_assert (ok);
47861 emit_insn (seq);
47862 return true;
47865 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47866 a single vector cross-lane permutation into vpermq followed
47867 by any of the single insn permutations. */
47869 static bool
47870 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47872 struct expand_vec_perm_d dremap, dfinal;
47873 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47874 unsigned contents[2];
47875 bool ok;
47877 if (!(TARGET_AVX2
47878 && (d->vmode == V32QImode || d->vmode == V16HImode)
47879 && d->one_operand_p))
47880 return false;
47882 contents[0] = 0;
47883 contents[1] = 0;
47884 for (i = 0; i < nelt2; ++i)
47886 contents[0] |= 1u << (d->perm[i] / nelt4);
47887 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47890 for (i = 0; i < 2; ++i)
47892 unsigned int cnt = 0;
47893 for (j = 0; j < 4; ++j)
47894 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47895 return false;
47898 if (d->testing_p)
47899 return true;
47901 dremap = *d;
47902 dremap.vmode = V4DImode;
47903 dremap.nelt = 4;
47904 dremap.target = gen_reg_rtx (V4DImode);
47905 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47906 dremap.op1 = dremap.op0;
47907 dremap.one_operand_p = true;
47908 for (i = 0; i < 2; ++i)
47910 unsigned int cnt = 0;
47911 for (j = 0; j < 4; ++j)
47912 if ((contents[i] & (1u << j)) != 0)
47913 dremap.perm[2 * i + cnt++] = j;
47914 for (; cnt < 2; ++cnt)
47915 dremap.perm[2 * i + cnt] = 0;
47918 dfinal = *d;
47919 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47920 dfinal.op1 = dfinal.op0;
47921 dfinal.one_operand_p = true;
47922 for (i = 0, j = 0; i < nelt; ++i)
47924 if (i == nelt2)
47925 j = 2;
47926 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47927 if ((d->perm[i] / nelt4) == dremap.perm[j])
47929 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47930 dfinal.perm[i] |= nelt4;
47931 else
47932 gcc_unreachable ();
47935 ok = expand_vec_perm_1 (&dremap);
47936 gcc_assert (ok);
47938 ok = expand_vec_perm_1 (&dfinal);
47939 gcc_assert (ok);
47941 return true;
47944 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47945 a vector permutation using two instructions, vperm2f128 resp.
47946 vperm2i128 followed by any single in-lane permutation. */
47948 static bool
47949 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47951 struct expand_vec_perm_d dfirst, dsecond;
47952 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47953 bool ok;
47955 if (!TARGET_AVX
47956 || GET_MODE_SIZE (d->vmode) != 32
47957 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47958 return false;
47960 dsecond = *d;
47961 dsecond.one_operand_p = false;
47962 dsecond.testing_p = true;
47964 /* ((perm << 2) | perm) & 0x33 is the vperm2[fi]128
47965 immediate. For perm < 16 the second permutation uses
47966 d->op0 as its first operand; for perm >= 16 it uses d->op1
47967 as its first operand. The second operand is the result of
47968 vperm2[fi]128. */
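  /* E.g. perm == 6 (0b0110) selects chunk 2 (the low lane of the second
     source) for the low result half and chunk 1 (the high lane of the
     first source) for the high half; the instruction immediate is then
     ((6 << 2) | 6) & 0x33 == 0x12. */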
47969 for (perm = 0; perm < 32; perm++)
47971 /* Ignore permutations which do not move anything cross-lane. */
47972 if (perm < 16)
47974 /* The second shuffle for e.g. V4DFmode has
47975 0123 and ABCD operands.
47976 Ignore AB23, as 23 is already in the second lane
47977 of the first operand. */
47978 if ((perm & 0xc) == (1 << 2)) continue;
47979 /* And 01CD, as 01 is in the first lane of the first
47980 operand. */
47981 if ((perm & 3) == 0) continue;
47982 /* And 4567, as then the vperm2[fi]128 doesn't change
47983 anything on the original 4567 second operand. */
47984 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47986 else
47988 /* The second shuffle for e.g. V4DFmode has
47989 4567 and ABCD operands.
47990 Ignore AB67, as 67 is already in the second lane
47991 of the first operand. */
47992 if ((perm & 0xc) == (3 << 2)) continue;
47993 /* And 45CD, as 45 is in the first lane of the first
47994 operand. */
47995 if ((perm & 3) == 2) continue;
47996 /* And 0123, as then the vperm2[fi]128 doesn't change
47997 anything on the original 0123 first operand. */
47998 if ((perm & 0xf) == (1 << 2)) continue;
48001 for (i = 0; i < nelt; i++)
48003 j = d->perm[i] / nelt2;
48004 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48005 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48006 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48007 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48008 else
48009 break;
48012 if (i == nelt)
48014 start_sequence ();
48015 ok = expand_vec_perm_1 (&dsecond);
48016 end_sequence ();
48018 else
48019 ok = false;
48021 if (ok)
48023 if (d->testing_p)
48024 return true;
48026 /* Found a usable second shuffle. dfirst will be
48027 vperm2f128 on d->op0 and d->op1. */
48028 dsecond.testing_p = false;
48029 dfirst = *d;
48030 dfirst.target = gen_reg_rtx (d->vmode);
48031 for (i = 0; i < nelt; i++)
48032 dfirst.perm[i] = (i & (nelt2 - 1))
48033 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48035 canonicalize_perm (&dfirst);
48036 ok = expand_vec_perm_1 (&dfirst);
48037 gcc_assert (ok);
48039 /* And dsecond is some single insn shuffle, taking
48040 d->op0 and result of vperm2f128 (if perm < 16) or
48041 d->op1 and result of vperm2f128 (otherwise). */
48042 if (perm >= 16)
48043 dsecond.op0 = dsecond.op1;
48044 dsecond.op1 = dfirst.target;
48046 ok = expand_vec_perm_1 (&dsecond);
48047 gcc_assert (ok);
48049 return true;
48052 /* For one operand, the only useful vperm2f128 permutation is 0x01
48053 aka lanes swap. */
48054 if (d->one_operand_p)
48055 return false;
48058 return false;
48061 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48062 a two vector permutation using 2 intra-lane interleave insns
48063 and cross-lane shuffle for 32-byte vectors. */
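/* Only the interleave patterns are recognized here; e.g. for V8SImode
   these are {0, 8, 1, 9, 2, 10, 3, 11} (interleave low) and
   {4, 12, 5, 13, 6, 14, 7, 15} (interleave high). */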
48065 static bool
48066 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48068 unsigned i, nelt;
48069 rtx (*gen) (rtx, rtx, rtx);
48071 if (d->one_operand_p)
48072 return false;
48073 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48075 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48077 else
48078 return false;
48080 nelt = d->nelt;
48081 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48082 return false;
48083 for (i = 0; i < nelt; i += 2)
48084 if (d->perm[i] != d->perm[0] + i / 2
48085 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48086 return false;
48088 if (d->testing_p)
48089 return true;
48091 switch (d->vmode)
48093 case V32QImode:
48094 if (d->perm[0])
48095 gen = gen_vec_interleave_highv32qi;
48096 else
48097 gen = gen_vec_interleave_lowv32qi;
48098 break;
48099 case V16HImode:
48100 if (d->perm[0])
48101 gen = gen_vec_interleave_highv16hi;
48102 else
48103 gen = gen_vec_interleave_lowv16hi;
48104 break;
48105 case V8SImode:
48106 if (d->perm[0])
48107 gen = gen_vec_interleave_highv8si;
48108 else
48109 gen = gen_vec_interleave_lowv8si;
48110 break;
48111 case V4DImode:
48112 if (d->perm[0])
48113 gen = gen_vec_interleave_highv4di;
48114 else
48115 gen = gen_vec_interleave_lowv4di;
48116 break;
48117 case V8SFmode:
48118 if (d->perm[0])
48119 gen = gen_vec_interleave_highv8sf;
48120 else
48121 gen = gen_vec_interleave_lowv8sf;
48122 break;
48123 case V4DFmode:
48124 if (d->perm[0])
48125 gen = gen_vec_interleave_highv4df;
48126 else
48127 gen = gen_vec_interleave_lowv4df;
48128 break;
48129 default:
48130 gcc_unreachable ();
48133 emit_insn (gen (d->target, d->op0, d->op1));
48134 return true;
48137 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48138 a single vector permutation using a single intra-lane vector
48139 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48140 the non-swapped and swapped vectors together. */
48142 static bool
48143 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48145 struct expand_vec_perm_d dfirst, dsecond;
48146 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48147 rtx_insn *seq;
48148 bool ok;
48149 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48151 if (!TARGET_AVX
48152 || TARGET_AVX2
48153 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48154 || !d->one_operand_p)
48155 return false;
48157 dfirst = *d;
48158 for (i = 0; i < nelt; i++)
48159 dfirst.perm[i] = 0xff;
48160 for (i = 0, msk = 0; i < nelt; i++)
48162 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48163 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48164 return false;
48165 dfirst.perm[j] = d->perm[i];
48166 if (j != i)
48167 msk |= (1 << i);
48169 for (i = 0; i < nelt; i++)
48170 if (dfirst.perm[i] == 0xff)
48171 dfirst.perm[i] = i;
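/* MSK now has bit I set exactly for the positions whose requested
   element lives in the other 128-bit lane; the final vblend takes
   those positions from the lane-swapped copy.  */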
48173 if (!d->testing_p)
48174 dfirst.target = gen_reg_rtx (dfirst.vmode);
48176 start_sequence ();
48177 ok = expand_vec_perm_1 (&dfirst);
48178 seq = get_insns ();
48179 end_sequence ();
48181 if (!ok)
48182 return false;
48184 if (d->testing_p)
48185 return true;
48187 emit_insn (seq);
48189 dsecond = *d;
48190 dsecond.op0 = dfirst.target;
48191 dsecond.op1 = dfirst.target;
48192 dsecond.one_operand_p = true;
48193 dsecond.target = gen_reg_rtx (dsecond.vmode);
48194 for (i = 0; i < nelt; i++)
48195 dsecond.perm[i] = i ^ nelt2;
48197 ok = expand_vec_perm_1 (&dsecond);
48198 gcc_assert (ok);
48200 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48201 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48202 return true;
48205 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48206 permutation using two vperm2f128, followed by a vshufpd insn blending
48207 the two vectors together. */
48209 static bool
48210 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48212 struct expand_vec_perm_d dfirst, dsecond, dthird;
48213 bool ok;
48215 if (!TARGET_AVX || (d->vmode != V4DFmode))
48216 return false;
48218 if (d->testing_p)
48219 return true;
48221 dfirst = *d;
48222 dsecond = *d;
48223 dthird = *d;
48225 dfirst.perm[0] = (d->perm[0] & ~1);
48226 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48227 dfirst.perm[2] = (d->perm[2] & ~1);
48228 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48229 dsecond.perm[0] = (d->perm[1] & ~1);
48230 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48231 dsecond.perm[2] = (d->perm[3] & ~1);
48232 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48233 dthird.perm[0] = (d->perm[0] % 2);
48234 dthird.perm[1] = (d->perm[1] % 2) + 4;
48235 dthird.perm[2] = (d->perm[2] % 2) + 2;
48236 dthird.perm[3] = (d->perm[3] % 2) + 6;
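/* E.g. for the selector { 2, 5, 1, 6 }: dfirst gathers the pairs holding
   the even-position picks as { 2 3 0 1 }, dsecond gathers { 4 5 6 7 } for
   the odd positions, and dthird's vshufpd-style selector { 0 5 3 6 }
   blends them into the requested order.  */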
48238 dfirst.target = gen_reg_rtx (dfirst.vmode);
48239 dsecond.target = gen_reg_rtx (dsecond.vmode);
48240 dthird.op0 = dfirst.target;
48241 dthird.op1 = dsecond.target;
48242 dthird.one_operand_p = false;
48244 canonicalize_perm (&dfirst);
48245 canonicalize_perm (&dsecond);
48247 ok = expand_vec_perm_1 (&dfirst)
48248 && expand_vec_perm_1 (&dsecond)
48249 && expand_vec_perm_1 (&dthird);
48251 gcc_assert (ok);
48253 return true;
48256 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48257 permutation with two pshufb insns and an ior. We should have already
48258 failed all two instruction sequences. */
48260 static bool
48261 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48263 rtx rperm[2][16], vperm, l, h, op, m128;
48264 unsigned int i, nelt, eltsz;
48266 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48267 return false;
48268 gcc_assert (!d->one_operand_p);
48270 if (d->testing_p)
48271 return true;
48273 nelt = d->nelt;
48274 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48276 /* Generate two permutation masks. If the required element is within
48277 the given vector it is shuffled into the proper lane. If the required
48278 element is in the other vector, force a zero into the lane by setting
48279 bit 7 in the permutation mask. */
48280 m128 = GEN_INT (-128);
48281 for (i = 0; i < nelt; ++i)
48283 unsigned j, e = d->perm[i];
48284 unsigned which = (e >= nelt);
48285 if (e >= nelt)
48286 e -= nelt;
48288 for (j = 0; j < eltsz; ++j)
48290 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48291 rperm[1-which][i*eltsz + j] = m128;
48295 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48296 vperm = force_reg (V16QImode, vperm);
48298 l = gen_reg_rtx (V16QImode);
48299 op = gen_lowpart (V16QImode, d->op0);
48300 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48302 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48303 vperm = force_reg (V16QImode, vperm);
48305 h = gen_reg_rtx (V16QImode);
48306 op = gen_lowpart (V16QImode, d->op1);
48307 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48309 op = d->target;
48310 if (d->vmode != V16QImode)
48311 op = gen_reg_rtx (V16QImode);
48312 emit_insn (gen_iorv16qi3 (op, l, h));
48313 if (op != d->target)
48314 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48316 return true;
48319 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
48320 with two vpshufb insns, vpermq and vpor. We should have already failed
48321 all two or three instruction sequences. */
48323 static bool
48324 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48326 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48327 unsigned int i, nelt, eltsz;
48329 if (!TARGET_AVX2
48330 || !d->one_operand_p
48331 || (d->vmode != V32QImode && d->vmode != V16HImode))
48332 return false;
48334 if (d->testing_p)
48335 return true;
48337 nelt = d->nelt;
48338 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48340 /* Generate two permutation masks. If the required element is within
48341    the same lane, it is shuffled in.  If the required element is from
48342    the other lane, force a zero by setting bit 7 in the permutation mask.
48343    The other mask has non-negative elements where the element is
48344    requested from the other lane but also moved to the other lane,
48345 so that the result of vpshufb can have the two V2TImode halves
48346 swapped. */
48347 m128 = GEN_INT (-128);
48348 for (i = 0; i < nelt; ++i)
48350 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48351 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48353 for (j = 0; j < eltsz; ++j)
48355 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48356 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48360 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48361 vperm = force_reg (V32QImode, vperm);
48363 h = gen_reg_rtx (V32QImode);
48364 op = gen_lowpart (V32QImode, d->op0);
48365 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48367 /* Swap the 128-bit lanes of h into hp.  */
48368 hp = gen_reg_rtx (V4DImode);
48369 op = gen_lowpart (V4DImode, h);
48370 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48371 const1_rtx));
48373 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48374 vperm = force_reg (V32QImode, vperm);
48376 l = gen_reg_rtx (V32QImode);
48377 op = gen_lowpart (V32QImode, d->op0);
48378 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48380 op = d->target;
48381 if (d->vmode != V32QImode)
48382 op = gen_reg_rtx (V32QImode);
48383 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48384 if (op != d->target)
48385 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48387 return true;
48390 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48391 and extract-odd permutations of two V32QImode or V16HImode operands
48392 with two vpshufb insns, vpor and vpermq. We should have already
48393 failed all two or three instruction sequences. */
48395 static bool
48396 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48398 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48399 unsigned int i, nelt, eltsz;
48401 if (!TARGET_AVX2
48402 || d->one_operand_p
48403 || (d->vmode != V32QImode && d->vmode != V16HImode))
48404 return false;
48406 for (i = 0; i < d->nelt; ++i)
48407 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48408 return false;
48410 if (d->testing_p)
48411 return true;
48413 nelt = d->nelt;
48414 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48416 /* Generate two permutation masks. In the first permutation mask
48417 the first quarter will contain indexes for the first half
48418 of the op0, the second quarter will contain bit 7 set, third quarter
48419 will contain indexes for the second half of the op0 and the
48420 last quarter bit 7 set. In the second permutation mask
48421 the first quarter will contain bit 7 set, the second quarter
48422 indexes for the first half of the op1, the third quarter bit 7 set
48423 and last quarter indexes for the second half of the op1.
48424 I.e. the first mask e.g. for V32QImode extract even will be:
48425 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48426 (all values masked with 0xf except for -128) and second mask
48427 for extract even will be
48428 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48429 m128 = GEN_INT (-128);
48430 for (i = 0; i < nelt; ++i)
48432 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48433 unsigned which = d->perm[i] >= nelt;
48434 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48436 for (j = 0; j < eltsz; ++j)
48438 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48439 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48443 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48444 vperm = force_reg (V32QImode, vperm);
48446 l = gen_reg_rtx (V32QImode);
48447 op = gen_lowpart (V32QImode, d->op0);
48448 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48450 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48451 vperm = force_reg (V32QImode, vperm);
48453 h = gen_reg_rtx (V32QImode);
48454 op = gen_lowpart (V32QImode, d->op1);
48455 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48457 ior = gen_reg_rtx (V32QImode);
48458 emit_insn (gen_iorv32qi3 (ior, l, h));
48460 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48461 op = gen_reg_rtx (V4DImode);
48462 ior = gen_lowpart (V4DImode, ior);
48463 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48464 const1_rtx, GEN_INT (3)));
48465 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48467 return true;
48470 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48471 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48472 with two "and" and "pack" or two "shift" and "pack" insns. We should
48473 have already failed all two instruction sequences. */
48475 static bool
48476 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48478 rtx op, dop0, dop1, t, rperm[16];
48479 unsigned i, odd, c, s, nelt = d->nelt;
48480 bool end_perm = false;
48481 machine_mode half_mode;
48482 rtx (*gen_and) (rtx, rtx, rtx);
48483 rtx (*gen_pack) (rtx, rtx, rtx);
48484 rtx (*gen_shift) (rtx, rtx, rtx);
48486 if (d->one_operand_p)
48487 return false;
48489 switch (d->vmode)
48491 case V8HImode:
48492 /* Required for "pack". */
48493 if (!TARGET_SSE4_1)
48494 return false;
48495 c = 0xffff;
48496 s = 16;
48497 half_mode = V4SImode;
48498 gen_and = gen_andv4si3;
48499 gen_pack = gen_sse4_1_packusdw;
48500 gen_shift = gen_lshrv4si3;
48501 break;
48502 case V16QImode:
48503 /* No check as all instructions are SSE2. */
48504 c = 0xff;
48505 s = 8;
48506 half_mode = V8HImode;
48507 gen_and = gen_andv8hi3;
48508 gen_pack = gen_sse2_packuswb;
48509 gen_shift = gen_lshrv8hi3;
48510 break;
48511 case V16HImode:
48512 if (!TARGET_AVX2)
48513 return false;
48514 c = 0xffff;
48515 s = 16;
48516 half_mode = V8SImode;
48517 gen_and = gen_andv8si3;
48518 gen_pack = gen_avx2_packusdw;
48519 gen_shift = gen_lshrv8si3;
48520 end_perm = true;
48521 break;
48522 case V32QImode:
48523 if (!TARGET_AVX2)
48524 return false;
48525 c = 0xff;
48526 s = 8;
48527 half_mode = V16HImode;
48528 gen_and = gen_andv16hi3;
48529 gen_pack = gen_avx2_packuswb;
48530 gen_shift = gen_lshrv16hi3;
48531 end_perm = true;
48532 break;
48533 default:
48534 /* Only for V8HI, V16QI, V16HI and V32QI modes is this approach more
48535    profitable than general shuffles.  */
48536 return false;
48539 /* Check that permutation is even or odd. */
48540 odd = d->perm[0];
48541 if (odd > 1)
48542 return false;
48544 for (i = 1; i < nelt; ++i)
48545 if (d->perm[i] != 2 * i + odd)
48546 return false;
48548 if (d->testing_p)
48549 return true;
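/* E.g. for V16QImode extract-even we AND each V8HImode element with 0x00ff
   and packuswb the two results; for extract-odd we instead shift each
   element right by 8 before packing.  */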
48551 dop0 = gen_reg_rtx (half_mode);
48552 dop1 = gen_reg_rtx (half_mode);
48553 if (odd == 0)
48555 for (i = 0; i < nelt / 2; i++)
48556 rperm[i] = GEN_INT (c);
48557 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48558 t = force_reg (half_mode, t);
48559 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48560 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48562 else
48564 emit_insn (gen_shift (dop0,
48565 gen_lowpart (half_mode, d->op0),
48566 GEN_INT (s)));
48567 emit_insn (gen_shift (dop1,
48568 gen_lowpart (half_mode, d->op1),
48569 GEN_INT (s)));
48571 /* For the AVX2 256-bit case we need to permute the pack result.  */
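/* The 256-bit pack insns work within 128-bit lanes, so the packed result
   has its four quarters in { 0 2 1 3 } order; the vpermq below puts them
   back in sequence.  */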
48572 if (TARGET_AVX2 && end_perm)
48574 op = gen_reg_rtx (d->vmode);
48575 t = gen_reg_rtx (V4DImode);
48576 emit_insn (gen_pack (op, dop0, dop1));
48577 emit_insn (gen_avx2_permv4di_1 (t,
48578 gen_lowpart (V4DImode, op),
48579 const0_rtx,
48580 const2_rtx,
48581 const1_rtx,
48582 GEN_INT (3)));
48583 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48585 else
48586 emit_insn (gen_pack (d->target, dop0, dop1));
48588 return true;
48591 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48592 and extract-odd permutations. */
48594 static bool
48595 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48597 rtx t1, t2, t3, t4, t5;
48599 switch (d->vmode)
48601 case V4DFmode:
48602 if (d->testing_p)
48603 break;
48604 t1 = gen_reg_rtx (V4DFmode);
48605 t2 = gen_reg_rtx (V4DFmode);
48607 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48608 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48609 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48611 /* Now an unpck[lh]pd will produce the result required. */
48612 if (odd)
48613 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48614 else
48615 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48616 emit_insn (t3);
48617 break;
48619 case V8SFmode:
48621 int mask = odd ? 0xdd : 0x88;
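/* The shufps immediate 0x88 selects elements { 0, 2 } of each source
   (2-bit index fields 0,2,0,2); 0xdd selects { 1, 3 }.  */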
48623 if (d->testing_p)
48624 break;
48625 t1 = gen_reg_rtx (V8SFmode);
48626 t2 = gen_reg_rtx (V8SFmode);
48627 t3 = gen_reg_rtx (V8SFmode);
48629 /* Shuffle within the 128-bit lanes to produce:
48630 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48631 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48632 GEN_INT (mask)));
48634 /* Shuffle the lanes around to produce:
48635 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48636 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48637 GEN_INT (0x3)));
48639 /* Shuffle within the 128-bit lanes to produce:
48640 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48641 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48643 /* Shuffle within the 128-bit lanes to produce:
48644 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48645 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48647 /* Shuffle the lanes around to produce:
48648 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48649 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48650 GEN_INT (0x20)));
48652 break;
48654 case V2DFmode:
48655 case V4SFmode:
48656 case V2DImode:
48657 case V4SImode:
48658 /* These are always directly implementable by expand_vec_perm_1. */
48659 gcc_unreachable ();
48661 case V8HImode:
48662 if (TARGET_SSE4_1)
48663 return expand_vec_perm_even_odd_pack (d);
48664 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48665 return expand_vec_perm_pshufb2 (d);
48666 else
48668 if (d->testing_p)
48669 break;
48670 /* We need 2*log2(N)-1 operations to achieve odd/even
48671 with interleave. */
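/* For V8HImode that is 2*3 - 1 = 5 interleave insns, as emitted below.  */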
48672 t1 = gen_reg_rtx (V8HImode);
48673 t2 = gen_reg_rtx (V8HImode);
48674 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48675 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48676 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48677 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48678 if (odd)
48679 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48680 else
48681 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48682 emit_insn (t3);
48684 break;
48686 case V16QImode:
48687 return expand_vec_perm_even_odd_pack (d);
48689 case V16HImode:
48690 case V32QImode:
48691 return expand_vec_perm_even_odd_pack (d);
48693 case V4DImode:
48694 if (!TARGET_AVX2)
48696 struct expand_vec_perm_d d_copy = *d;
48697 d_copy.vmode = V4DFmode;
48698 if (d->testing_p)
48699 d_copy.target = gen_lowpart (V4DFmode, d->target);
48700 else
48701 d_copy.target = gen_reg_rtx (V4DFmode);
48702 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48703 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48704 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48706 if (!d->testing_p)
48707 emit_move_insn (d->target,
48708 gen_lowpart (V4DImode, d_copy.target));
48709 return true;
48711 return false;
48714 if (d->testing_p)
48715 break;
48717 t1 = gen_reg_rtx (V4DImode);
48718 t2 = gen_reg_rtx (V4DImode);
48720 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48721 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48722 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48724 /* Now a vpunpck[lh]qdq will produce the result required.  */
48725 if (odd)
48726 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48727 else
48728 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48729 emit_insn (t3);
48730 break;
48732 case V8SImode:
48733 if (!TARGET_AVX2)
48735 struct expand_vec_perm_d d_copy = *d;
48736 d_copy.vmode = V8SFmode;
48737 if (d->testing_p)
48738 d_copy.target = gen_lowpart (V8SFmode, d->target);
48739 else
48740 d_copy.target = gen_reg_rtx (V8SFmode);
48741 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48742 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48743 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48745 if (!d->testing_p)
48746 emit_move_insn (d->target,
48747 gen_lowpart (V8SImode, d_copy.target));
48748 return true;
48750 return false;
48753 if (d->testing_p)
48754 break;
48756 t1 = gen_reg_rtx (V8SImode);
48757 t2 = gen_reg_rtx (V8SImode);
48758 t3 = gen_reg_rtx (V4DImode);
48759 t4 = gen_reg_rtx (V4DImode);
48760 t5 = gen_reg_rtx (V4DImode);
48762 /* Shuffle the lanes around into
48763 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48764 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48765 gen_lowpart (V4DImode, d->op1),
48766 GEN_INT (0x20)));
48767 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48768 gen_lowpart (V4DImode, d->op1),
48769 GEN_INT (0x31)));
48771 /* Swap the 2nd and 3rd positions in each lane into
48772 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48773 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48774 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48775 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48776 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48778 /* Now a vpunpck[lh]qdq will produce
48779 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48780 if (odd)
48781 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48782 gen_lowpart (V4DImode, t2));
48783 else
48784 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48785 gen_lowpart (V4DImode, t2));
48786 emit_insn (t3);
48787 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48788 break;
48790 default:
48791 gcc_unreachable ();
48794 return true;
48797 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48798 extract-even and extract-odd permutations. */
48800 static bool
48801 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48803 unsigned i, odd, nelt = d->nelt;
48805 odd = d->perm[0];
48806 if (odd != 0 && odd != 1)
48807 return false;
48809 for (i = 1; i < nelt; ++i)
48810 if (d->perm[i] != 2 * i + odd)
48811 return false;
48813 return expand_vec_perm_even_odd_1 (d, odd);
48816 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48817 permutations. We assume that expand_vec_perm_1 has already failed. */
48819 static bool
48820 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48822 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48823 machine_mode vmode = d->vmode;
48824 unsigned char perm2[4];
48825 rtx op0 = d->op0, dest;
48826 bool ok;
48828 switch (vmode)
48830 case V4DFmode:
48831 case V8SFmode:
48832 /* These are special-cased in sse.md so that we can optionally
48833 use the vbroadcast instruction. They expand to two insns
48834 if the input happens to be in a register. */
48835 gcc_unreachable ();
48837 case V2DFmode:
48838 case V2DImode:
48839 case V4SFmode:
48840 case V4SImode:
48841 /* These are always implementable using standard shuffle patterns. */
48842 gcc_unreachable ();
48844 case V8HImode:
48845 case V16QImode:
48846 /* These can be implemented via interleave. We save one insn by
48847 stopping once we have promoted to V4SImode and then using pshufd.  */
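/* E.g. broadcasting element 5 of a V8HImode vector: one interleave-high
   yields { 4 4 5 5 6 6 7 7 }, and a V4SImode pshufd of dword 1 then
   replicates the { 5 5 } pair across the vector.  */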
48848 if (d->testing_p)
48849 return true;
48852 rtx dest;
48853 rtx (*gen) (rtx, rtx, rtx)
48854 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48855 : gen_vec_interleave_lowv8hi;
48857 if (elt >= nelt2)
48859 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48860 : gen_vec_interleave_highv8hi;
48861 elt -= nelt2;
48863 nelt2 /= 2;
48865 dest = gen_reg_rtx (vmode);
48866 emit_insn (gen (dest, op0, op0));
48867 vmode = get_mode_wider_vector (vmode);
48868 op0 = gen_lowpart (vmode, dest);
48870 while (vmode != V4SImode);
48872 memset (perm2, elt, 4);
48873 dest = gen_reg_rtx (V4SImode);
48874 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48875 gcc_assert (ok);
48876 if (!d->testing_p)
48877 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48878 return true;
48880 case V32QImode:
48881 case V16HImode:
48882 case V8SImode:
48883 case V4DImode:
48884 /* For AVX2 broadcasts of the first element vpbroadcast* or
48885 vpermq should be used by expand_vec_perm_1. */
48886 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48887 return false;
48889 default:
48890 gcc_unreachable ();
48894 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48895 broadcast permutations. */
48897 static bool
48898 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48900 unsigned i, elt, nelt = d->nelt;
48902 if (!d->one_operand_p)
48903 return false;
48905 elt = d->perm[0];
48906 for (i = 1; i < nelt; ++i)
48907 if (d->perm[i] != elt)
48908 return false;
48910 return expand_vec_perm_broadcast_1 (d);
48913 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
48914 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48915 all the shorter instruction sequences. */
48917 static bool
48918 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48920 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48921 unsigned int i, nelt, eltsz;
48922 bool used[4];
48924 if (!TARGET_AVX2
48925 || d->one_operand_p
48926 || (d->vmode != V32QImode && d->vmode != V16HImode))
48927 return false;
48929 if (d->testing_p)
48930 return true;
48932 nelt = d->nelt;
48933 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48935 /* Generate 4 permutation masks. If the required element is within
48936    the same lane, it is shuffled in.  If the required element is from
48937    the other lane, force a zero by setting bit 7 in the permutation mask.
48938    The other mask has non-negative elements where the element is
48939    requested from the other lane but also moved to the other lane,
48940 so that the result of vpshufb can have the two V2TImode halves
48941 swapped. */
48942 m128 = GEN_INT (-128);
48943 for (i = 0; i < 32; ++i)
48945 rperm[0][i] = m128;
48946 rperm[1][i] = m128;
48947 rperm[2][i] = m128;
48948 rperm[3][i] = m128;
48950 used[0] = false;
48951 used[1] = false;
48952 used[2] = false;
48953 used[3] = false;
48954 for (i = 0; i < nelt; ++i)
48956 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48957 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48958 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48960 for (j = 0; j < eltsz; ++j)
48961 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48962 used[which] = true;
48965 for (i = 0; i < 2; ++i)
48967 if (!used[2 * i + 1])
48969 h[i] = NULL_RTX;
48970 continue;
48972 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48973 gen_rtvec_v (32, rperm[2 * i + 1]));
48974 vperm = force_reg (V32QImode, vperm);
48975 h[i] = gen_reg_rtx (V32QImode);
48976 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48977 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48980 /* Swap the 128-bit lanes of h[X].  */
48981 for (i = 0; i < 2; ++i)
48983 if (h[i] == NULL_RTX)
48984 continue;
48985 op = gen_reg_rtx (V4DImode);
48986 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48987 const2_rtx, GEN_INT (3), const0_rtx,
48988 const1_rtx));
48989 h[i] = gen_lowpart (V32QImode, op);
48992 for (i = 0; i < 2; ++i)
48994 if (!used[2 * i])
48996 l[i] = NULL_RTX;
48997 continue;
48999 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49000 vperm = force_reg (V32QImode, vperm);
49001 l[i] = gen_reg_rtx (V32QImode);
49002 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49003 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49006 for (i = 0; i < 2; ++i)
49008 if (h[i] && l[i])
49010 op = gen_reg_rtx (V32QImode);
49011 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49012 l[i] = op;
49014 else if (h[i])
49015 l[i] = h[i];
49018 gcc_assert (l[0] && l[1]);
49019 op = d->target;
49020 if (d->vmode != V32QImode)
49021 op = gen_reg_rtx (V32QImode);
49022 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49023 if (op != d->target)
49024 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49025 return true;
49028 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49029 With all of the interface bits taken care of, perform the expansion
49030 in D and return true on success. */
49032 static bool
49033 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49035 /* Try a single instruction expansion. */
49036 if (expand_vec_perm_1 (d))
49037 return true;
49039 /* Try sequences of two instructions. */
49041 if (expand_vec_perm_pshuflw_pshufhw (d))
49042 return true;
49044 if (expand_vec_perm_palignr (d, false))
49045 return true;
49047 if (expand_vec_perm_interleave2 (d))
49048 return true;
49050 if (expand_vec_perm_broadcast (d))
49051 return true;
49053 if (expand_vec_perm_vpermq_perm_1 (d))
49054 return true;
49056 if (expand_vec_perm_vperm2f128 (d))
49057 return true;
49059 if (expand_vec_perm_pblendv (d))
49060 return true;
49062 /* Try sequences of three instructions. */
49064 if (expand_vec_perm_even_odd_pack (d))
49065 return true;
49067 if (expand_vec_perm_2vperm2f128_vshuf (d))
49068 return true;
49070 if (expand_vec_perm_pshufb2 (d))
49071 return true;
49073 if (expand_vec_perm_interleave3 (d))
49074 return true;
49076 if (expand_vec_perm_vperm2f128_vblend (d))
49077 return true;
49079 /* Try sequences of four instructions. */
49081 if (expand_vec_perm_vpshufb2_vpermq (d))
49082 return true;
49084 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49085 return true;
49087 /* ??? Look for narrow permutations whose element orderings would
49088 allow the promotion to a wider mode. */
49090 /* ??? Look for sequences of interleave or a wider permute that place
49091 the data into the correct lanes for a half-vector shuffle like
49092 pshuf[lh]w or vpermilps. */
49094 /* ??? Look for sequences of interleave that produce the desired results.
49095 The combinatorics of punpck[lh] get pretty ugly... */
49097 if (expand_vec_perm_even_odd (d))
49098 return true;
49100 /* Even longer sequences. */
49101 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49102 return true;
49104 return false;
49107 /* If a permutation only uses one operand, make it clear. Returns true
49108 if the permutation references both operands. */
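/* E.g. a V4SImode selector { 5, 7, 4, 6 } references only op1; it is folded
   to { 1, 3, 0, 2 } applied to op0 = op1, and false is returned.  */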
49110 static bool
49111 canonicalize_perm (struct expand_vec_perm_d *d)
49113 int i, which, nelt = d->nelt;
49115 for (i = which = 0; i < nelt; ++i)
49116 which |= (d->perm[i] < nelt ? 1 : 2);
49118 d->one_operand_p = true;
49119 switch (which)
49121 default:
49122 gcc_unreachable();
49124 case 3:
49125 if (!rtx_equal_p (d->op0, d->op1))
49127 d->one_operand_p = false;
49128 break;
49130 /* The elements of PERM do not suggest that only the first operand
49131 is used, but both operands are identical. Allow easier matching
49132 of the permutation by folding the permutation into the single
49133 input vector. */
49134 /* FALLTHRU */
49136 case 2:
49137 for (i = 0; i < nelt; ++i)
49138 d->perm[i] &= nelt - 1;
49139 d->op0 = d->op1;
49140 break;
49142 case 1:
49143 d->op1 = d->op0;
49144 break;
49147 return (which == 3);
49150 bool
49151 ix86_expand_vec_perm_const (rtx operands[4])
49153 struct expand_vec_perm_d d;
49154 unsigned char perm[MAX_VECT_LEN];
49155 int i, nelt;
49156 bool two_args;
49157 rtx sel;
49159 d.target = operands[0];
49160 d.op0 = operands[1];
49161 d.op1 = operands[2];
49162 sel = operands[3];
49164 d.vmode = GET_MODE (d.target);
49165 gcc_assert (VECTOR_MODE_P (d.vmode));
49166 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49167 d.testing_p = false;
49169 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49170 gcc_assert (XVECLEN (sel, 0) == nelt);
49171 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49173 for (i = 0; i < nelt; ++i)
49175 rtx e = XVECEXP (sel, 0, i);
49176 int ei = INTVAL (e) & (2 * nelt - 1);
49177 d.perm[i] = ei;
49178 perm[i] = ei;
49181 two_args = canonicalize_perm (&d);
49183 if (ix86_expand_vec_perm_const_1 (&d))
49184 return true;
49186 /* If the selector says both arguments are needed, but the operands are the
49187 same, the above tried to expand with one_operand_p and flattened selector.
49188 If that didn't work, retry without one_operand_p; we succeeded with that
49189 during testing. */
49190 if (two_args && d.one_operand_p)
49192 d.one_operand_p = false;
49193 memcpy (d.perm, perm, sizeof (perm));
49194 return ix86_expand_vec_perm_const_1 (&d);
49197 return false;
49200 /* Implement targetm.vectorize.vec_perm_const_ok. */
49202 static bool
49203 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49204 const unsigned char *sel)
49206 struct expand_vec_perm_d d;
49207 unsigned int i, nelt, which;
49208 bool ret;
49210 d.vmode = vmode;
49211 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49212 d.testing_p = true;
49214 /* Given sufficient ISA support we can just return true here
49215 for selected vector modes. */
49216 switch (d.vmode)
49218 case V16SFmode:
49219 case V16SImode:
49220 case V8DImode:
49221 case V8DFmode:
49222 if (TARGET_AVX512F)
49223 /* All implementable with a single vpermi2 insn. */
49224 return true;
49225 break;
49226 case V32HImode:
49227 if (TARGET_AVX512BW)
49228 /* All implementable with a single vpermi2 insn. */
49229 return true;
49230 break;
49231 case V8SImode:
49232 case V8SFmode:
49233 case V4DFmode:
49234 case V4DImode:
49235 if (TARGET_AVX512VL)
49236 /* All implementable with a single vpermi2 insn. */
49237 return true;
49238 break;
49239 case V16HImode:
49240 if (TARGET_AVX2)
49241 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49242 return true;
49243 break;
49244 case V32QImode:
49245 if (TARGET_AVX2)
49246 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49247 return true;
49248 break;
49249 case V4SImode:
49250 case V4SFmode:
49251 case V8HImode:
49252 case V16QImode:
49253 /* All implementable with a single vpperm insn. */
49254 if (TARGET_XOP)
49255 return true;
49256 /* All implementable with 2 pshufb + 1 ior. */
49257 if (TARGET_SSSE3)
49258 return true;
49259 break;
49260 case V2DImode:
49261 case V2DFmode:
49262 /* All implementable with shufpd or unpck[lh]pd. */
49263 return true;
49264 default:
49265 return false;
49268 /* Extract the values from the vector CST into the permutation
49269 array in D. */
49270 memcpy (d.perm, sel, nelt);
49271 for (i = which = 0; i < nelt; ++i)
49273 unsigned char e = d.perm[i];
49274 gcc_assert (e < 2 * nelt);
49275 which |= (e < nelt ? 1 : 2);
49278 /* For all elements from second vector, fold the elements to first. */
49279 if (which == 2)
49280 for (i = 0; i < nelt; ++i)
49281 d.perm[i] -= nelt;
49283 /* Check whether the mask can be applied to the vector type. */
49284 d.one_operand_p = (which != 3);
49286 /* Implementable with shufps or pshufd. */
49287 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49288 return true;
49290 /* Otherwise we have to go through the motions and see if we can
49291 figure out how to generate the requested permutation. */
49292 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49293 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49294 if (!d.one_operand_p)
49295 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49297 start_sequence ();
49298 ret = ix86_expand_vec_perm_const_1 (&d);
49299 end_sequence ();
49301 return ret;
49304 void
49305 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49307 struct expand_vec_perm_d d;
49308 unsigned i, nelt;
49310 d.target = targ;
49311 d.op0 = op0;
49312 d.op1 = op1;
49313 d.vmode = GET_MODE (targ);
49314 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49315 d.one_operand_p = false;
49316 d.testing_p = false;
49318 for (i = 0; i < nelt; ++i)
49319 d.perm[i] = i * 2 + odd;
49321 /* We'll either be able to implement the permutation directly... */
49322 if (expand_vec_perm_1 (&d))
49323 return;
49325 /* ... or we use the special-case patterns. */
49326 expand_vec_perm_even_odd_1 (&d, odd);
49329 static void
49330 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49332 struct expand_vec_perm_d d;
49333 unsigned i, nelt, base;
49334 bool ok;
49336 d.target = targ;
49337 d.op0 = op0;
49338 d.op1 = op1;
49339 d.vmode = GET_MODE (targ);
49340 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49341 d.one_operand_p = false;
49342 d.testing_p = false;
49344 base = high_p ? nelt / 2 : 0;
49345 for (i = 0; i < nelt / 2; ++i)
49347 d.perm[i * 2] = i + base;
49348 d.perm[i * 2 + 1] = i + base + nelt;
49351 /* Note that for AVX this isn't one instruction. */
49352 ok = ix86_expand_vec_perm_const_1 (&d);
49353 gcc_assert (ok);
49357 /* Expand a vector operation CODE for a V*QImode in terms of the
49358 same operation on V*HImode. */
49360 void
49361 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49363 machine_mode qimode = GET_MODE (dest);
49364 machine_mode himode;
49365 rtx (*gen_il) (rtx, rtx, rtx);
49366 rtx (*gen_ih) (rtx, rtx, rtx);
49367 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49368 struct expand_vec_perm_d d;
49369 bool ok, full_interleave;
49370 bool uns_p = false;
49371 int i;
49373 switch (qimode)
49375 case V16QImode:
49376 himode = V8HImode;
49377 gen_il = gen_vec_interleave_lowv16qi;
49378 gen_ih = gen_vec_interleave_highv16qi;
49379 break;
49380 case V32QImode:
49381 himode = V16HImode;
49382 gen_il = gen_avx2_interleave_lowv32qi;
49383 gen_ih = gen_avx2_interleave_highv32qi;
49384 break;
49385 case V64QImode:
49386 himode = V32HImode;
49387 gen_il = gen_avx512bw_interleave_lowv64qi;
49388 gen_ih = gen_avx512bw_interleave_highv64qi;
49389 break;
49390 default:
49391 gcc_unreachable ();
49394 op2_l = op2_h = op2;
49395 switch (code)
49397 case MULT:
49398 /* Unpack data such that we've got a source byte in each low byte of
49399 each word. We don't care what goes into the high byte of each word.
49400 Rather than trying to get zero in there, most convenient is to let
49401 it be a copy of the low byte. */
49402 op2_l = gen_reg_rtx (qimode);
49403 op2_h = gen_reg_rtx (qimode);
49404 emit_insn (gen_il (op2_l, op2, op2));
49405 emit_insn (gen_ih (op2_h, op2, op2));
49406 /* FALLTHRU */
49408 op1_l = gen_reg_rtx (qimode);
49409 op1_h = gen_reg_rtx (qimode);
49410 emit_insn (gen_il (op1_l, op1, op1));
49411 emit_insn (gen_ih (op1_h, op1, op1));
49412 full_interleave = qimode == V16QImode;
49413 break;
49415 case ASHIFT:
49416 case LSHIFTRT:
49417 uns_p = true;
49418 /* FALLTHRU */
49419 case ASHIFTRT:
49420 op1_l = gen_reg_rtx (himode);
49421 op1_h = gen_reg_rtx (himode);
49422 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49423 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49424 full_interleave = true;
49425 break;
49426 default:
49427 gcc_unreachable ();
49430 /* Perform the operation. */
49431 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49432 1, OPTAB_DIRECT);
49433 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49434 1, OPTAB_DIRECT);
49435 gcc_assert (res_l && res_h);
49437 /* Merge the data back into the right place. */
49438 d.target = dest;
49439 d.op0 = gen_lowpart (qimode, res_l);
49440 d.op1 = gen_lowpart (qimode, res_h);
49441 d.vmode = qimode;
49442 d.nelt = GET_MODE_NUNITS (qimode);
49443 d.one_operand_p = false;
49444 d.testing_p = false;
49446 if (full_interleave)
49448 /* For SSE2, we used a full interleave, so the desired
49449 results are in the even elements. */
49450 for (i = 0; i < 64; ++i)
49451 d.perm[i] = i * 2;
49453 else
49455 /* For AVX, the interleave used above was not cross-lane.  So we extract
49456    the even elements, but with the second and third quarters swapped.
49457    Happily, that is even one insn shorter than plain even extraction.  */
49458 for (i = 0; i < 64; ++i)
49459 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
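/* E.g. for V32QImode this gives { 0 2 ... 14  32 34 ... 46  16 18 ... 30
   48 50 ... 62 }: the even elements, with the middle quarters swapped.  */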
49462 ok = ix86_expand_vec_perm_const_1 (&d);
49463 gcc_assert (ok);
49465 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49466 gen_rtx_fmt_ee (code, qimode, op1, op2));
49469 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49470 if op is CONST_VECTOR with all odd elements equal to their
49471 preceding element. */
49473 static bool
49474 const_vector_equal_evenodd_p (rtx op)
49476 machine_mode mode = GET_MODE (op);
49477 int i, nunits = GET_MODE_NUNITS (mode);
49478 if (GET_CODE (op) != CONST_VECTOR
49479 || nunits != CONST_VECTOR_NUNITS (op))
49480 return false;
49481 for (i = 0; i < nunits; i += 2)
49482 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49483 return false;
49484 return true;
49487 void
49488 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49489 bool uns_p, bool odd_p)
49491 machine_mode mode = GET_MODE (op1);
49492 machine_mode wmode = GET_MODE (dest);
49493 rtx x;
49494 rtx orig_op1 = op1, orig_op2 = op2;
49496 if (!nonimmediate_operand (op1, mode))
49497 op1 = force_reg (mode, op1);
49498 if (!nonimmediate_operand (op2, mode))
49499 op2 = force_reg (mode, op2);
49501 /* We only play even/odd games with vectors of SImode. */
49502 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49504 /* If we're looking for the odd results, shift those members down to
49505 the even slots. For some cpus this is faster than a PSHUFD. */
49506 if (odd_p)
49508 /* For XOP use vpmacsdqh, but only for smult, as it is only
49509 signed. */
49510 if (TARGET_XOP && mode == V4SImode && !uns_p)
49512 x = force_reg (wmode, CONST0_RTX (wmode));
49513 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49514 return;
49517 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49518 if (!const_vector_equal_evenodd_p (orig_op1))
49519 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49520 x, NULL, 1, OPTAB_DIRECT);
49521 if (!const_vector_equal_evenodd_p (orig_op2))
49522 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49523 x, NULL, 1, OPTAB_DIRECT);
49524 op1 = gen_lowpart (mode, op1);
49525 op2 = gen_lowpart (mode, op2);
49528 if (mode == V16SImode)
49530 if (uns_p)
49531 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49532 else
49533 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49535 else if (mode == V8SImode)
49537 if (uns_p)
49538 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49539 else
49540 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49542 else if (uns_p)
49543 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49544 else if (TARGET_SSE4_1)
49545 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49546 else
49548 rtx s1, s2, t0, t1, t2;
49550 /* The easiest way to implement this without PMULDQ is to go through
49551    the motions as if we are performing a full 64-bit multiply, except
49552    that we need to do less shuffling of the elements.  */
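/* With s1/s2 below being the all-ones-or-zero sign masks of op1/op2, the
   signed 32x32->64 product modulo 2^64 equals the unsigned product
   op1 * op2 plus ((s1 * op2 + s2 * op1) << 32), so three unsigned widening
   multiplies plus a shift and add suffice.  */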
49554 /* Compute the sign-extension, aka highparts, of the two operands. */
49555 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49556 op1, pc_rtx, pc_rtx);
49557 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49558 op2, pc_rtx, pc_rtx);
49560 /* Multiply LO(A) * HI(B), and vice-versa. */
49561 t1 = gen_reg_rtx (wmode);
49562 t2 = gen_reg_rtx (wmode);
49563 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49564 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49566 /* Multiply LO(A) * LO(B). */
49567 t0 = gen_reg_rtx (wmode);
49568 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49570 /* Combine and shift the highparts into place. */
49571 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49572 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49573 1, OPTAB_DIRECT);
49575 /* Combine high and low parts. */
49576 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49577 return;
49579 emit_insn (x);
49582 void
49583 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49584 bool uns_p, bool high_p)
49586 machine_mode wmode = GET_MODE (dest);
49587 machine_mode mode = GET_MODE (op1);
49588 rtx t1, t2, t3, t4, mask;
49590 switch (mode)
49592 case V4SImode:
49593 t1 = gen_reg_rtx (mode);
49594 t2 = gen_reg_rtx (mode);
49595 if (TARGET_XOP && !uns_p)
49597 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49598 shuffle the elements once so that all elements are in the right
49599 place for immediate use: { A C B D }. */
49600 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49601 const1_rtx, GEN_INT (3)));
49602 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49603 const1_rtx, GEN_INT (3)));
49605 else
49607 /* Put the elements into place for the multiply. */
49608 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49609 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49610 high_p = false;
49612 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49613 break;
49615 case V8SImode:
49616 /* Shuffle the elements between the lanes. After this we
49617 have { A B E F | C D G H } for each operand. */
49618 t1 = gen_reg_rtx (V4DImode);
49619 t2 = gen_reg_rtx (V4DImode);
49620 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49621 const0_rtx, const2_rtx,
49622 const1_rtx, GEN_INT (3)));
49623 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49624 const0_rtx, const2_rtx,
49625 const1_rtx, GEN_INT (3)));
49627 /* Shuffle the elements within the lanes. After this we
49628 have { A A B B | C C D D } or { E E F F | G G H H }. */
49629 t3 = gen_reg_rtx (V8SImode);
49630 t4 = gen_reg_rtx (V8SImode);
49631 mask = GEN_INT (high_p
49632 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49633 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49634 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49635 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49637 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49638 break;
49640 case V8HImode:
49641 case V16HImode:
49642 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49643 uns_p, OPTAB_DIRECT);
49644 t2 = expand_binop (mode,
49645 uns_p ? umul_highpart_optab : smul_highpart_optab,
49646 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49647 gcc_assert (t1 && t2);
49649 t3 = gen_reg_rtx (mode);
49650 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49651 emit_move_insn (dest, gen_lowpart (wmode, t3));
49652 break;
49654 case V16QImode:
49655 case V32QImode:
49656 case V32HImode:
49657 case V16SImode:
49658 case V64QImode:
49659 t1 = gen_reg_rtx (wmode);
49660 t2 = gen_reg_rtx (wmode);
49661 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49662 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49664 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49665 break;
49667 default:
49668 gcc_unreachable ();
49672 void
49673 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49675 rtx res_1, res_2, res_3, res_4;
49677 res_1 = gen_reg_rtx (V4SImode);
49678 res_2 = gen_reg_rtx (V4SImode);
49679 res_3 = gen_reg_rtx (V2DImode);
49680 res_4 = gen_reg_rtx (V2DImode);
49681 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49682 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49684 /* Move the results in element 2 down to element 1; we don't care
49685 what goes in elements 2 and 3. Then we can merge the parts
49686 back together with an interleave.
49688 Note that two other sequences were tried:
49689 (1) Use interleaves at the start instead of psrldq, which allows
49690 us to use a single shufps to merge things back at the end.
49691 (2) Use shufps here to combine the two vectors, then pshufd to
49692 put the elements in the correct order.
49693 In both cases the cost of the reformatting stall was too high
49694 and the overall sequence slower. */
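/* Viewed as V4SImode, res_3 is { lo0, hi0, lo2, hi2 } (the even-element
   products) and res_4 the same for the odd elements; the pshufd selectors
   { 0, 2, 0, 0 } pull the two low dwords together, and the final punpckldq
   interleave yields the four low parts in order.  */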
49696 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49697 const0_rtx, const2_rtx,
49698 const0_rtx, const0_rtx));
49699 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49700 const0_rtx, const2_rtx,
49701 const0_rtx, const0_rtx));
49702 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49704 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49707 void
49708 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49710 machine_mode mode = GET_MODE (op0);
49711 rtx t1, t2, t3, t4, t5, t6;
49713 if (TARGET_AVX512DQ && mode == V8DImode)
49714 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49715 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49716 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49717 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49718 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49719 else if (TARGET_XOP && mode == V2DImode)
49721 /* op1: A,B,C,D, op2: E,F,G,H */
49722 op1 = gen_lowpart (V4SImode, op1);
49723 op2 = gen_lowpart (V4SImode, op2);
49725 t1 = gen_reg_rtx (V4SImode);
49726 t2 = gen_reg_rtx (V4SImode);
49727 t3 = gen_reg_rtx (V2DImode);
49728 t4 = gen_reg_rtx (V2DImode);
49730 /* t1: B,A,D,C */
49731 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49732 GEN_INT (1),
49733 GEN_INT (0),
49734 GEN_INT (3),
49735 GEN_INT (2)));
49737 /* t2: (B*E),(A*F),(D*G),(C*H) */
49738 emit_insn (gen_mulv4si3 (t2, t1, op2));
49740 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49741 emit_insn (gen_xop_phadddq (t3, t2));
49743 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49744 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49746 /* Multiply lower parts and add all */
49747 t5 = gen_reg_rtx (V2DImode);
49748 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49749 gen_lowpart (V4SImode, op1),
49750 gen_lowpart (V4SImode, op2)));
49751 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49754 else
49756 machine_mode nmode;
49757 rtx (*umul) (rtx, rtx, rtx);
49759 if (mode == V2DImode)
49761 umul = gen_vec_widen_umult_even_v4si;
49762 nmode = V4SImode;
49764 else if (mode == V4DImode)
49766 umul = gen_vec_widen_umult_even_v8si;
49767 nmode = V8SImode;
49769 else if (mode == V8DImode)
49771 umul = gen_vec_widen_umult_even_v16si;
49772 nmode = V16SImode;
49774 else
49775 gcc_unreachable ();
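/* Writing each input as hi * 2^32 + lo, the product modulo 2^64 is
   lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32); the hi1 * hi2 term only
   contributes above bit 63 and is dropped.  That is the sequence below.  */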
49778 /* Multiply low parts. */
49779 t1 = gen_reg_rtx (mode);
49780 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49782 /* Shift input vectors right 32 bits so we can multiply high parts. */
49783 t6 = GEN_INT (32);
49784 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49785 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49787 /* Multiply high parts by low parts. */
49788 t4 = gen_reg_rtx (mode);
49789 t5 = gen_reg_rtx (mode);
49790 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49791 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49793 /* Combine and shift the highparts back. */
49794 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49795 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49797 /* Combine high and low parts. */
49798 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49801 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49802 gen_rtx_MULT (mode, op1, op2));
49805 /* Return 1 if control transfer instruction INSN
49806    should be encoded with the bnd prefix.
49807    If INSN is NULL then return 1 when control
49808    transfer instructions should be prefixed with
49809    bnd by default for the current function.  */
49811 bool
49812 ix86_bnd_prefixed_insn_p (rtx insn)
49814 /* For call insns check special flag. */
49815 if (insn && CALL_P (insn))
49817 rtx call = get_call_rtx_from (insn);
49818 if (call)
49819 return CALL_EXPR_WITH_BOUNDS_P (call);
49822 /* All other insns are prefixed only if function is instrumented. */
49823 return chkp_function_instrumented_p (current_function_decl);
49826 /* Calculate integer abs() using only SSE2 instructions. */
49828 void
49829 ix86_expand_sse2_abs (rtx target, rtx input)
49831 machine_mode mode = GET_MODE (target);
49832 rtx tmp0, tmp1, x;
49834 switch (mode)
49836 /* For 32-bit signed integer X, the best way to calculate the absolute
49837 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
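/* For example, with W = 32 and X = -5: X >> 31 is -1 (all ones),
   X ^ -1 is 4, and 4 - (-1) is 5; for non-negative X the shift is 0
   and the expression reduces to X.  */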
49838 case V4SImode:
49839 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49840 GEN_INT (GET_MODE_BITSIZE
49841 (GET_MODE_INNER (mode)) - 1),
49842 NULL, 0, OPTAB_DIRECT);
49843 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49844 NULL, 0, OPTAB_DIRECT);
49845 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49846 target, 0, OPTAB_DIRECT);
49847 break;
49849 /* For 16-bit signed integer X, the best way to calculate the absolute
49850 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49851 case V8HImode:
49852 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49854 x = expand_simple_binop (mode, SMAX, tmp0, input,
49855 target, 0, OPTAB_DIRECT);
49856 break;
49858 /* For 8-bit signed integer X, the best way to calculate the absolute
49859 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49860 as SSE2 provides the PMINUB insn. */
49861 case V16QImode:
49862 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49864 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49865 target, 0, OPTAB_DIRECT);
49866 break;
49868 default:
49869 gcc_unreachable ();
49872 if (x != target)
49873 emit_move_insn (target, x);
49876 /* Expand an insert into a vector register through pinsr insn.
49877 Return true if successful. */
49879 bool
49880 ix86_expand_pinsr (rtx *operands)
49882 rtx dst = operands[0];
49883 rtx src = operands[3];
49885 unsigned int size = INTVAL (operands[1]);
49886 unsigned int pos = INTVAL (operands[2]);
49888 if (GET_CODE (dst) == SUBREG)
49890 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49891 dst = SUBREG_REG (dst);
49894 if (GET_CODE (src) == SUBREG)
49895 src = SUBREG_REG (src);
49897 switch (GET_MODE (dst))
49899 case V16QImode:
49900 case V8HImode:
49901 case V4SImode:
49902 case V2DImode:
49904 machine_mode srcmode, dstmode;
49905 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49907 srcmode = mode_for_size (size, MODE_INT, 0);
49909 switch (srcmode)
49911 case QImode:
49912 if (!TARGET_SSE4_1)
49913 return false;
49914 dstmode = V16QImode;
49915 pinsr = gen_sse4_1_pinsrb;
49916 break;
49918 case HImode:
49919 if (!TARGET_SSE2)
49920 return false;
49921 dstmode = V8HImode;
49922 pinsr = gen_sse2_pinsrw;
49923 break;
49925 case SImode:
49926 if (!TARGET_SSE4_1)
49927 return false;
49928 dstmode = V4SImode;
49929 pinsr = gen_sse4_1_pinsrd;
49930 break;
49932 case DImode:
49933 gcc_assert (TARGET_64BIT);
49934 if (!TARGET_SSE4_1)
49935 return false;
49936 dstmode = V2DImode;
49937 pinsr = gen_sse4_1_pinsrq;
49938 break;
49940 default:
49941 return false;
49944 rtx d = dst;
49945 if (GET_MODE (dst) != dstmode)
49946 d = gen_reg_rtx (dstmode);
49947 src = gen_lowpart (srcmode, src);
49949 pos /= size;
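/* POS was a bit position; it is now the element index, and the pinsr
   expanders below are passed the single-element merge mask 1 << index.  */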
49951 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
49952 GEN_INT (1 << pos)));
49953 if (d != dst)
49954 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49955 return true;
49958 default:
49959 return false;
49963 /* Return the calling-ABI-specific va_list type node,
49964    i.e. the va_list type specific to FNDECL.  */
49966 static tree
49967 ix86_fn_abi_va_list (tree fndecl)
49969 if (!TARGET_64BIT)
49970 return va_list_type_node;
49971 gcc_assert (fndecl != NULL_TREE);
49973 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49974 return ms_va_list_type_node;
49975 else
49976 return sysv_va_list_type_node;
49979 /* Returns the canonical va_list type specified by TYPE. If there
49980 is no valid TYPE provided, it returns NULL_TREE.  */
49982 static tree
49983 ix86_canonical_va_list_type (tree type)
49985 tree wtype, htype;
49987 /* Resolve references and pointers to va_list type. */
49988 if (TREE_CODE (type) == MEM_REF)
49989 type = TREE_TYPE (type);
49990 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
49991 type = TREE_TYPE (type);
49992 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
49993 type = TREE_TYPE (type);
49995 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
49997 wtype = va_list_type_node;
49998 gcc_assert (wtype != NULL_TREE);
49999 htype = type;
50000 if (TREE_CODE (wtype) == ARRAY_TYPE)
50002 /* If va_list is an array type, the argument may have decayed
50003 to a pointer type, e.g. by being passed to another function.
50004 In that case, unwrap both types so that we can compare the
50005 underlying records. */
50006 if (TREE_CODE (htype) == ARRAY_TYPE
50007 || POINTER_TYPE_P (htype))
50009 wtype = TREE_TYPE (wtype);
50010 htype = TREE_TYPE (htype);
50013 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50014 return va_list_type_node;
50015 wtype = sysv_va_list_type_node;
50016 gcc_assert (wtype != NULL_TREE);
50017 htype = type;
50018 if (TREE_CODE (wtype) == ARRAY_TYPE)
50020 /* If va_list is an array type, the argument may have decayed
50021 to a pointer type, e.g. by being passed to another function.
50022 In that case, unwrap both types so that we can compare the
50023 underlying records. */
50024 if (TREE_CODE (htype) == ARRAY_TYPE
50025 || POINTER_TYPE_P (htype))
50027 wtype = TREE_TYPE (wtype);
50028 htype = TREE_TYPE (htype);
50031 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50032 return sysv_va_list_type_node;
50033 wtype = ms_va_list_type_node;
50034 gcc_assert (wtype != NULL_TREE);
50035 htype = type;
50036 if (TREE_CODE (wtype) == ARRAY_TYPE)
50038 /* If va_list is an array type, the argument may have decayed
50039 to a pointer type, e.g. by being passed to another function.
50040 In that case, unwrap both types so that we can compare the
50041 underlying records. */
50042 if (TREE_CODE (htype) == ARRAY_TYPE
50043 || POINTER_TYPE_P (htype))
50045 wtype = TREE_TYPE (wtype);
50046 htype = TREE_TYPE (htype);
50049 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50050 return ms_va_list_type_node;
50051 return NULL_TREE;
50053 return std_canonical_va_list_type (type);
50056 /* Iterate through the target-specific builtin types for va_list.
50057 IDX denotes the iterator, *PTREE is set to the result type of
50058 the va_list builtin, and *PNAME to its internal type.
50059 Returns zero if there is no element for this index, otherwise
50060 IDX should be increased upon the next call.
50061 Note, do not iterate a base builtin's name like __builtin_va_list.
50062 Used from c_common_nodes_and_builtins. */
50064 static int
50065 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50067 if (TARGET_64BIT)
50069 switch (idx)
50071 default:
50072 break;
50074 case 0:
50075 *ptree = ms_va_list_type_node;
50076 *pname = "__builtin_ms_va_list";
50077 return 1;
50079 case 1:
50080 *ptree = sysv_va_list_type_node;
50081 *pname = "__builtin_sysv_va_list";
50082 return 1;
50086 return 0;
50089 #undef TARGET_SCHED_DISPATCH
50090 #define TARGET_SCHED_DISPATCH has_dispatch
50091 #undef TARGET_SCHED_DISPATCH_DO
50092 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50093 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50094 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50095 #undef TARGET_SCHED_REORDER
50096 #define TARGET_SCHED_REORDER ix86_sched_reorder
50097 #undef TARGET_SCHED_ADJUST_PRIORITY
50098 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50099 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50100 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50101 ix86_dependencies_evaluation_hook
50103 /* The size of the dispatch window is the total number of bytes of
50104 object code allowed in a window. */
50105 #define DISPATCH_WINDOW_SIZE 16
50107 /* Number of dispatch windows considered for scheduling. */
50108 #define MAX_DISPATCH_WINDOWS 3
50110 /* Maximum number of instructions in a window. */
50111 #define MAX_INSN 4
50113 /* Maximum number of immediate operands in a window. */
50114 #define MAX_IMM 4
50116 /* Maximum number of immediate bits allowed in a window. */
50117 #define MAX_IMM_SIZE 128
50119 /* Maximum number of 32 bit immediates allowed in a window. */
50120 #define MAX_IMM_32 4
50122 /* Maximum number of 64 bit immediates allowed in a window. */
50123 #define MAX_IMM_64 2
50125 /* Maximum total of loads or prefetches allowed in a window. */
50126 #define MAX_LOAD 2
50128 /* Maximum total of stores allowed in a window. */
50129 #define MAX_STORE 1
50131 #undef BIG
50132 #define BIG 100
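/* Illustrative sketch (not part of GCC): taken together, the limits above
   mean a dispatch window can hold at most MAX_INSN (4) instructions, of
   which at most MAX_LOAD (2) are loads or prefetches and at most
   MAX_STORE (1) is a store, with at most MAX_IMM (4) immediate operands.  */
#if 0
static int
window_within_limits (int insns, int loads, int stores, int imms)
{
  return (insns <= MAX_INSN
	  && loads <= MAX_LOAD
	  && stores <= MAX_STORE
	  && imms <= MAX_IMM);
}
#endif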
50135 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50136 enum dispatch_group {
50137 disp_no_group = 0,
50138 disp_load,
50139 disp_store,
50140 disp_load_store,
50141 disp_prefetch,
50142 disp_imm,
50143 disp_imm_32,
50144 disp_imm_64,
50145 disp_branch,
50146 disp_cmp,
50147 disp_jcc,
50148 disp_last
50151 /* Number of allowable groups in a dispatch window. It is an array
50152 indexed by the dispatch_group enum. 100 is used as a big number,
50153 because the number of these kinds of operations does not have any
50154 effect on the dispatch window, but we need entries for them in
50155 the table. */
50156 static unsigned int num_allowable_groups[disp_last] = {
50157 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50160 char group_name[disp_last + 1][16] = {
50161 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50162 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50163 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50166 /* Instruction path. */
50167 enum insn_path {
50168 no_path = 0,
50169 path_single, /* Single micro op. */
50170 path_double, /* Double micro op. */
50171 path_multi, /* Instructions with more than 2 micro ops. */
50172 last_path
50175 /* sched_insn_info defines a window to the instructions scheduled in
50176 the basic block. It contains a pointer to the insn_info table and
50177 the instruction scheduled.
50179 Windows are allocated for each basic block and are linked
50180 together. */
50181 typedef struct sched_insn_info_s {
50182 rtx insn;
50183 enum dispatch_group group;
50184 enum insn_path path;
50185 int byte_len;
50186 int imm_bytes;
50187 } sched_insn_info;
50189 /* Linked list of dispatch windows. This is a two way list of
50190 dispatch windows of a basic block. It contains information about
50191 the number of uops in the window and the total number of
50192 instructions and of bytes in the object code for this dispatch
50193 window. */
50194 typedef struct dispatch_windows_s {
50195 int num_insn; /* Number of insn in the window. */
50196 int num_uops; /* Number of uops in the window. */
50197 int window_size; /* Number of bytes in the window. */
50198 int window_num; /* Window number, either 0 or 1. */
50199 int num_imm; /* Number of immediates in an insn. */
50200 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50201 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50202 int imm_size; /* Total immediates in the window. */
50203 int num_loads; /* Total memory loads in the window. */
50204 int num_stores; /* Total memory stores in the window. */
50205 int violation; /* Violation exists in window. */
50206 sched_insn_info *window; /* Pointer to the window. */
50207 struct dispatch_windows_s *next;
50208 struct dispatch_windows_s *prev;
50209 } dispatch_windows;
50211 /* Immediate values used in an insn. */
50212 typedef struct imm_info_s
50214 int imm;
50215 int imm32;
50216 int imm64;
50217 } imm_info;
50219 static dispatch_windows *dispatch_window_list;
50220 static dispatch_windows *dispatch_window_list1;
50222 /* Get dispatch group of insn. */
50224 static enum dispatch_group
50225 get_mem_group (rtx_insn *insn)
50227 enum attr_memory memory;
50229 if (INSN_CODE (insn) < 0)
50230 return disp_no_group;
50231 memory = get_attr_memory (insn);
50232 if (memory == MEMORY_STORE)
50233 return disp_store;
50235 if (memory == MEMORY_LOAD)
50236 return disp_load;
50238 if (memory == MEMORY_BOTH)
50239 return disp_load_store;
50241 return disp_no_group;
50244 /* Return true if insn is a compare instruction. */
50246 static bool
50247 is_cmp (rtx_insn *insn)
50249 enum attr_type type;
50251 type = get_attr_type (insn);
50252 return (type == TYPE_TEST
50253 || type == TYPE_ICMP
50254 || type == TYPE_FCMP
50255 || GET_CODE (PATTERN (insn)) == COMPARE);
50258 /* Return true if a dispatch violation was encountered. */
50260 static bool
50261 dispatch_violation (void)
50263 if (dispatch_window_list->next)
50264 return dispatch_window_list->next->violation;
50265 return dispatch_window_list->violation;
50268 /* Return true if insn is a branch instruction. */
50270 static bool
50271 is_branch (rtx insn)
50273 return (CALL_P (insn) || JUMP_P (insn));
50276 /* Return true if insn is a prefetch instruction. */
50278 static bool
50279 is_prefetch (rtx insn)
50281 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50284 /* This function initializes a dispatch window and the list container holding a
50285 pointer to the window. */
50287 static void
50288 init_window (int window_num)
50290 int i;
50291 dispatch_windows *new_list;
50293 if (window_num == 0)
50294 new_list = dispatch_window_list;
50295 else
50296 new_list = dispatch_window_list1;
50298 new_list->num_insn = 0;
50299 new_list->num_uops = 0;
50300 new_list->window_size = 0;
50301 new_list->next = NULL;
50302 new_list->prev = NULL;
50303 new_list->window_num = window_num;
50304 new_list->num_imm = 0;
50305 new_list->num_imm_32 = 0;
50306 new_list->num_imm_64 = 0;
50307 new_list->imm_size = 0;
50308 new_list->num_loads = 0;
50309 new_list->num_stores = 0;
50310 new_list->violation = false;
50312 for (i = 0; i < MAX_INSN; i++)
50314 new_list->window[i].insn = NULL;
50315 new_list->window[i].group = disp_no_group;
50316 new_list->window[i].path = no_path;
50317 new_list->window[i].byte_len = 0;
50318 new_list->window[i].imm_bytes = 0;
50320 return;
50323 /* This function allocates and initializes a dispatch window and the
50324 list container holding a pointer to the window. */
50326 static dispatch_windows *
50327 allocate_window (void)
50329 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50330 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50332 return new_list;
50335 /* This routine initializes the dispatch scheduling information. It
50336 initiates building dispatch scheduler tables and constructs the
50337 first dispatch window. */
50339 static void
50340 init_dispatch_sched (void)
50342 /* Allocate a dispatch list and a window. */
50343 dispatch_window_list = allocate_window ();
50344 dispatch_window_list1 = allocate_window ();
50345 init_window (0);
50346 init_window (1);
50349 /* This function returns true if a branch is detected. End of a basic block
50350 does not have to be a branch, but here we assume only branches end a
50351 window. */
50353 static bool
50354 is_end_basic_block (enum dispatch_group group)
50356 return group == disp_branch;
50359 /* This function is called when the end of a window processing is reached. */
50361 static void
50362 process_end_window (void)
50364 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50365 if (dispatch_window_list->next)
50367 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50368 gcc_assert (dispatch_window_list->window_size
50369 + dispatch_window_list1->window_size <= 48);
50370 init_window (1);
50372 init_window (0);
50375 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50376 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50377 for 48 bytes of instructions. Note that these windows are not dispatch
50378 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50380 static dispatch_windows *
50381 allocate_next_window (int window_num)
50383 if (window_num == 0)
50385 if (dispatch_window_list->next)
50386 init_window (1);
50387 init_window (0);
50388 return dispatch_window_list;
50391 dispatch_window_list->next = dispatch_window_list1;
50392 dispatch_window_list1->prev = dispatch_window_list;
50394 return dispatch_window_list1;
50397 /* Compute number of immediate operands of an instruction. */
50399 static void
50400 find_constant (rtx in_rtx, imm_info *imm_values)
50402 if (INSN_P (in_rtx))
50403 in_rtx = PATTERN (in_rtx);
50404 subrtx_iterator::array_type array;
50405 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50406 if (const_rtx x = *iter)
50407 switch (GET_CODE (x))
50409 case CONST:
50410 case SYMBOL_REF:
50411 case CONST_INT:
50412 (imm_values->imm)++;
50413 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50414 (imm_values->imm32)++;
50415 else
50416 (imm_values->imm64)++;
50417 break;
50419 case CONST_DOUBLE:
50420 (imm_values->imm)++;
50421 (imm_values->imm64)++;
50422 break;
50424 case CODE_LABEL:
50425 if (LABEL_KIND (x) == LABEL_NORMAL)
50427 (imm_values->imm)++;
50428 (imm_values->imm32)++;
50430 break;
50432 default:
50433 break;
50437 /* Return total size of immediate operands of an instruction along with number
50438 of corresponding immediate operands. It initializes its parameters to zero
50439 before calling FIND_CONSTANT.
50440 INSN is the input instruction. IMM is the total of immediates.
50441 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50442 bit immediates. */
50444 static int
50445 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50447 imm_info imm_values = {0, 0, 0};
50449 find_constant (insn, &imm_values);
50450 *imm = imm_values.imm;
50451 *imm32 = imm_values.imm32;
50452 *imm64 = imm_values.imm64;
50453 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
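/* Illustrative sketch (not part of GCC): for an insn carrying one 32-bit
   and one 64-bit immediate, FIND_CONSTANT yields imm32 == 1 and imm64 == 1,
   so the byte total returned above is 1 * 4 + 1 * 8 == 12.  */
#if 0
static int
example_immediate_bytes (void)
{
  int imm32 = 1, imm64 = 1;
  return imm32 * 4 + imm64 * 8;	/* == 12 */
}
#endif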
50456 /* This function indicates whether any operand of an instruction is an
50457 immediate. */
50459 static bool
50460 has_immediate (rtx insn)
50462 int num_imm_operand;
50463 int num_imm32_operand;
50464 int num_imm64_operand;
50466 if (insn)
50467 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50468 &num_imm64_operand);
50469 return false;
50472 /* Return single or double path for instructions. */
50474 static enum insn_path
50475 get_insn_path (rtx_insn *insn)
50477 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50479 if ((int)path == 0)
50480 return path_single;
50482 if ((int)path == 1)
50483 return path_double;
50485 return path_multi;
50488 /* Return insn dispatch group. */
50490 static enum dispatch_group
50491 get_insn_group (rtx_insn *insn)
50493 enum dispatch_group group = get_mem_group (insn);
50494 if (group)
50495 return group;
50497 if (is_branch (insn))
50498 return disp_branch;
50500 if (is_cmp (insn))
50501 return disp_cmp;
50503 if (has_immediate (insn))
50504 return disp_imm;
50506 if (is_prefetch (insn))
50507 return disp_prefetch;
50509 return disp_no_group;
50512 /* Count number of GROUP restricted instructions in a dispatch
50513 window WINDOW_LIST. */
50515 static int
50516 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50518 enum dispatch_group group = get_insn_group (insn);
50519 int imm_size;
50520 int num_imm_operand;
50521 int num_imm32_operand;
50522 int num_imm64_operand;
50524 if (group == disp_no_group)
50525 return 0;
50527 if (group == disp_imm)
50529 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50530 &num_imm64_operand);
50531 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50532 || num_imm_operand + window_list->num_imm > MAX_IMM
50533 || (num_imm32_operand > 0
50534 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50535 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50536 || (num_imm64_operand > 0
50537 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50538 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50539 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50540 && num_imm64_operand > 0
50541 && ((window_list->num_imm_64 > 0
50542 && window_list->num_insn >= 2)
50543 || window_list->num_insn >= 3)))
50544 return BIG;
50546 return 1;
50549 if ((group == disp_load_store
50550 && (window_list->num_loads >= MAX_LOAD
50551 || window_list->num_stores >= MAX_STORE))
50552 || ((group == disp_load
50553 || group == disp_prefetch)
50554 && window_list->num_loads >= MAX_LOAD)
50555 || (group == disp_store
50556 && window_list->num_stores >= MAX_STORE))
50557 return BIG;
50559 return 1;
50562 /* This function returns true if insn satisfies dispatch rules on the
50563 last window scheduled. */
50565 static bool
50566 fits_dispatch_window (rtx_insn *insn)
50568 dispatch_windows *window_list = dispatch_window_list;
50569 dispatch_windows *window_list_next = dispatch_window_list->next;
50570 unsigned int num_restrict;
50571 enum dispatch_group group = get_insn_group (insn);
50572 enum insn_path path = get_insn_path (insn);
50573 int sum;
50575 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50576 instructions should be given the lowest priority in the
50577 scheduling process in Haifa scheduler to make sure they will be
50578 scheduled in the same dispatch window as the reference to them. */
50579 if (group == disp_jcc || group == disp_cmp)
50580 return false;
50582 /* Check nonrestricted. */
50583 if (group == disp_no_group || group == disp_branch)
50584 return true;
50586 /* Get last dispatch window. */
50587 if (window_list_next)
50588 window_list = window_list_next;
50590 if (window_list->window_num == 1)
50592 sum = window_list->prev->window_size + window_list->window_size;
50594 if (sum == 32
50595 || (min_insn_size (insn) + sum) >= 48)
50596 /* Window 1 is full. Go for next window. */
50597 return true;
50600 num_restrict = count_num_restricted (insn, window_list);
50602 if (num_restrict > num_allowable_groups[group])
50603 return false;
50605 /* See if it fits in the first window. */
50606 if (window_list->window_num == 0)
50608 /* The first window should have only single and double path
50609 uops. */
50610 if (path == path_double
50611 && (window_list->num_uops + 2) > MAX_INSN)
50612 return false;
50613 else if (path != path_single)
50614 return false;
50616 return true;
50619 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50620 dispatch window WINDOW_LIST. */
50622 static void
50623 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50625 int byte_len = min_insn_size (insn);
50626 int num_insn = window_list->num_insn;
50627 int imm_size;
50628 sched_insn_info *window = window_list->window;
50629 enum dispatch_group group = get_insn_group (insn);
50630 enum insn_path path = get_insn_path (insn);
50631 int num_imm_operand;
50632 int num_imm32_operand;
50633 int num_imm64_operand;
50635 if (!window_list->violation && group != disp_cmp
50636 && !fits_dispatch_window (insn))
50637 window_list->violation = true;
50639 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50640 &num_imm64_operand);
50642 /* Initialize window with new instruction. */
50643 window[num_insn].insn = insn;
50644 window[num_insn].byte_len = byte_len;
50645 window[num_insn].group = group;
50646 window[num_insn].path = path;
50647 window[num_insn].imm_bytes = imm_size;
50649 window_list->window_size += byte_len;
50650 window_list->num_insn = num_insn + 1;
50651 window_list->num_uops = window_list->num_uops + num_uops;
50652 window_list->imm_size += imm_size;
50653 window_list->num_imm += num_imm_operand;
50654 window_list->num_imm_32 += num_imm32_operand;
50655 window_list->num_imm_64 += num_imm64_operand;
50657 if (group == disp_store)
50658 window_list->num_stores += 1;
50659 else if (group == disp_load
50660 || group == disp_prefetch)
50661 window_list->num_loads += 1;
50662 else if (group == disp_load_store)
50664 window_list->num_stores += 1;
50665 window_list->num_loads += 1;
50669 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50670 If the total bytes of instructions or the number of instructions in
50671 the window exceeds the allowable limit, it allocates a new window. */
50673 static void
50674 add_to_dispatch_window (rtx_insn *insn)
50676 int byte_len;
50677 dispatch_windows *window_list;
50678 dispatch_windows *next_list;
50679 dispatch_windows *window0_list;
50680 enum insn_path path;
50681 enum dispatch_group insn_group;
50682 bool insn_fits;
50683 int num_insn;
50684 int num_uops;
50685 int window_num;
50686 int insn_num_uops;
50687 int sum;
50689 if (INSN_CODE (insn) < 0)
50690 return;
50692 byte_len = min_insn_size (insn);
50693 window_list = dispatch_window_list;
50694 next_list = window_list->next;
50695 path = get_insn_path (insn);
50696 insn_group = get_insn_group (insn);
50698 /* Get the last dispatch window. */
50699 if (next_list)
50700 window_list = dispatch_window_list->next;
50702 if (path == path_single)
50703 insn_num_uops = 1;
50704 else if (path == path_double)
50705 insn_num_uops = 2;
50706 else
50707 insn_num_uops = (int) path;
50709 /* If the current window is full, get a new window.
50710 Window number zero is full if MAX_INSN uops are scheduled in it.
50711 Window number one is full if window zero's bytes plus window
50712 one's bytes equal 32, or if adding the bytes of the new instruction
50713 to the total makes it greater than 48, or if it already has MAX_INSN
50714 instructions in it. */
50715 num_insn = window_list->num_insn;
50716 num_uops = window_list->num_uops;
50717 window_num = window_list->window_num;
50718 insn_fits = fits_dispatch_window (insn);
50720 if (num_insn >= MAX_INSN
50721 || num_uops + insn_num_uops > MAX_INSN
50722 || !(insn_fits))
50724 window_num = ~window_num & 1;
50725 window_list = allocate_next_window (window_num);
50728 if (window_num == 0)
50730 add_insn_window (insn, window_list, insn_num_uops);
50731 if (window_list->num_insn >= MAX_INSN
50732 && insn_group == disp_branch)
50734 process_end_window ();
50735 return;
50738 else if (window_num == 1)
50740 window0_list = window_list->prev;
50741 sum = window0_list->window_size + window_list->window_size;
50742 if (sum == 32
50743 || (byte_len + sum) >= 48)
50745 process_end_window ();
50746 window_list = dispatch_window_list;
50749 add_insn_window (insn, window_list, insn_num_uops);
50751 else
50752 gcc_unreachable ();
50754 if (is_end_basic_block (insn_group))
50756 /* End of basic block is reached; do end-of-basic-block processing. */
50757 process_end_window ();
50758 return;
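/* Illustrative sketch (not part of GCC): when the current window is full,
   the code above flips between window 0 and window 1 using ~window_num & 1,
   which maps 0 to 1 and 1 to 0.  */
#if 0
static int
next_window_num (int window_num)
{
  return ~window_num & 1;	/* 0 -> 1, 1 -> 0 */
}
#endif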
50762 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50764 DEBUG_FUNCTION static void
50765 debug_dispatch_window_file (FILE *file, int window_num)
50767 dispatch_windows *list;
50768 int i;
50770 if (window_num == 0)
50771 list = dispatch_window_list;
50772 else
50773 list = dispatch_window_list1;
50775 fprintf (file, "Window #%d:\n", list->window_num);
50776 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50777 list->num_insn, list->num_uops, list->window_size);
50778 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50779 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50781 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50782 list->num_stores);
50783 fprintf (file, " insn info:\n");
50785 for (i = 0; i < MAX_INSN; i++)
50787 if (!list->window[i].insn)
50788 break;
50789 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50790 i, group_name[list->window[i].group],
50791 i, (void *)list->window[i].insn,
50792 i, list->window[i].path,
50793 i, list->window[i].byte_len,
50794 i, list->window[i].imm_bytes);
50798 /* Print to stdout a dispatch window. */
50800 DEBUG_FUNCTION void
50801 debug_dispatch_window (int window_num)
50803 debug_dispatch_window_file (stdout, window_num);
50806 /* Print INSN dispatch information to FILE. */
50808 DEBUG_FUNCTION static void
50809 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50811 int byte_len;
50812 enum insn_path path;
50813 enum dispatch_group group;
50814 int imm_size;
50815 int num_imm_operand;
50816 int num_imm32_operand;
50817 int num_imm64_operand;
50819 if (INSN_CODE (insn) < 0)
50820 return;
50822 byte_len = min_insn_size (insn);
50823 path = get_insn_path (insn);
50824 group = get_insn_group (insn);
50825 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50826 &num_imm64_operand);
50828 fprintf (file, " insn info:\n");
50829 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50830 group_name[group], path, byte_len);
50831 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50832 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50835 /* Print to STDERR the status of the ready list with respect to
50836 dispatch windows. */
50838 DEBUG_FUNCTION void
50839 debug_ready_dispatch (void)
50841 int i;
50842 int no_ready = number_in_ready ();
50844 fprintf (stdout, "Number of ready: %d\n", no_ready);
50846 for (i = 0; i < no_ready; i++)
50847 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50850 /* This routine is the driver of the dispatch scheduler. */
50852 static void
50853 do_dispatch (rtx_insn *insn, int mode)
50855 if (mode == DISPATCH_INIT)
50856 init_dispatch_sched ();
50857 else if (mode == ADD_TO_DISPATCH_WINDOW)
50858 add_to_dispatch_window (insn);
50861 /* Return TRUE if Dispatch Scheduling is supported. */
50863 static bool
50864 has_dispatch (rtx_insn *insn, int action)
50866 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
50867 && flag_dispatch_scheduler)
50868 switch (action)
50870 default:
50871 return false;
50873 case IS_DISPATCH_ON:
50874 return true;
50875 break;
50877 case IS_CMP:
50878 return is_cmp (insn);
50880 case DISPATCH_VIOLATION:
50881 return dispatch_violation ();
50883 case FITS_DISPATCH_WINDOW:
50884 return fits_dispatch_window (insn);
50887 return false;
50890 /* Implementation of reassociation_width target hook used by
50891 reassoc phase to identify parallelism level in reassociated
50892 tree. Statements tree_code is passed in OPC. Arguments type
50893 is passed in MODE.
50895 Currently parallel reassociation is enabled for Atom
50896 processors only and we set reassociation width to be 2
50897 because Atom may issue up to 2 instructions per cycle.
50899 Return value should be fixed if parallel reassociation is
50900 enabled for other processors. */
50902 static int
50903 ix86_reassociation_width (unsigned int, machine_mode mode)
50905 int res = 1;
50907 /* Vector part. */
50908 if (VECTOR_MODE_P (mode))
50910 if (TARGET_VECTOR_PARALLEL_EXECUTION)
50911 return 2;
50912 else
50913 return 1;
50916 /* Scalar part. */
50917 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
50918 res = 2;
50919 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
50920 res = 2;
50922 return res;
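/* Illustrative sketch (not part of GCC): with a reassociation width of 2,
   the reassoc pass may rewrite a serial chain such as ((a + b) + c) + d
   into the shape below, exposing two independent additions that a 2-issue
   core like Atom can execute in parallel.  */
#if 0
static double
reassoc_width_two_example (double a, double b, double c, double d)
{
  return (a + b) + (c + d);
}
#endif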
50925 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50926 place emms and femms instructions. */
50928 static machine_mode
50929 ix86_preferred_simd_mode (machine_mode mode)
50931 if (!TARGET_SSE)
50932 return word_mode;
50934 switch (mode)
50936 case QImode:
50937 return TARGET_AVX512BW ? V64QImode :
50938 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
50939 case HImode:
50940 return TARGET_AVX512BW ? V32HImode :
50941 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
50942 case SImode:
50943 return TARGET_AVX512F ? V16SImode :
50944 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
50945 case DImode:
50946 return TARGET_AVX512F ? V8DImode :
50947 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
50949 case SFmode:
50950 if (TARGET_AVX512F)
50951 return V16SFmode;
50952 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50953 return V8SFmode;
50954 else
50955 return V4SFmode;
50957 case DFmode:
50958 if (!TARGET_VECTORIZE_DOUBLE)
50959 return word_mode;
50960 else if (TARGET_AVX512F)
50961 return V8DFmode;
50962 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50963 return V4DFmode;
50964 else if (TARGET_SSE2)
50965 return V2DFmode;
50966 /* FALLTHRU */
50968 default:
50969 return word_mode;
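/* Illustrative sketch (not part of GCC): per the mapping above, a simple
   32-bit integer loop such as the one below is offered V16SImode (512-bit)
   under -mavx512f, V8SImode (256-bit) under -mavx (assuming the tuning does
   not prefer 128-bit AVX), and V4SImode (128-bit) with baseline SSE.  */
#if 0
static void
add_arrays (int *a, const int *b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] += b[i];
}
#endif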
50973 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50974 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50975 256bit and 128bit vectors. */
50977 static unsigned int
50978 ix86_autovectorize_vector_sizes (void)
50980 return TARGET_AVX512F ? 64 | 32 | 16 :
50981 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
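/* Illustrative sketch (not part of GCC): the hook returns a bit mask of
   candidate vector sizes in bytes, so with AVX-512F it evaluates to
   64 | 32 | 16 == 112, i.e. try 512-, 256- and 128-bit vectors in turn.  */
#if 0
static unsigned int
avx512_vector_sizes_example (void)
{
  return 64 | 32 | 16;	/* == 112 */
}
#endif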
50986 /* Return class of registers which could be used for pseudo of MODE
50987 and of class RCLASS for spilling instead of memory. Return NO_REGS
50988 if it is not possible or non-profitable. */
50989 static reg_class_t
50990 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50992 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
50993 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50994 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
50995 return ALL_SSE_REGS;
50996 return NO_REGS;
50999 /* Implement targetm.vectorize.init_cost. */
51001 static void *
51002 ix86_init_cost (struct loop *)
51004 unsigned *cost = XNEWVEC (unsigned, 3);
51005 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51006 return cost;
51009 /* Implement targetm.vectorize.add_stmt_cost. */
51011 static unsigned
51012 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51013 struct _stmt_vec_info *stmt_info, int misalign,
51014 enum vect_cost_model_location where)
51016 unsigned *cost = (unsigned *) data;
51017 unsigned retval = 0;
51019 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51020 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51022 /* Statements in an inner loop relative to the loop being
51023 vectorized are weighted more heavily. The value here is
51024 arbitrary and could potentially be improved with analysis. */
51025 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51026 count *= 50; /* FIXME. */
51028 retval = (unsigned) (count * stmt_cost);
51030 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
51031 for Silvermont as it has an out-of-order integer pipeline and can execute
51032 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51033 if (TARGET_SILVERMONT || TARGET_INTEL)
51034 if (stmt_info && stmt_info->stmt)
51036 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51037 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51038 retval = (retval * 17) / 10;
51041 cost[where] += retval;
51043 return retval;
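/* Illustrative sketch (not part of GCC): for Silvermont, an integer vector
   statement costed with count == 4 and stmt_cost == 10 first gives 40, and
   the 1.7x adjustment above turns that into (40 * 17) / 10 == 68.  */
#if 0
static unsigned
silvermont_stmt_cost_example (void)
{
  unsigned retval = 4 * 10;
  return (retval * 17) / 10;	/* == 68 */
}
#endif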
51046 /* Implement targetm.vectorize.finish_cost. */
51048 static void
51049 ix86_finish_cost (void *data, unsigned *prologue_cost,
51050 unsigned *body_cost, unsigned *epilogue_cost)
51052 unsigned *cost = (unsigned *) data;
51053 *prologue_cost = cost[vect_prologue];
51054 *body_cost = cost[vect_body];
51055 *epilogue_cost = cost[vect_epilogue];
51058 /* Implement targetm.vectorize.destroy_cost_data. */
51060 static void
51061 ix86_destroy_cost_data (void *data)
51063 free (data);
51066 /* Validate target specific memory model bits in VAL. */
51068 static unsigned HOST_WIDE_INT
51069 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51071 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51072 bool strong;
51074 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51075 |MEMMODEL_MASK)
51076 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51078 warning (OPT_Winvalid_memory_model,
51079 "Unknown architecture specific memory model");
51080 return MEMMODEL_SEQ_CST;
51082 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51083 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51085 warning (OPT_Winvalid_memory_model,
51086 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51087 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51089 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51091 warning (OPT_Winvalid_memory_model,
51092 "HLE_RELEASE not used with RELEASE or stronger memory model");
51093 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51095 return val;
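/* Illustrative sketch (not part of this file): the target-specific HLE bits
   validated above are exposed to users as __ATOMIC_HLE_ACQUIRE and
   __ATOMIC_HLE_RELEASE, which must be combined with an acquire (or stronger)
   and release (or stronger) model respectively, e.g. for an elided spinlock.  */
#if 0
static int lockvar;

static void
hle_lock (void)
{
  while (__atomic_exchange_n (&lockvar, 1,
			      __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;	/* spin (optionally with a pause) */
}

static void
hle_unlock (void)
{
  __atomic_store_n (&lockvar, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}
#endif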
51098 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51099 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51100 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51101 or number of vecsize_mangle variants that should be emitted. */
51103 static int
51104 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51105 struct cgraph_simd_clone *clonei,
51106 tree base_type, int num)
51108 int ret = 1;
51110 if (clonei->simdlen
51111 && (clonei->simdlen < 2
51112 || clonei->simdlen > 16
51113 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51115 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51116 "unsupported simdlen %d", clonei->simdlen);
51117 return 0;
51120 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51121 if (TREE_CODE (ret_type) != VOID_TYPE)
51122 switch (TYPE_MODE (ret_type))
51124 case QImode:
51125 case HImode:
51126 case SImode:
51127 case DImode:
51128 case SFmode:
51129 case DFmode:
51130 /* case SCmode: */
51131 /* case DCmode: */
51132 break;
51133 default:
51134 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51135 "unsupported return type %qT for simd\n", ret_type);
51136 return 0;
51139 tree t;
51140 int i;
51142 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51143 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51144 switch (TYPE_MODE (TREE_TYPE (t)))
51146 case QImode:
51147 case HImode:
51148 case SImode:
51149 case DImode:
51150 case SFmode:
51151 case DFmode:
51152 /* case SCmode: */
51153 /* case DCmode: */
51154 break;
51155 default:
51156 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51157 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51158 return 0;
51161 if (clonei->cilk_elemental)
51163 /* Parse the processor clause here. If not present, default to 'b'. */
51164 clonei->vecsize_mangle = 'b';
51166 else if (!TREE_PUBLIC (node->decl))
51168 /* If the function isn't exported, we can pick up just one ISA
51169 for the clones. */
51170 if (TARGET_AVX2)
51171 clonei->vecsize_mangle = 'd';
51172 else if (TARGET_AVX)
51173 clonei->vecsize_mangle = 'c';
51174 else
51175 clonei->vecsize_mangle = 'b';
51176 ret = 1;
51178 else
51180 clonei->vecsize_mangle = "bcd"[num];
51181 ret = 3;
51183 switch (clonei->vecsize_mangle)
51185 case 'b':
51186 clonei->vecsize_int = 128;
51187 clonei->vecsize_float = 128;
51188 break;
51189 case 'c':
51190 clonei->vecsize_int = 128;
51191 clonei->vecsize_float = 256;
51192 break;
51193 case 'd':
51194 clonei->vecsize_int = 256;
51195 clonei->vecsize_float = 256;
51196 break;
51198 if (clonei->simdlen == 0)
51200 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51201 clonei->simdlen = clonei->vecsize_int;
51202 else
51203 clonei->simdlen = clonei->vecsize_float;
51204 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51205 if (clonei->simdlen > 16)
51206 clonei->simdlen = 16;
51208 return ret;
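/* Illustrative sketch (not part of GCC): for the 'c' (AVX) variant above,
   vecsize_float is 256; a SIMD clone of a function whose base type is a
   32-bit float and that has no explicit simdlen therefore ends up with
   simdlen = 256 / 32 == 8 lanes.  */
#if 0
static int
avx_float_simdlen_example (void)
{
  int vecsize_float = 256, base_type_bits = 32;
  return vecsize_float / base_type_bits;	/* == 8 */
}
#endif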
51211 /* Add target attribute to SIMD clone NODE if needed. */
51213 static void
51214 ix86_simd_clone_adjust (struct cgraph_node *node)
51216 const char *str = NULL;
51217 gcc_assert (node->decl == cfun->decl);
51218 switch (node->simdclone->vecsize_mangle)
51220 case 'b':
51221 if (!TARGET_SSE2)
51222 str = "sse2";
51223 break;
51224 case 'c':
51225 if (!TARGET_AVX)
51226 str = "avx";
51227 break;
51228 case 'd':
51229 if (!TARGET_AVX2)
51230 str = "avx2";
51231 break;
51232 default:
51233 gcc_unreachable ();
51235 if (str == NULL)
51236 return;
51237 push_cfun (NULL);
51238 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51239 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51240 gcc_assert (ok);
51241 pop_cfun ();
51242 ix86_reset_previous_fndecl ();
51243 ix86_set_current_function (node->decl);
51246 /* If SIMD clone NODE can't be used in a vectorized loop
51247 in current function, return -1, otherwise return a badness of using it
51248 (0 if it is most desirable from vecsize_mangle point of view, 1
51249 slightly less desirable, etc.). */
51251 static int
51252 ix86_simd_clone_usable (struct cgraph_node *node)
51254 switch (node->simdclone->vecsize_mangle)
51256 case 'b':
51257 if (!TARGET_SSE2)
51258 return -1;
51259 if (!TARGET_AVX)
51260 return 0;
51261 return TARGET_AVX2 ? 2 : 1;
51262 case 'c':
51263 if (!TARGET_AVX)
51264 return -1;
51265 return TARGET_AVX2 ? 1 : 0;
51266 break;
51267 case 'd':
51268 if (!TARGET_AVX2)
51269 return -1;
51270 return 0;
51271 default:
51272 gcc_unreachable ();
51276 /* This function adjusts the unroll factor based on
51277 the hardware capabilities. For example, bdver3 has
51278 a loop buffer which makes unrolling of smaller
51279 loops less important. This function decides the
51280 unroll factor using the number of memory references
51281 (value 32 is used) as a heuristic. */
51283 static unsigned
51284 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51286 basic_block *bbs;
51287 rtx_insn *insn;
51288 unsigned i;
51289 unsigned mem_count = 0;
51291 if (!TARGET_ADJUST_UNROLL)
51292 return nunroll;
51294 /* Count the number of memory references within the loop body.
51295 This value determines the unrolling factor for bdver3 and bdver4
51296 architectures. */
51297 subrtx_iterator::array_type array;
51298 bbs = get_loop_body (loop);
51299 for (i = 0; i < loop->num_nodes; i++)
51300 FOR_BB_INSNS (bbs[i], insn)
51301 if (NONDEBUG_INSN_P (insn))
51302 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51303 if (const_rtx x = *iter)
51304 if (MEM_P (x))
51306 machine_mode mode = GET_MODE (x);
51307 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51308 if (n_words > 4)
51309 mem_count += 2;
51310 else
51311 mem_count += 1;
51313 free (bbs);
51315 if (mem_count && mem_count <= 32)
51316 return 32 / mem_count;
51318 return nunroll;
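/* Illustrative sketch (not part of GCC): with TARGET_ADJUST_UNROLL set, a
   loop body containing 8 counted memory references (very wide references
   count twice) makes the hook return 32 / 8 == 4 as the unroll factor,
   replacing whatever NUNROLL the generic unroller proposed.  */
#if 0
static unsigned
bdver_unroll_example (void)
{
  unsigned mem_count = 8;
  return 32 / mem_count;	/* == 4 */
}
#endif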
51322 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51324 static bool
51325 ix86_float_exceptions_rounding_supported_p (void)
51327 /* For x87 floating point with standard excess precision handling,
51328 there is no adddf3 pattern (since x87 floating point only has
51329 XFmode operations) so the default hook implementation gets this
51330 wrong. */
51331 return TARGET_80387 || TARGET_SSE_MATH;
51334 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51336 static void
51337 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51339 if (!TARGET_80387 && !TARGET_SSE_MATH)
51340 return;
51341 tree exceptions_var = create_tmp_var (integer_type_node);
51342 if (TARGET_80387)
51344 tree fenv_index_type = build_index_type (size_int (6));
51345 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51346 tree fenv_var = create_tmp_var (fenv_type);
51347 mark_addressable (fenv_var);
51348 tree fenv_ptr = build_pointer_type (fenv_type);
51349 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51350 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51351 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51352 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51353 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51354 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51355 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51356 tree hold_fnclex = build_call_expr (fnclex, 0);
51357 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51358 hold_fnclex);
51359 *clear = build_call_expr (fnclex, 0);
51360 tree sw_var = create_tmp_var (short_unsigned_type_node);
51361 tree fnstsw_call = build_call_expr (fnstsw, 0);
51362 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51363 sw_var, fnstsw_call);
51364 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51365 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51366 exceptions_var, exceptions_x87);
51367 *update = build2 (COMPOUND_EXPR, integer_type_node,
51368 sw_mod, update_mod);
51369 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51370 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51372 if (TARGET_SSE_MATH)
51374 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51375 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51376 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51377 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51378 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51379 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51380 mxcsr_orig_var, stmxcsr_hold_call);
51381 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51382 mxcsr_orig_var,
51383 build_int_cst (unsigned_type_node, 0x1f80));
51384 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51385 build_int_cst (unsigned_type_node, 0xffffffc0));
51386 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51387 mxcsr_mod_var, hold_mod_val);
51388 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51389 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51390 hold_assign_orig, hold_assign_mod);
51391 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51392 ldmxcsr_hold_call);
51393 if (*hold)
51394 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51395 else
51396 *hold = hold_all;
51397 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51398 if (*clear)
51399 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51400 ldmxcsr_clear_call);
51401 else
51402 *clear = ldmxcsr_clear_call;
51403 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51404 tree exceptions_sse = fold_convert (integer_type_node,
51405 stxmcsr_update_call);
51406 if (*update)
51408 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51409 exceptions_var, exceptions_sse);
51410 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51411 exceptions_var, exceptions_mod);
51412 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51413 exceptions_assign);
51415 else
51416 *update = build2 (MODIFY_EXPR, integer_type_node,
51417 exceptions_var, exceptions_sse);
51418 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51419 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51420 ldmxcsr_update_call);
51422 tree atomic_feraiseexcept
51423 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51424 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51425 1, exceptions_var);
51426 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51427 atomic_feraiseexcept_call);
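/* Illustrative sketch (not part of GCC, using the standard <fenv.h> API as
   an analogy): the HOLD/CLEAR/UPDATE trees built above follow roughly the
   protocol below for an atomic floating-point compound assignment; the real
   expansion uses the x87/SSE builtins and __atomic_feraiseexcept instead.  */
#if 0
#include <fenv.h>

static void
atomic_fenv_protocol_sketch (void)
{
  fenv_t env;
  int excepts;

  feholdexcept (&env);			/* HOLD: save env, clear exceptions.  */

  /* ... attempt the atomic operation; if it must be retried ...  */
  feclearexcept (FE_ALL_EXCEPT);	/* CLEAR: drop its exceptions.  */

  /* Once the operation has succeeded:  */
  excepts = fetestexcept (FE_ALL_EXCEPT);	/* UPDATE: collect exceptions,  */
  fesetenv (&env);				/* restore the environment      */
  feraiseexcept (excepts);			/* and re-raise what was seen.  */
}
#endif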
51430 /* Return mode to be used for bounds or VOIDmode
51431 if bounds are not supported. */
51433 static enum machine_mode
51434 ix86_mpx_bound_mode ()
51436 /* Do not support pointer checker if MPX
51437 is not enabled. */
51438 if (!TARGET_MPX)
51440 if (flag_check_pointer_bounds)
51441 warning (0, "Pointer Checker requires MPX support on this target."
51442 " Use -mmpx options to enable MPX.");
51443 return VOIDmode;
51446 return BNDmode;
51449 /* Return constant used to statically initialize constant bounds.
51451 This function is used to create special bound values. For now
51452 only INIT bounds and NONE bounds are expected. More special
51453 values may be added later. */
51455 static tree
51456 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51458 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51459 : build_zero_cst (pointer_sized_int_node);
51460 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51461 : build_minus_one_cst (pointer_sized_int_node);
51463 /* This function is supposed to be used to create INIT and
51464 NONE bounds only. */
51465 gcc_assert ((lb == 0 && ub == -1)
51466 || (lb == -1 && ub == 0));
51468 return build_complex (NULL, low, high);
51471 /* Generate a list of statements STMTS to initialize pointer bounds
51472 variable VAR with bounds LB and UB. Return the number of generated
51473 statements. */
51475 static int
51476 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51478 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51479 tree lhs, modify, var_p;
51481 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51482 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51484 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51485 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51486 append_to_statement_list (modify, stmts);
51488 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51489 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51490 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51491 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51492 append_to_statement_list (modify, stmts);
51494 return 2;
51497 /* Initialize the GCC target structure. */
51498 #undef TARGET_RETURN_IN_MEMORY
51499 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51501 #undef TARGET_LEGITIMIZE_ADDRESS
51502 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51504 #undef TARGET_ATTRIBUTE_TABLE
51505 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51506 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51507 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51508 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51509 # undef TARGET_MERGE_DECL_ATTRIBUTES
51510 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51511 #endif
51513 #undef TARGET_COMP_TYPE_ATTRIBUTES
51514 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51516 #undef TARGET_INIT_BUILTINS
51517 #define TARGET_INIT_BUILTINS ix86_init_builtins
51518 #undef TARGET_BUILTIN_DECL
51519 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51520 #undef TARGET_EXPAND_BUILTIN
51521 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51523 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51524 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51525 ix86_builtin_vectorized_function
51527 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51528 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51530 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51531 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51533 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51534 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51536 #undef TARGET_BUILTIN_RECIPROCAL
51537 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51539 #undef TARGET_ASM_FUNCTION_EPILOGUE
51540 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51542 #undef TARGET_ENCODE_SECTION_INFO
51543 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51544 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51545 #else
51546 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51547 #endif
51549 #undef TARGET_ASM_OPEN_PAREN
51550 #define TARGET_ASM_OPEN_PAREN ""
51551 #undef TARGET_ASM_CLOSE_PAREN
51552 #define TARGET_ASM_CLOSE_PAREN ""
51554 #undef TARGET_ASM_BYTE_OP
51555 #define TARGET_ASM_BYTE_OP ASM_BYTE
51557 #undef TARGET_ASM_ALIGNED_HI_OP
51558 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51559 #undef TARGET_ASM_ALIGNED_SI_OP
51560 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51561 #ifdef ASM_QUAD
51562 #undef TARGET_ASM_ALIGNED_DI_OP
51563 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51564 #endif
51566 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51567 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51569 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51570 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51572 #undef TARGET_ASM_UNALIGNED_HI_OP
51573 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51574 #undef TARGET_ASM_UNALIGNED_SI_OP
51575 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51576 #undef TARGET_ASM_UNALIGNED_DI_OP
51577 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51579 #undef TARGET_PRINT_OPERAND
51580 #define TARGET_PRINT_OPERAND ix86_print_operand
51581 #undef TARGET_PRINT_OPERAND_ADDRESS
51582 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51583 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51584 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51585 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51586 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51588 #undef TARGET_SCHED_INIT_GLOBAL
51589 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51590 #undef TARGET_SCHED_ADJUST_COST
51591 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51592 #undef TARGET_SCHED_ISSUE_RATE
51593 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51594 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51595 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51596 ia32_multipass_dfa_lookahead
51597 #undef TARGET_SCHED_MACRO_FUSION_P
51598 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51599 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51600 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51602 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51603 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51605 #undef TARGET_MEMMODEL_CHECK
51606 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51608 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51609 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51611 #ifdef HAVE_AS_TLS
51612 #undef TARGET_HAVE_TLS
51613 #define TARGET_HAVE_TLS true
51614 #endif
51615 #undef TARGET_CANNOT_FORCE_CONST_MEM
51616 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51617 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51618 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51620 #undef TARGET_DELEGITIMIZE_ADDRESS
51621 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51623 #undef TARGET_MS_BITFIELD_LAYOUT_P
51624 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51626 #if TARGET_MACHO
51627 #undef TARGET_BINDS_LOCAL_P
51628 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51629 #endif
51630 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51631 #undef TARGET_BINDS_LOCAL_P
51632 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51633 #endif
51635 #undef TARGET_ASM_OUTPUT_MI_THUNK
51636 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51637 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51638 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51640 #undef TARGET_ASM_FILE_START
51641 #define TARGET_ASM_FILE_START x86_file_start
51643 #undef TARGET_OPTION_OVERRIDE
51644 #define TARGET_OPTION_OVERRIDE ix86_option_override
51646 #undef TARGET_REGISTER_MOVE_COST
51647 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51648 #undef TARGET_MEMORY_MOVE_COST
51649 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51650 #undef TARGET_RTX_COSTS
51651 #define TARGET_RTX_COSTS ix86_rtx_costs
51652 #undef TARGET_ADDRESS_COST
51653 #define TARGET_ADDRESS_COST ix86_address_cost
51655 #undef TARGET_FIXED_CONDITION_CODE_REGS
51656 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51657 #undef TARGET_CC_MODES_COMPATIBLE
51658 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51660 #undef TARGET_MACHINE_DEPENDENT_REORG
51661 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51663 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51664 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51666 #undef TARGET_BUILD_BUILTIN_VA_LIST
51667 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51669 #undef TARGET_FOLD_BUILTIN
51670 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51672 #undef TARGET_COMPARE_VERSION_PRIORITY
51673 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51675 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51676 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51677 ix86_generate_version_dispatcher_body
51679 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51680 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51681 ix86_get_function_versions_dispatcher
51683 #undef TARGET_ENUM_VA_LIST_P
51684 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51686 #undef TARGET_FN_ABI_VA_LIST
51687 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51689 #undef TARGET_CANONICAL_VA_LIST_TYPE
51690 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51692 #undef TARGET_EXPAND_BUILTIN_VA_START
51693 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51695 #undef TARGET_MD_ASM_CLOBBERS
51696 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51698 #undef TARGET_PROMOTE_PROTOTYPES
51699 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51700 #undef TARGET_SETUP_INCOMING_VARARGS
51701 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51702 #undef TARGET_MUST_PASS_IN_STACK
51703 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51704 #undef TARGET_FUNCTION_ARG_ADVANCE
51705 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51706 #undef TARGET_FUNCTION_ARG
51707 #define TARGET_FUNCTION_ARG ix86_function_arg
51708 #undef TARGET_INIT_PIC_REG
51709 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51710 #undef TARGET_USE_PSEUDO_PIC_REG
51711 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51712 #undef TARGET_FUNCTION_ARG_BOUNDARY
51713 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51714 #undef TARGET_PASS_BY_REFERENCE
51715 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51716 #undef TARGET_INTERNAL_ARG_POINTER
51717 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51718 #undef TARGET_UPDATE_STACK_BOUNDARY
51719 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51720 #undef TARGET_GET_DRAP_RTX
51721 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51722 #undef TARGET_STRICT_ARGUMENT_NAMING
51723 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51724 #undef TARGET_STATIC_CHAIN
51725 #define TARGET_STATIC_CHAIN ix86_static_chain
51726 #undef TARGET_TRAMPOLINE_INIT
51727 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51728 #undef TARGET_RETURN_POPS_ARGS
51729 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51731 #undef TARGET_LEGITIMATE_COMBINED_INSN
51732 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51734 #undef TARGET_ASAN_SHADOW_OFFSET
51735 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51737 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51738 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51740 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51741 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51743 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51744 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51746 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51747 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51748 ix86_libgcc_floating_mode_supported_p
51750 #undef TARGET_C_MODE_FOR_SUFFIX
51751 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51753 #ifdef HAVE_AS_TLS
51754 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51755 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51756 #endif
51758 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51759 #undef TARGET_INSERT_ATTRIBUTES
51760 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51761 #endif
51763 #undef TARGET_MANGLE_TYPE
51764 #define TARGET_MANGLE_TYPE ix86_mangle_type
51766 #if !TARGET_MACHO
51767 #undef TARGET_STACK_PROTECT_FAIL
51768 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51769 #endif
51771 #undef TARGET_FUNCTION_VALUE
51772 #define TARGET_FUNCTION_VALUE ix86_function_value
51774 #undef TARGET_FUNCTION_VALUE_REGNO_P
51775 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51777 #undef TARGET_PROMOTE_FUNCTION_MODE
51778 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51780 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51781 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51783 #undef TARGET_INSTANTIATE_DECLS
51784 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51786 #undef TARGET_SECONDARY_RELOAD
51787 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51789 #undef TARGET_CLASS_MAX_NREGS
51790 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51792 #undef TARGET_PREFERRED_RELOAD_CLASS
51793 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51794 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51795 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51796 #undef TARGET_CLASS_LIKELY_SPILLED_P
51797 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51799 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51800 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51801 ix86_builtin_vectorization_cost
51802 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51803 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51804 ix86_vectorize_vec_perm_const_ok
51805 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51806 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51807 ix86_preferred_simd_mode
51808 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51809 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51810 ix86_autovectorize_vector_sizes
51811 #undef TARGET_VECTORIZE_INIT_COST
51812 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51813 #undef TARGET_VECTORIZE_ADD_STMT_COST
51814 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51815 #undef TARGET_VECTORIZE_FINISH_COST
51816 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51817 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51818 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51820 #undef TARGET_SET_CURRENT_FUNCTION
51821 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51823 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51824 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51826 #undef TARGET_OPTION_SAVE
51827 #define TARGET_OPTION_SAVE ix86_function_specific_save
51829 #undef TARGET_OPTION_RESTORE
51830 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51832 #undef TARGET_OPTION_PRINT
51833 #define TARGET_OPTION_PRINT ix86_function_specific_print
51835 #undef TARGET_OPTION_FUNCTION_VERSIONS
51836 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
51838 #undef TARGET_CAN_INLINE_P
51839 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51841 #undef TARGET_EXPAND_TO_RTL_HOOK
51842 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
51844 #undef TARGET_LEGITIMATE_ADDRESS_P
51845 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51847 #undef TARGET_LRA_P
51848 #define TARGET_LRA_P hook_bool_void_true
51850 #undef TARGET_REGISTER_PRIORITY
51851 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51853 #undef TARGET_REGISTER_USAGE_LEVELING_P
51854 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51856 #undef TARGET_LEGITIMATE_CONSTANT_P
51857 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
51859 #undef TARGET_FRAME_POINTER_REQUIRED
51860 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51862 #undef TARGET_CAN_ELIMINATE
51863 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51865 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51866 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
51868 #undef TARGET_ASM_CODE_END
51869 #define TARGET_ASM_CODE_END ix86_code_end
51871 #undef TARGET_CONDITIONAL_REGISTER_USAGE
51872 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
51874 #if TARGET_MACHO
51875 #undef TARGET_INIT_LIBFUNCS
51876 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
51877 #endif
51879 #undef TARGET_LOOP_UNROLL_ADJUST
51880 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
51882 #undef TARGET_SPILL_CLASS
51883 #define TARGET_SPILL_CLASS ix86_spill_class
51885 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
51886 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
51887 ix86_simd_clone_compute_vecsize_and_simdlen
51889 #undef TARGET_SIMD_CLONE_ADJUST
51890 #define TARGET_SIMD_CLONE_ADJUST \
51891 ix86_simd_clone_adjust
51893 #undef TARGET_SIMD_CLONE_USABLE
51894 #define TARGET_SIMD_CLONE_USABLE \
51895 ix86_simd_clone_usable
51897 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
51898 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
51899 ix86_float_exceptions_rounding_supported_p
51901 #undef TARGET_MODE_EMIT
51902 #define TARGET_MODE_EMIT ix86_emit_mode_set
51904 #undef TARGET_MODE_NEEDED
51905 #define TARGET_MODE_NEEDED ix86_mode_needed
51907 #undef TARGET_MODE_AFTER
51908 #define TARGET_MODE_AFTER ix86_mode_after
51910 #undef TARGET_MODE_ENTRY
51911 #define TARGET_MODE_ENTRY ix86_mode_entry
51913 #undef TARGET_MODE_EXIT
51914 #define TARGET_MODE_EXIT ix86_mode_exit
51916 #undef TARGET_MODE_PRIORITY
51917 #define TARGET_MODE_PRIORITY ix86_mode_priority
51919 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
51920 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
51922 #undef TARGET_LOAD_BOUNDS_FOR_ARG
51923 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
51925 #undef TARGET_STORE_BOUNDS_FOR_ARG
51926 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
51928 #undef TARGET_LOAD_RETURNED_BOUNDS
51929 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
51931 #undef TARGET_STORE_RETURNED_BOUNDS
51932 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
51934 #undef TARGET_CHKP_BOUND_MODE
51935 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
51937 #undef TARGET_BUILTIN_CHKP_FUNCTION
51938 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
51940 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
51941 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
51943 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
51944 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
51946 #undef TARGET_CHKP_INITIALIZE_BOUNDS
51947 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
51949 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
51950 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
51952 #undef TARGET_OFFLOAD_OPTIONS
51953 #define TARGET_OFFLOAD_OPTIONS \
51954 ix86_offload_options
51956 struct gcc_target targetm = TARGET_INITIALIZER;
51958 #include "gt-i386.h"