Issue an error for ms_abi attribute with x32
[official-gcc.git] / gcc / config / i386 / i386.c
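The change named in the title lands further down in this file, outside this excerpt. As a hedged, editorial sketch of the kind of check the title describes — assuming it is added where a function's calling ABI is resolved (e.g. in ix86_function_type_abi); names and message text here are illustrative, not the verbatim patch — it would look roughly like:

/* Hypothetical sketch, not the verbatim patch: the ms_abi attribute
   selects the 64-bit Microsoft calling convention, which x32
   (TARGET_X32, i.e. ILP32 on top of the 64-bit ISA) cannot honor,
   so the attribute is rejected with an error.  */
static enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;
  if (fntype && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      if (TARGET_X32)
	error ("X32 does not support ms_abi attribute");
      abi = MS_ABI;
    }
  else if (fntype && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;
  return abi;
}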
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "hash-map.h"
68 #include "is-a.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "hash-table.h"
73 #include "tree-ssa-alias.h"
74 #include "internal-fn.h"
75 #include "gimple-fold.h"
76 #include "tree-eh.h"
77 #include "gimple-expr.h"
78 #include "gimple.h"
79 #include "gimplify.h"
80 #include "cfgloop.h"
81 #include "dwarf2.h"
82 #include "df.h"
83 #include "tm-constrs.h"
84 #include "params.h"
85 #include "cselib.h"
86 #include "debug.h"
87 #include "sched-int.h"
88 #include "sbitmap.h"
89 #include "fibheap.h"
90 #include "opts.h"
91 #include "diagnostic.h"
92 #include "dumpfile.h"
93 #include "tree-pass.h"
94 #include "wide-int.h"
95 #include "context.h"
96 #include "pass_manager.h"
97 #include "target-globals.h"
98 #include "tree-vectorizer.h"
99 #include "shrink-wrap.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102 #include "tree-iterator.h"
103 #include "tree-chkp.h"
104 #include "rtl-chkp.h"
106 static rtx legitimize_dllimport_symbol (rtx, bool);
107 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
108 static rtx legitimize_pe_coff_symbol (rtx, bool);
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
114 /* Return index of given mode in mult and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
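/* Usage note (editorial, hedged): the per-mode arrays in the cost tables
   below are indexed with this macro, e.g. the SImode multiply start-up
   cost would be read roughly as cost->mult_init[MODE_INDEX (SImode)];
   modes wider than DImode fall into the trailing "other" slot, index 4.  */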
122 /* Processor costs (relative to an add) */
123 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
124 #define COSTS_N_BYTES(N) ((N) * 2)
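/* Worked example of the scaling above: with COSTS_N_INSNS (N) == (N) * 4,
   a one-insn operation costs 4; with COSTS_N_BYTES (N) == (N) * 2, a
   2-byte add also costs 4.  Byte-based and insn-based cost tables are
   therefore directly comparable, with the add as the common baseline.  */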
126 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
128 static stringop_algs ix86_size_memcpy[2] = {
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
130 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
131 static stringop_algs ix86_size_memset[2] = {
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
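/* Layout note (editorial, hedged): each stringop_algs value names an
   algorithm for blocks of unknown size, followed by {max_size, algorithm,
   noalign} entries for known sizes, terminated by max_size == -1.  The
   two-element arrays are believed to hold the 32-bit variant in [0] and
   the 64-bit variant in [1]; DUMMY_STRINGOP_ALGS fills a 64-bit slot that
   a 32-bit-only tuning never consults.  */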
135 const
136 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
137 COSTS_N_BYTES (2), /* cost of an add instruction */
138 COSTS_N_BYTES (3), /* cost of a lea instruction */
139 COSTS_N_BYTES (2), /* variable shift costs */
140 COSTS_N_BYTES (3), /* constant shift costs */
141 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
142 COSTS_N_BYTES (3), /* HI */
143 COSTS_N_BYTES (3), /* SI */
144 COSTS_N_BYTES (3), /* DI */
145 COSTS_N_BYTES (5)}, /* other */
146 0, /* cost of multiply per each bit set */
147 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
148 COSTS_N_BYTES (3), /* HI */
149 COSTS_N_BYTES (3), /* SI */
150 COSTS_N_BYTES (3), /* DI */
151 COSTS_N_BYTES (5)}, /* other */
152 COSTS_N_BYTES (3), /* cost of movsx */
153 COSTS_N_BYTES (3), /* cost of movzx */
154 0, /* "large" insn */
155 2, /* MOVE_RATIO */
156 2, /* cost for loading QImode using movzbl */
157 {2, 2, 2}, /* cost of loading integer registers
158 in QImode, HImode and SImode.
159 Relative to reg-reg move (2). */
160 {2, 2, 2}, /* cost of storing integer registers */
161 2, /* cost of reg,reg fld/fst */
162 {2, 2, 2}, /* cost of loading fp registers
163 in SFmode, DFmode and XFmode */
164 {2, 2, 2}, /* cost of storing fp registers
165 in SFmode, DFmode and XFmode */
166 3, /* cost of moving MMX register */
167 {3, 3}, /* cost of loading MMX registers
168 in SImode and DImode */
169 {3, 3}, /* cost of storing MMX registers
170 in SImode and DImode */
171 3, /* cost of moving SSE register */
172 {3, 3, 3}, /* cost of loading SSE registers
173 in SImode, DImode and TImode */
174 {3, 3, 3}, /* cost of storing SSE registers
175 in SImode, DImode and TImode */
176 3, /* MMX or SSE register to integer */
177 0, /* size of l1 cache */
178 0, /* size of l2 cache */
179 0, /* size of prefetch block */
180 0, /* number of parallel prefetches */
181 2, /* Branch cost */
182 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
183 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
184 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
185 COSTS_N_BYTES (2), /* cost of FABS instruction. */
186 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
187 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
188 ix86_size_memcpy,
189 ix86_size_memset,
190 1, /* scalar_stmt_cost. */
191 1, /* scalar load_cost. */
192 1, /* scalar_store_cost. */
193 1, /* vec_stmt_cost. */
194 1, /* vec_to_scalar_cost. */
195 1, /* scalar_to_vec_cost. */
196 1, /* vec_align_load_cost. */
197 1, /* vec_unalign_load_cost. */
198 1, /* vec_store_cost. */
199 1, /* cond_taken_branch_cost. */
200 1, /* cond_not_taken_branch_cost. */
201 };
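/* Selection sketch (editorial, hedged): later in this file the option
   override code is expected to point ix86_cost at this table when
   optimizing for size, roughly

       if (opts->x_optimize_size)
	 ix86_cost = &ix86_size_cost;
       else
	 ix86_cost = ix86_tune_cost;

   so the byte-denominated costs above stand in for per-CPU cycle costs.  */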
203 /* Processor costs (relative to an add) */
204 static stringop_algs i386_memcpy[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207 static stringop_algs i386_memset[2] = {
208 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
209 DUMMY_STRINGOP_ALGS};
211 static const
212 struct processor_costs i386_cost = { /* 386 specific costs */
213 COSTS_N_INSNS (1), /* cost of an add instruction */
214 COSTS_N_INSNS (1), /* cost of a lea instruction */
215 COSTS_N_INSNS (3), /* variable shift costs */
216 COSTS_N_INSNS (2), /* constant shift costs */
217 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
218 COSTS_N_INSNS (6), /* HI */
219 COSTS_N_INSNS (6), /* SI */
220 COSTS_N_INSNS (6), /* DI */
221 COSTS_N_INSNS (6)}, /* other */
222 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
223 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
224 COSTS_N_INSNS (23), /* HI */
225 COSTS_N_INSNS (23), /* SI */
226 COSTS_N_INSNS (23), /* DI */
227 COSTS_N_INSNS (23)}, /* other */
228 COSTS_N_INSNS (3), /* cost of movsx */
229 COSTS_N_INSNS (2), /* cost of movzx */
230 15, /* "large" insn */
231 3, /* MOVE_RATIO */
232 4, /* cost for loading QImode using movzbl */
233 {2, 4, 2}, /* cost of loading integer registers
234 in QImode, HImode and SImode.
235 Relative to reg-reg move (2). */
236 {2, 4, 2}, /* cost of storing integer registers */
237 2, /* cost of reg,reg fld/fst */
238 {8, 8, 8}, /* cost of loading fp registers
239 in SFmode, DFmode and XFmode */
240 {8, 8, 8}, /* cost of storing fp registers
241 in SFmode, DFmode and XFmode */
242 2, /* cost of moving MMX register */
243 {4, 8}, /* cost of loading MMX registers
244 in SImode and DImode */
245 {4, 8}, /* cost of storing MMX registers
246 in SImode and DImode */
247 2, /* cost of moving SSE register */
248 {4, 8, 16}, /* cost of loading SSE registers
249 in SImode, DImode and TImode */
250 {4, 8, 16}, /* cost of storing SSE registers
251 in SImode, DImode and TImode */
252 3, /* MMX or SSE register to integer */
253 0, /* size of l1 cache */
254 0, /* size of l2 cache */
255 0, /* size of prefetch block */
256 0, /* number of parallel prefetches */
257 1, /* Branch cost */
258 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
259 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
260 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
261 COSTS_N_INSNS (22), /* cost of FABS instruction. */
262 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
263 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
264 i386_memcpy,
265 i386_memset,
266 1, /* scalar_stmt_cost. */
267 1, /* scalar load_cost. */
268 1, /* scalar_store_cost. */
269 1, /* vec_stmt_cost. */
270 1, /* vec_to_scalar_cost. */
271 1, /* scalar_to_vec_cost. */
272 1, /* vec_align_load_cost. */
273 2, /* vec_unalign_load_cost. */
274 1, /* vec_store_cost. */
275 3, /* cond_taken_branch_cost. */
276 1, /* cond_not_taken_branch_cost. */
277 };
279 static stringop_algs i486_memcpy[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282 static stringop_algs i486_memset[2] = {
283 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
284 DUMMY_STRINGOP_ALGS};
286 static const
287 struct processor_costs i486_cost = { /* 486 specific costs */
288 COSTS_N_INSNS (1), /* cost of an add instruction */
289 COSTS_N_INSNS (1), /* cost of a lea instruction */
290 COSTS_N_INSNS (3), /* variable shift costs */
291 COSTS_N_INSNS (2), /* constant shift costs */
292 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
293 COSTS_N_INSNS (12), /* HI */
294 COSTS_N_INSNS (12), /* SI */
295 COSTS_N_INSNS (12), /* DI */
296 COSTS_N_INSNS (12)}, /* other */
297 1, /* cost of multiply per each bit set */
298 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
299 COSTS_N_INSNS (40), /* HI */
300 COSTS_N_INSNS (40), /* SI */
301 COSTS_N_INSNS (40), /* DI */
302 COSTS_N_INSNS (40)}, /* other */
303 COSTS_N_INSNS (3), /* cost of movsx */
304 COSTS_N_INSNS (2), /* cost of movzx */
305 15, /* "large" insn */
306 3, /* MOVE_RATIO */
307 4, /* cost for loading QImode using movzbl */
308 {2, 4, 2}, /* cost of loading integer registers
309 in QImode, HImode and SImode.
310 Relative to reg-reg move (2). */
311 {2, 4, 2}, /* cost of storing integer registers */
312 2, /* cost of reg,reg fld/fst */
313 {8, 8, 8}, /* cost of loading fp registers
314 in SFmode, DFmode and XFmode */
315 {8, 8, 8}, /* cost of storing fp registers
316 in SFmode, DFmode and XFmode */
317 2, /* cost of moving MMX register */
318 {4, 8}, /* cost of loading MMX registers
319 in SImode and DImode */
320 {4, 8}, /* cost of storing MMX registers
321 in SImode and DImode */
322 2, /* cost of moving SSE register */
323 {4, 8, 16}, /* cost of loading SSE registers
324 in SImode, DImode and TImode */
325 {4, 8, 16}, /* cost of storing SSE registers
326 in SImode, DImode and TImode */
327 3, /* MMX or SSE register to integer */
328 4, /* size of l1 cache. 486 has 8kB cache
329 shared for code and data, so 4kB is
330 not really precise. */
331 4, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 1, /* Branch cost */
335 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (3), /* cost of FABS instruction. */
339 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
341 i486_memcpy,
342 i486_memset,
343 1, /* scalar_stmt_cost. */
344 1, /* scalar load_cost. */
345 1, /* scalar_store_cost. */
346 1, /* vec_stmt_cost. */
347 1, /* vec_to_scalar_cost. */
348 1, /* scalar_to_vec_cost. */
349 1, /* vec_align_load_cost. */
350 2, /* vec_unalign_load_cost. */
351 1, /* vec_store_cost. */
352 3, /* cond_taken_branch_cost. */
353 1, /* cond_not_taken_branch_cost. */
354 };
356 static stringop_algs pentium_memcpy[2] = {
357 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
358 DUMMY_STRINGOP_ALGS};
359 static stringop_algs pentium_memset[2] = {
360 {libcall, {{-1, rep_prefix_4_byte, false}}},
361 DUMMY_STRINGOP_ALGS};
363 static const
364 struct processor_costs pentium_cost = {
365 COSTS_N_INSNS (1), /* cost of an add instruction */
366 COSTS_N_INSNS (1), /* cost of a lea instruction */
367 COSTS_N_INSNS (4), /* variable shift costs */
368 COSTS_N_INSNS (1), /* constant shift costs */
369 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
370 COSTS_N_INSNS (11), /* HI */
371 COSTS_N_INSNS (11), /* SI */
372 COSTS_N_INSNS (11), /* DI */
373 COSTS_N_INSNS (11)}, /* other */
374 0, /* cost of multiply per each bit set */
375 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
376 COSTS_N_INSNS (25), /* HI */
377 COSTS_N_INSNS (25), /* SI */
378 COSTS_N_INSNS (25), /* DI */
379 COSTS_N_INSNS (25)}, /* other */
380 COSTS_N_INSNS (3), /* cost of movsx */
381 COSTS_N_INSNS (2), /* cost of movzx */
382 8, /* "large" insn */
383 6, /* MOVE_RATIO */
384 6, /* cost for loading QImode using movzbl */
385 {2, 4, 2}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {2, 4, 2}, /* cost of storing integer registers */
389 2, /* cost of reg,reg fld/fst */
390 {2, 2, 6}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {4, 4, 6}, /* cost of storing fp registers
393 in SFmode, DFmode and XFmode */
394 8, /* cost of moving MMX register */
395 {8, 8}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {8, 8}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 8, 16}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 8, 16}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 3, /* MMX or SSE register to integer */
405 8, /* size of l1 cache. */
406 8, /* size of l2 cache */
407 0, /* size of prefetch block */
408 0, /* number of parallel prefetches */
409 2, /* Branch cost */
410 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
411 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
412 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
413 COSTS_N_INSNS (1), /* cost of FABS instruction. */
414 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
415 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
416 pentium_memcpy,
417 pentium_memset,
418 1, /* scalar_stmt_cost. */
419 1, /* scalar load_cost. */
420 1, /* scalar_store_cost. */
421 1, /* vec_stmt_cost. */
422 1, /* vec_to_scalar_cost. */
423 1, /* scalar_to_vec_cost. */
424 1, /* vec_align_load_cost. */
425 2, /* vec_unalign_load_cost. */
426 1, /* vec_store_cost. */
427 3, /* cond_taken_branch_cost. */
428 1, /* cond_not_taken_branch_cost. */
429 };
431 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
432 (we ensure the alignment). For small blocks an inline loop is still a
433 noticeable win; for bigger blocks either rep movsl or rep movsb is the
434 way to go. Rep movsb apparently has a more expensive startup time in the
435 CPU, but after 4K the difference is down in the noise. */
436 static stringop_algs pentiumpro_memcpy[2] = {
437 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
438 {8192, rep_prefix_4_byte, false},
439 {-1, rep_prefix_1_byte, false}}},
440 DUMMY_STRINGOP_ALGS};
441 static stringop_algs pentiumpro_memset[2] = {
442 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
443 {8192, rep_prefix_4_byte, false},
444 {-1, libcall, false}}},
445 DUMMY_STRINGOP_ALGS};
446 static const
447 struct processor_costs pentiumpro_cost = {
448 COSTS_N_INSNS (1), /* cost of an add instruction */
449 COSTS_N_INSNS (1), /* cost of a lea instruction */
450 COSTS_N_INSNS (1), /* variable shift costs */
451 COSTS_N_INSNS (1), /* constant shift costs */
452 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
453 COSTS_N_INSNS (4), /* HI */
454 COSTS_N_INSNS (4), /* SI */
455 COSTS_N_INSNS (4), /* DI */
456 COSTS_N_INSNS (4)}, /* other */
457 0, /* cost of multiply per each bit set */
458 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
459 COSTS_N_INSNS (17), /* HI */
460 COSTS_N_INSNS (17), /* SI */
461 COSTS_N_INSNS (17), /* DI */
462 COSTS_N_INSNS (17)}, /* other */
463 COSTS_N_INSNS (1), /* cost of movsx */
464 COSTS_N_INSNS (1), /* cost of movzx */
465 8, /* "large" insn */
466 6, /* MOVE_RATIO */
467 2, /* cost for loading QImode using movzbl */
468 {4, 4, 4}, /* cost of loading integer registers
469 in QImode, HImode and SImode.
470 Relative to reg-reg move (2). */
471 {2, 2, 2}, /* cost of storing integer registers */
472 2, /* cost of reg,reg fld/fst */
473 {2, 2, 6}, /* cost of loading fp registers
474 in SFmode, DFmode and XFmode */
475 {4, 4, 6}, /* cost of storing fp registers
476 in SFmode, DFmode and XFmode */
477 2, /* cost of moving MMX register */
478 {2, 2}, /* cost of loading MMX registers
479 in SImode and DImode */
480 {2, 2}, /* cost of storing MMX registers
481 in SImode and DImode */
482 2, /* cost of moving SSE register */
483 {2, 2, 8}, /* cost of loading SSE registers
484 in SImode, DImode and TImode */
485 {2, 2, 8}, /* cost of storing SSE registers
486 in SImode, DImode and TImode */
487 3, /* MMX or SSE register to integer */
488 8, /* size of l1 cache. */
489 256, /* size of l2 cache */
490 32, /* size of prefetch block */
491 6, /* number of parallel prefetches */
492 2, /* Branch cost */
493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
495 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
496 COSTS_N_INSNS (2), /* cost of FABS instruction. */
497 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
498 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
499 pentiumpro_memcpy,
500 pentiumpro_memset,
501 1, /* scalar_stmt_cost. */
502 1, /* scalar load_cost. */
503 1, /* scalar_store_cost. */
504 1, /* vec_stmt_cost. */
505 1, /* vec_to_scalar_cost. */
506 1, /* scalar_to_vec_cost. */
507 1, /* vec_align_load_cost. */
508 2, /* vec_unalign_load_cost. */
509 1, /* vec_store_cost. */
510 3, /* cond_taken_branch_cost. */
511 1, /* cond_not_taken_branch_cost. */
512 };
514 static stringop_algs geode_memcpy[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static stringop_algs geode_memset[2] = {
518 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
519 DUMMY_STRINGOP_ALGS};
520 static const
521 struct processor_costs geode_cost = {
522 COSTS_N_INSNS (1), /* cost of an add instruction */
523 COSTS_N_INSNS (1), /* cost of a lea instruction */
524 COSTS_N_INSNS (2), /* variable shift costs */
525 COSTS_N_INSNS (1), /* constant shift costs */
526 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
527 COSTS_N_INSNS (4), /* HI */
528 COSTS_N_INSNS (7), /* SI */
529 COSTS_N_INSNS (7), /* DI */
530 COSTS_N_INSNS (7)}, /* other */
531 0, /* cost of multiply per each bit set */
532 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
533 COSTS_N_INSNS (23), /* HI */
534 COSTS_N_INSNS (39), /* SI */
535 COSTS_N_INSNS (39), /* DI */
536 COSTS_N_INSNS (39)}, /* other */
537 COSTS_N_INSNS (1), /* cost of movsx */
538 COSTS_N_INSNS (1), /* cost of movzx */
539 8, /* "large" insn */
540 4, /* MOVE_RATIO */
541 1, /* cost for loading QImode using movzbl */
542 {1, 1, 1}, /* cost of loading integer registers
543 in QImode, HImode and SImode.
544 Relative to reg-reg move (2). */
545 {1, 1, 1}, /* cost of storing integer registers */
546 1, /* cost of reg,reg fld/fst */
547 {1, 1, 1}, /* cost of loading fp registers
548 in SFmode, DFmode and XFmode */
549 {4, 6, 6}, /* cost of storing fp registers
550 in SFmode, DFmode and XFmode */
552 1, /* cost of moving MMX register */
553 {1, 1}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {1, 1}, /* cost of storing MMX registers
556 in SImode and DImode */
557 1, /* cost of moving SSE register */
558 {1, 1, 1}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {1, 1, 1}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 1, /* MMX or SSE register to integer */
563 64, /* size of l1 cache. */
564 128, /* size of l2 cache. */
565 32, /* size of prefetch block */
566 1, /* number of parallel prefetches */
567 1, /* Branch cost */
568 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
569 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
570 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
571 COSTS_N_INSNS (1), /* cost of FABS instruction. */
572 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
573 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
574 geode_memcpy,
575 geode_memset,
576 1, /* scalar_stmt_cost. */
577 1, /* scalar load_cost. */
578 1, /* scalar_store_cost. */
579 1, /* vec_stmt_cost. */
580 1, /* vec_to_scalar_cost. */
581 1, /* scalar_to_vec_cost. */
582 1, /* vec_align_load_cost. */
583 2, /* vec_unalign_load_cost. */
584 1, /* vec_store_cost. */
585 3, /* cond_taken_branch_cost. */
586 1, /* cond_not_taken_branch_cost. */
587 };
589 static stringop_algs k6_memcpy[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static stringop_algs k6_memset[2] = {
593 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
594 DUMMY_STRINGOP_ALGS};
595 static const
596 struct processor_costs k6_cost = {
597 COSTS_N_INSNS (1), /* cost of an add instruction */
598 COSTS_N_INSNS (2), /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (3), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (3), /* DI */
605 COSTS_N_INSNS (3)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (18), /* HI */
609 COSTS_N_INSNS (18), /* SI */
610 COSTS_N_INSNS (18), /* DI */
611 COSTS_N_INSNS (18)}, /* other */
612 COSTS_N_INSNS (2), /* cost of movsx */
613 COSTS_N_INSNS (2), /* cost of movzx */
614 8, /* "large" insn */
615 4, /* MOVE_RATIO */
616 3, /* cost for loading QImode using movzbl */
617 {4, 5, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {2, 3, 2}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {6, 6, 6}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {4, 4, 4}, /* cost of storing fp registers
625 in SFmode, DFmode and XFmode */
626 2, /* cost of moving MMX register */
627 {2, 2}, /* cost of loading MMX registers
628 in SImode and DImode */
629 {2, 2}, /* cost of storing MMX registers
630 in SImode and DImode */
631 2, /* cost of moving SSE register */
632 {2, 2, 8}, /* cost of loading SSE registers
633 in SImode, DImode and TImode */
634 {2, 2, 8}, /* cost of storing SSE registers
635 in SImode, DImode and TImode */
636 6, /* MMX or SSE register to integer */
637 32, /* size of l1 cache. */
638 32, /* size of l2 cache. Some models
639 have integrated l2 cache, but
640 optimizing for k6 is not important
641 enough to worry about that. */
642 32, /* size of prefetch block */
643 1, /* number of parallel prefetches */
644 1, /* Branch cost */
645 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
646 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
647 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
648 COSTS_N_INSNS (2), /* cost of FABS instruction. */
649 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
650 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
651 k6_memcpy,
652 k6_memset,
653 1, /* scalar_stmt_cost. */
654 1, /* scalar load_cost. */
655 1, /* scalar_store_cost. */
656 1, /* vec_stmt_cost. */
657 1, /* vec_to_scalar_cost. */
658 1, /* scalar_to_vec_cost. */
659 1, /* vec_align_load_cost. */
660 2, /* vec_unalign_load_cost. */
661 1, /* vec_store_cost. */
662 3, /* cond_taken_branch_cost. */
663 1, /* cond_not_taken_branch_cost. */
664 };
666 /* For some reason, Athlon deals better with REP prefix (relative to loops)
667 compared to K8. Alignment becomes important after 8 bytes for memcpy and
668 128 bytes for memset. */
669 static stringop_algs athlon_memcpy[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static stringop_algs athlon_memset[2] = {
673 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
674 DUMMY_STRINGOP_ALGS};
675 static const
676 struct processor_costs athlon_cost = {
677 COSTS_N_INSNS (1), /* cost of an add instruction */
678 COSTS_N_INSNS (2), /* cost of a lea instruction */
679 COSTS_N_INSNS (1), /* variable shift costs */
680 COSTS_N_INSNS (1), /* constant shift costs */
681 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
682 COSTS_N_INSNS (5), /* HI */
683 COSTS_N_INSNS (5), /* SI */
684 COSTS_N_INSNS (5), /* DI */
685 COSTS_N_INSNS (5)}, /* other */
686 0, /* cost of multiply per each bit set */
687 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
688 COSTS_N_INSNS (26), /* HI */
689 COSTS_N_INSNS (42), /* SI */
690 COSTS_N_INSNS (74), /* DI */
691 COSTS_N_INSNS (74)}, /* other */
692 COSTS_N_INSNS (1), /* cost of movsx */
693 COSTS_N_INSNS (1), /* cost of movzx */
694 8, /* "large" insn */
695 9, /* MOVE_RATIO */
696 4, /* cost for loading QImode using movzbl */
697 {3, 4, 3}, /* cost of loading integer registers
698 in QImode, HImode and SImode.
699 Relative to reg-reg move (2). */
700 {3, 4, 3}, /* cost of storing integer registers */
701 4, /* cost of reg,reg fld/fst */
702 {4, 4, 12}, /* cost of loading fp registers
703 in SFmode, DFmode and XFmode */
704 {6, 6, 8}, /* cost of storing fp registers
705 in SFmode, DFmode and XFmode */
706 2, /* cost of moving MMX register */
707 {4, 4}, /* cost of loading MMX registers
708 in SImode and DImode */
709 {4, 4}, /* cost of storing MMX registers
710 in SImode and DImode */
711 2, /* cost of moving SSE register */
712 {4, 4, 6}, /* cost of loading SSE registers
713 in SImode, DImode and TImode */
714 {4, 4, 5}, /* cost of storing SSE registers
715 in SImode, DImode and TImode */
716 5, /* MMX or SSE register to integer */
717 64, /* size of l1 cache. */
718 256, /* size of l2 cache. */
719 64, /* size of prefetch block */
720 6, /* number of parallel prefetches */
721 5, /* Branch cost */
722 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
723 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
724 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
725 COSTS_N_INSNS (2), /* cost of FABS instruction. */
726 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
727 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
728 athlon_memcpy,
729 athlon_memset,
730 1, /* scalar_stmt_cost. */
731 1, /* scalar load_cost. */
732 1, /* scalar_store_cost. */
733 1, /* vec_stmt_cost. */
734 1, /* vec_to_scalar_cost. */
735 1, /* scalar_to_vec_cost. */
736 1, /* vec_align_load_cost. */
737 2, /* vec_unalign_load_cost. */
738 1, /* vec_store_cost. */
739 3, /* cond_taken_branch_cost. */
740 1, /* cond_not_taken_branch_cost. */
741 };
743 /* K8 has optimized REP instruction for medium sized blocks, but for very
744 small blocks it is better to use loop. For large blocks, libcall can
745 do nontemporal accesses and beat inline considerably. */
746 static stringop_algs k8_memcpy[2] = {
747 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
748 {-1, rep_prefix_4_byte, false}}},
749 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
750 {-1, libcall, false}}}};
751 static stringop_algs k8_memset[2] = {
752 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 {libcall, {{48, unrolled_loop, false},
755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
756 static const
757 struct processor_costs k8_cost = {
758 COSTS_N_INSNS (1), /* cost of an add instruction */
759 COSTS_N_INSNS (2), /* cost of a lea instruction */
760 COSTS_N_INSNS (1), /* variable shift costs */
761 COSTS_N_INSNS (1), /* constant shift costs */
762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
763 COSTS_N_INSNS (4), /* HI */
764 COSTS_N_INSNS (3), /* SI */
765 COSTS_N_INSNS (4), /* DI */
766 COSTS_N_INSNS (5)}, /* other */
767 0, /* cost of multiply per each bit set */
768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
769 COSTS_N_INSNS (26), /* HI */
770 COSTS_N_INSNS (42), /* SI */
771 COSTS_N_INSNS (74), /* DI */
772 COSTS_N_INSNS (74)}, /* other */
773 COSTS_N_INSNS (1), /* cost of movsx */
774 COSTS_N_INSNS (1), /* cost of movzx */
775 8, /* "large" insn */
776 9, /* MOVE_RATIO */
777 4, /* cost for loading QImode using movzbl */
778 {3, 4, 3}, /* cost of loading integer registers
779 in QImode, HImode and SImode.
780 Relative to reg-reg move (2). */
781 {3, 4, 3}, /* cost of storing integer registers */
782 4, /* cost of reg,reg fld/fst */
783 {4, 4, 12}, /* cost of loading fp registers
784 in SFmode, DFmode and XFmode */
785 {6, 6, 8}, /* cost of storing fp registers
786 in SFmode, DFmode and XFmode */
787 2, /* cost of moving MMX register */
788 {3, 3}, /* cost of loading MMX registers
789 in SImode and DImode */
790 {4, 4}, /* cost of storing MMX registers
791 in SImode and DImode */
792 2, /* cost of moving SSE register */
793 {4, 3, 6}, /* cost of loading SSE registers
794 in SImode, DImode and TImode */
795 {4, 4, 5}, /* cost of storing SSE registers
796 in SImode, DImode and TImode */
797 5, /* MMX or SSE register to integer */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
801 /* New AMD processors never drop prefetches; if they cannot be performed
802 immediately, they are queued. We set number of simultaneous prefetches
803 to a large constant to reflect this (it probably is not a good idea not
804 to limit number of prefetches at all, as their execution also takes some
805 time). */
806 100, /* number of parallel prefetches */
807 3, /* Branch cost */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
815 k8_memcpy,
816 k8_memset,
817 4, /* scalar_stmt_cost. */
818 2, /* scalar load_cost. */
819 2, /* scalar_store_cost. */
820 5, /* vec_stmt_cost. */
821 0, /* vec_to_scalar_cost. */
822 2, /* scalar_to_vec_cost. */
823 2, /* vec_align_load_cost. */
824 3, /* vec_unalign_load_cost. */
825 3, /* vec_store_cost. */
826 3, /* cond_taken_branch_cost. */
827 2, /* cond_not_taken_branch_cost. */
828 };
830 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
831 very small blocks it is better to use loop. For large blocks, libcall can
832 do nontemporal accesses and beat inline considerably. */
833 static stringop_algs amdfam10_memcpy[2] = {
834 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
835 {-1, rep_prefix_4_byte, false}}},
836 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
837 {-1, libcall, false}}}};
838 static stringop_algs amdfam10_memset[2] = {
839 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
840 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
841 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
842 {-1, libcall, false}}}};
843 struct processor_costs amdfam10_cost = {
844 COSTS_N_INSNS (1), /* cost of an add instruction */
845 COSTS_N_INSNS (2), /* cost of a lea instruction */
846 COSTS_N_INSNS (1), /* variable shift costs */
847 COSTS_N_INSNS (1), /* constant shift costs */
848 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
849 COSTS_N_INSNS (4), /* HI */
850 COSTS_N_INSNS (3), /* SI */
851 COSTS_N_INSNS (4), /* DI */
852 COSTS_N_INSNS (5)}, /* other */
853 0, /* cost of multiply per each bit set */
854 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
855 COSTS_N_INSNS (35), /* HI */
856 COSTS_N_INSNS (51), /* SI */
857 COSTS_N_INSNS (83), /* DI */
858 COSTS_N_INSNS (83)}, /* other */
859 COSTS_N_INSNS (1), /* cost of movsx */
860 COSTS_N_INSNS (1), /* cost of movzx */
861 8, /* "large" insn */
862 9, /* MOVE_RATIO */
863 4, /* cost for loading QImode using movzbl */
864 {3, 4, 3}, /* cost of loading integer registers
865 in QImode, HImode and SImode.
866 Relative to reg-reg move (2). */
867 {3, 4, 3}, /* cost of storing integer registers */
868 4, /* cost of reg,reg fld/fst */
869 {4, 4, 12}, /* cost of loading fp registers
870 in SFmode, DFmode and XFmode */
871 {6, 6, 8}, /* cost of storing fp registers
872 in SFmode, DFmode and XFmode */
873 2, /* cost of moving MMX register */
874 {3, 3}, /* cost of loading MMX registers
875 in SImode and DImode */
876 {4, 4}, /* cost of storing MMX registers
877 in SImode and DImode */
878 2, /* cost of moving SSE register */
879 {4, 4, 3}, /* cost of loading SSE registers
880 in SImode, DImode and TImode */
881 {4, 4, 5}, /* cost of storing SSE registers
882 in SImode, DImode and TImode */
883 3, /* MMX or SSE register to integer */
884 /* On K8:
885 MOVD reg64, xmmreg Double FSTORE 4
886 MOVD reg32, xmmreg Double FSTORE 4
887 On AMDFAM10:
888 MOVD reg64, xmmreg Double FADD 3
889 1/1 1/1
890 MOVD reg32, xmmreg Double FADD 3
891 1/1 1/1 */
892 64, /* size of l1 cache. */
893 512, /* size of l2 cache. */
894 64, /* size of prefetch block */
895 /* New AMD processors never drop prefetches; if they cannot be performed
896 immediately, they are queued. We set number of simultaneous prefetches
897 to a large constant to reflect this (it probably is not a good idea not
898 to limit number of prefetches at all, as their execution also takes some
899 time). */
900 100, /* number of parallel prefetches */
901 2, /* Branch cost */
902 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
903 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
904 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
905 COSTS_N_INSNS (2), /* cost of FABS instruction. */
906 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
907 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
909 amdfam10_memcpy,
910 amdfam10_memset,
911 4, /* scalar_stmt_cost. */
912 2, /* scalar load_cost. */
913 2, /* scalar_store_cost. */
914 6, /* vec_stmt_cost. */
915 0, /* vec_to_scalar_cost. */
916 2, /* scalar_to_vec_cost. */
917 2, /* vec_align_load_cost. */
918 2, /* vec_unalign_load_cost. */
919 2, /* vec_store_cost. */
920 2, /* cond_taken_branch_cost. */
921 1, /* cond_not_taken_branch_cost. */
922 };
924 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
925 very small blocks it is better to use loop. For large blocks, libcall
926 can do nontemporal accesses and beat inline considerably. */
927 static stringop_algs bdver1_memcpy[2] = {
928 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
929 {-1, rep_prefix_4_byte, false}}},
930 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
931 {-1, libcall, false}}}};
932 static stringop_algs bdver1_memset[2] = {
933 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
934 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
935 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
936 {-1, libcall, false}}}};
938 const struct processor_costs bdver1_cost = {
939 COSTS_N_INSNS (1), /* cost of an add instruction */
940 COSTS_N_INSNS (1), /* cost of a lea instruction */
941 COSTS_N_INSNS (1), /* variable shift costs */
942 COSTS_N_INSNS (1), /* constant shift costs */
943 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
944 COSTS_N_INSNS (4), /* HI */
945 COSTS_N_INSNS (4), /* SI */
946 COSTS_N_INSNS (6), /* DI */
947 COSTS_N_INSNS (6)}, /* other */
948 0, /* cost of multiply per each bit set */
949 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
950 COSTS_N_INSNS (35), /* HI */
951 COSTS_N_INSNS (51), /* SI */
952 COSTS_N_INSNS (83), /* DI */
953 COSTS_N_INSNS (83)}, /* other */
954 COSTS_N_INSNS (1), /* cost of movsx */
955 COSTS_N_INSNS (1), /* cost of movzx */
956 8, /* "large" insn */
957 9, /* MOVE_RATIO */
958 4, /* cost for loading QImode using movzbl */
959 {5, 5, 4}, /* cost of loading integer registers
960 in QImode, HImode and SImode.
961 Relative to reg-reg move (2). */
962 {4, 4, 4}, /* cost of storing integer registers */
963 2, /* cost of reg,reg fld/fst */
964 {5, 5, 12}, /* cost of loading fp registers
965 in SFmode, DFmode and XFmode */
966 {4, 4, 8}, /* cost of storing fp registers
967 in SFmode, DFmode and XFmode */
968 2, /* cost of moving MMX register */
969 {4, 4}, /* cost of loading MMX registers
970 in SImode and DImode */
971 {4, 4}, /* cost of storing MMX registers
972 in SImode and DImode */
973 2, /* cost of moving SSE register */
974 {4, 4, 4}, /* cost of loading SSE registers
975 in SImode, DImode and TImode */
976 {4, 4, 4}, /* cost of storing SSE registers
977 in SImode, DImode and TImode */
978 2, /* MMX or SSE register to integer */
979 /* On K8:
980 MOVD reg64, xmmreg Double FSTORE 4
981 MOVD reg32, xmmreg Double FSTORE 4
982 On AMDFAM10:
983 MOVD reg64, xmmreg Double FADD 3
984 1/1 1/1
985 MOVD reg32, xmmreg Double FADD 3
986 1/1 1/1 */
987 16, /* size of l1 cache. */
988 2048, /* size of l2 cache. */
989 64, /* size of prefetch block */
990 /* New AMD processors never drop prefetches; if they cannot be performed
991 immediately, they are queued. We set number of simultaneous prefetches
992 to a large constant to reflect this (it probably is not a good idea not
993 to limit number of prefetches at all, as their execution also takes some
994 time). */
995 100, /* number of parallel prefetches */
996 2, /* Branch cost */
997 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
998 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
999 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1000 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1001 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1002 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1004 bdver1_memcpy,
1005 bdver1_memset,
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
1016 1, /* cond_not_taken_branch_cost. */
1017 };
1019 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1020 very small blocks it is better to use loop. For large blocks, libcall
1021 can do nontemporal accesses and beat inline considerably. */
1023 static stringop_algs bdver2_memcpy[2] = {
1024 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1025 {-1, rep_prefix_4_byte, false}}},
1026 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1027 {-1, libcall, false}}}};
1028 static stringop_algs bdver2_memset[2] = {
1029 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1030 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1031 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1032 {-1, libcall, false}}}};
1034 const struct processor_costs bdver2_cost = {
1035 COSTS_N_INSNS (1), /* cost of an add instruction */
1036 COSTS_N_INSNS (1), /* cost of a lea instruction */
1037 COSTS_N_INSNS (1), /* variable shift costs */
1038 COSTS_N_INSNS (1), /* constant shift costs */
1039 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1040 COSTS_N_INSNS (4), /* HI */
1041 COSTS_N_INSNS (4), /* SI */
1042 COSTS_N_INSNS (6), /* DI */
1043 COSTS_N_INSNS (6)}, /* other */
1044 0, /* cost of multiply per each bit set */
1045 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1046 COSTS_N_INSNS (35), /* HI */
1047 COSTS_N_INSNS (51), /* SI */
1048 COSTS_N_INSNS (83), /* DI */
1049 COSTS_N_INSNS (83)}, /* other */
1050 COSTS_N_INSNS (1), /* cost of movsx */
1051 COSTS_N_INSNS (1), /* cost of movzx */
1052 8, /* "large" insn */
1053 9, /* MOVE_RATIO */
1054 4, /* cost for loading QImode using movzbl */
1055 {5, 5, 4}, /* cost of loading integer registers
1056 in QImode, HImode and SImode.
1057 Relative to reg-reg move (2). */
1058 {4, 4, 4}, /* cost of storing integer registers */
1059 2, /* cost of reg,reg fld/fst */
1060 {5, 5, 12}, /* cost of loading fp registers
1061 in SFmode, DFmode and XFmode */
1062 {4, 4, 8}, /* cost of storing fp registers
1063 in SFmode, DFmode and XFmode */
1064 2, /* cost of moving MMX register */
1065 {4, 4}, /* cost of loading MMX registers
1066 in SImode and DImode */
1067 {4, 4}, /* cost of storing MMX registers
1068 in SImode and DImode */
1069 2, /* cost of moving SSE register */
1070 {4, 4, 4}, /* cost of loading SSE registers
1071 in SImode, DImode and TImode */
1072 {4, 4, 4}, /* cost of storing SSE registers
1073 in SImode, DImode and TImode */
1074 2, /* MMX or SSE register to integer */
1075 /* On K8:
1076 MOVD reg64, xmmreg Double FSTORE 4
1077 MOVD reg32, xmmreg Double FSTORE 4
1078 On AMDFAM10:
1079 MOVD reg64, xmmreg Double FADD 3
1080 1/1 1/1
1081 MOVD reg32, xmmreg Double FADD 3
1082 1/1 1/1 */
1083 16, /* size of l1 cache. */
1084 2048, /* size of l2 cache. */
1085 64, /* size of prefetch block */
1086 /* New AMD processors never drop prefetches; if they cannot be performed
1087 immediately, they are queued. We set number of simultaneous prefetches
1088 to a large constant to reflect this (it probably is not a good idea not
1089 to limit number of prefetches at all, as their execution also takes some
1090 time). */
1091 100, /* number of parallel prefetches */
1092 2, /* Branch cost */
1093 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1100 bdver2_memcpy,
1101 bdver2_memset,
1102 6, /* scalar_stmt_cost. */
1103 4, /* scalar load_cost. */
1104 4, /* scalar_store_cost. */
1105 6, /* vec_stmt_cost. */
1106 0, /* vec_to_scalar_cost. */
1107 2, /* scalar_to_vec_cost. */
1108 4, /* vec_align_load_cost. */
1109 4, /* vec_unalign_load_cost. */
1110 4, /* vec_store_cost. */
1111 2, /* cond_taken_branch_cost. */
1112 1, /* cond_not_taken_branch_cost. */
1113 };
1116 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1117 very small blocks it is better to use loop. For large blocks, libcall
1118 can do nontemporal accesses and beat inline considerably. */
1119 static stringop_algs bdver3_memcpy[2] = {
1120 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1121 {-1, rep_prefix_4_byte, false}}},
1122 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1123 {-1, libcall, false}}}};
1124 static stringop_algs bdver3_memset[2] = {
1125 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1126 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1127 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1128 {-1, libcall, false}}}};
1129 struct processor_costs bdver3_cost = {
1130 COSTS_N_INSNS (1), /* cost of an add instruction */
1131 COSTS_N_INSNS (1), /* cost of a lea instruction */
1132 COSTS_N_INSNS (1), /* variable shift costs */
1133 COSTS_N_INSNS (1), /* constant shift costs */
1134 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1135 COSTS_N_INSNS (4), /* HI */
1136 COSTS_N_INSNS (4), /* SI */
1137 COSTS_N_INSNS (6), /* DI */
1138 COSTS_N_INSNS (6)}, /* other */
1139 0, /* cost of multiply per each bit set */
1140 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1141 COSTS_N_INSNS (35), /* HI */
1142 COSTS_N_INSNS (51), /* SI */
1143 COSTS_N_INSNS (83), /* DI */
1144 COSTS_N_INSNS (83)}, /* other */
1145 COSTS_N_INSNS (1), /* cost of movsx */
1146 COSTS_N_INSNS (1), /* cost of movzx */
1147 8, /* "large" insn */
1148 9, /* MOVE_RATIO */
1149 4, /* cost for loading QImode using movzbl */
1150 {5, 5, 4}, /* cost of loading integer registers
1151 in QImode, HImode and SImode.
1152 Relative to reg-reg move (2). */
1153 {4, 4, 4}, /* cost of storing integer registers */
1154 2, /* cost of reg,reg fld/fst */
1155 {5, 5, 12}, /* cost of loading fp registers
1156 in SFmode, DFmode and XFmode */
1157 {4, 4, 8}, /* cost of storing fp registers
1158 in SFmode, DFmode and XFmode */
1159 2, /* cost of moving MMX register */
1160 {4, 4}, /* cost of loading MMX registers
1161 in SImode and DImode */
1162 {4, 4}, /* cost of storing MMX registers
1163 in SImode and DImode */
1164 2, /* cost of moving SSE register */
1165 {4, 4, 4}, /* cost of loading SSE registers
1166 in SImode, DImode and TImode */
1167 {4, 4, 4}, /* cost of storing SSE registers
1168 in SImode, DImode and TImode */
1169 2, /* MMX or SSE register to integer */
1170 16, /* size of l1 cache. */
1171 2048, /* size of l2 cache. */
1172 64, /* size of prefetch block */
1173 /* New AMD processors never drop prefetches; if they cannot be performed
1174 immediately, they are queued. We set number of simultaneous prefetches
1175 to a large constant to reflect this (it probably is not a good idea not
1176 to limit number of prefetches at all, as their execution also takes some
1177 time). */
1178 100, /* number of parallel prefetches */
1179 2, /* Branch cost */
1180 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1181 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1182 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1183 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1184 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1185 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1187 bdver3_memcpy,
1188 bdver3_memset,
1189 6, /* scalar_stmt_cost. */
1190 4, /* scalar load_cost. */
1191 4, /* scalar_store_cost. */
1192 6, /* vec_stmt_cost. */
1193 0, /* vec_to_scalar_cost. */
1194 2, /* scalar_to_vec_cost. */
1195 4, /* vec_align_load_cost. */
1196 4, /* vec_unalign_load_cost. */
1197 4, /* vec_store_cost. */
1198 2, /* cond_taken_branch_cost. */
1199 1, /* cond_not_taken_branch_cost. */
1200 };
1202 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1203 very small blocks it is better to use loop. For large blocks, libcall
1204 can do nontemporal accesses and beat inline considerably. */
1205 static stringop_algs bdver4_memcpy[2] = {
1206 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1207 {-1, rep_prefix_4_byte, false}}},
1208 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1209 {-1, libcall, false}}}};
1210 static stringop_algs bdver4_memset[2] = {
1211 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1212 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1213 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1214 {-1, libcall, false}}}};
1215 struct processor_costs bdver4_cost = {
1216 COSTS_N_INSNS (1), /* cost of an add instruction */
1217 COSTS_N_INSNS (1), /* cost of a lea instruction */
1218 COSTS_N_INSNS (1), /* variable shift costs */
1219 COSTS_N_INSNS (1), /* constant shift costs */
1220 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1221 COSTS_N_INSNS (4), /* HI */
1222 COSTS_N_INSNS (4), /* SI */
1223 COSTS_N_INSNS (6), /* DI */
1224 COSTS_N_INSNS (6)}, /* other */
1225 0, /* cost of multiply per each bit set */
1226 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1227 COSTS_N_INSNS (35), /* HI */
1228 COSTS_N_INSNS (51), /* SI */
1229 COSTS_N_INSNS (83), /* DI */
1230 COSTS_N_INSNS (83)}, /* other */
1231 COSTS_N_INSNS (1), /* cost of movsx */
1232 COSTS_N_INSNS (1), /* cost of movzx */
1233 8, /* "large" insn */
1234 9, /* MOVE_RATIO */
1235 4, /* cost for loading QImode using movzbl */
1236 {5, 5, 4}, /* cost of loading integer registers
1237 in QImode, HImode and SImode.
1238 Relative to reg-reg move (2). */
1239 {4, 4, 4}, /* cost of storing integer registers */
1240 2, /* cost of reg,reg fld/fst */
1241 {5, 5, 12}, /* cost of loading fp registers
1242 in SFmode, DFmode and XFmode */
1243 {4, 4, 8}, /* cost of storing fp registers
1244 in SFmode, DFmode and XFmode */
1245 2, /* cost of moving MMX register */
1246 {4, 4}, /* cost of loading MMX registers
1247 in SImode and DImode */
1248 {4, 4}, /* cost of storing MMX registers
1249 in SImode and DImode */
1250 2, /* cost of moving SSE register */
1251 {4, 4, 4}, /* cost of loading SSE registers
1252 in SImode, DImode and TImode */
1253 {4, 4, 4}, /* cost of storing SSE registers
1254 in SImode, DImode and TImode */
1255 2, /* MMX or SSE register to integer */
1256 16, /* size of l1 cache. */
1257 2048, /* size of l2 cache. */
1258 64, /* size of prefetch block */
1259 /* New AMD processors never drop prefetches; if they cannot be performed
1260 immediately, they are queued. We set number of simultaneous prefetches
1261 to a large constant to reflect this (it probably is not a good idea not
1262 to limit number of prefetches at all, as their execution also takes some
1263 time). */
1264 100, /* number of parallel prefetches */
1265 2, /* Branch cost */
1266 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1267 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1268 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1269 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1270 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1271 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1273 bdver4_memcpy,
1274 bdver4_memset,
1275 6, /* scalar_stmt_cost. */
1276 4, /* scalar load_cost. */
1277 4, /* scalar_store_cost. */
1278 6, /* vec_stmt_cost. */
1279 0, /* vec_to_scalar_cost. */
1280 2, /* scalar_to_vec_cost. */
1281 4, /* vec_align_load_cost. */
1282 4, /* vec_unalign_load_cost. */
1283 4, /* vec_store_cost. */
1284 2, /* cond_taken_branch_cost. */
1285 1, /* cond_not_taken_branch_cost. */
1286 };
1288 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1289 very small blocks it is better to use loop. For large blocks, libcall can
1290 do nontemporal accesses and beat inline considerably. */
1291 static stringop_algs btver1_memcpy[2] = {
1292 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1293 {-1, rep_prefix_4_byte, false}}},
1294 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1295 {-1, libcall, false}}}};
1296 static stringop_algs btver1_memset[2] = {
1297 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1298 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1299 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1300 {-1, libcall, false}}}};
1301 const struct processor_costs btver1_cost = {
1302 COSTS_N_INSNS (1), /* cost of an add instruction */
1303 COSTS_N_INSNS (2), /* cost of a lea instruction */
1304 COSTS_N_INSNS (1), /* variable shift costs */
1305 COSTS_N_INSNS (1), /* constant shift costs */
1306 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1307 COSTS_N_INSNS (4), /* HI */
1308 COSTS_N_INSNS (3), /* SI */
1309 COSTS_N_INSNS (4), /* DI */
1310 COSTS_N_INSNS (5)}, /* other */
1311 0, /* cost of multiply per each bit set */
1312 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1313 COSTS_N_INSNS (35), /* HI */
1314 COSTS_N_INSNS (51), /* SI */
1315 COSTS_N_INSNS (83), /* DI */
1316 COSTS_N_INSNS (83)}, /* other */
1317 COSTS_N_INSNS (1), /* cost of movsx */
1318 COSTS_N_INSNS (1), /* cost of movzx */
1319 8, /* "large" insn */
1320 9, /* MOVE_RATIO */
1321 4, /* cost for loading QImode using movzbl */
1322 {3, 4, 3}, /* cost of loading integer registers
1323 in QImode, HImode and SImode.
1324 Relative to reg-reg move (2). */
1325 {3, 4, 3}, /* cost of storing integer registers */
1326 4, /* cost of reg,reg fld/fst */
1327 {4, 4, 12}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode */
1329 {6, 6, 8}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode */
1331 2, /* cost of moving MMX register */
1332 {3, 3}, /* cost of loading MMX registers
1333 in SImode and DImode */
1334 {4, 4}, /* cost of storing MMX registers
1335 in SImode and DImode */
1336 2, /* cost of moving SSE register */
1337 {4, 4, 3}, /* cost of loading SSE registers
1338 in SImode, DImode and TImode */
1339 {4, 4, 5}, /* cost of storing SSE registers
1340 in SImode, DImode and TImode */
1341 3, /* MMX or SSE register to integer */
1342 /* On K8:
1343 MOVD reg64, xmmreg Double FSTORE 4
1344 MOVD reg32, xmmreg Double FSTORE 4
1345 On AMDFAM10:
1346 MOVD reg64, xmmreg Double FADD 3
1347 1/1 1/1
1348 MOVD reg32, xmmreg Double FADD 3
1349 1/1 1/1 */
1350 32, /* size of l1 cache. */
1351 512, /* size of l2 cache. */
1352 64, /* size of prefetch block */
1353 100, /* number of parallel prefetches */
1354 2, /* Branch cost */
1355 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1356 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1357 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1358 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1359 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1360 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1362 btver1_memcpy,
1363 btver1_memset,
1364 4, /* scalar_stmt_cost. */
1365 2, /* scalar load_cost. */
1366 2, /* scalar_store_cost. */
1367 6, /* vec_stmt_cost. */
1368 0, /* vec_to_scalar_cost. */
1369 2, /* scalar_to_vec_cost. */
1370 2, /* vec_align_load_cost. */
1371 2, /* vec_unalign_load_cost. */
1372 2, /* vec_store_cost. */
1373 2, /* cond_taken_branch_cost. */
1374 1, /* cond_not_taken_branch_cost. */
1375 };
1377 static stringop_algs btver2_memcpy[2] = {
1378 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1379 {-1, rep_prefix_4_byte, false}}},
1380 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1381 {-1, libcall, false}}}};
1382 static stringop_algs btver2_memset[2] = {
1383 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1384 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1385 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1386 {-1, libcall, false}}}};
1387 const struct processor_costs btver2_cost = {
1388 COSTS_N_INSNS (1), /* cost of an add instruction */
1389 COSTS_N_INSNS (2), /* cost of a lea instruction */
1390 COSTS_N_INSNS (1), /* variable shift costs */
1391 COSTS_N_INSNS (1), /* constant shift costs */
1392 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1393 COSTS_N_INSNS (4), /* HI */
1394 COSTS_N_INSNS (3), /* SI */
1395 COSTS_N_INSNS (4), /* DI */
1396 COSTS_N_INSNS (5)}, /* other */
1397 0, /* cost of multiply per each bit set */
1398 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1399 COSTS_N_INSNS (35), /* HI */
1400 COSTS_N_INSNS (51), /* SI */
1401 COSTS_N_INSNS (83), /* DI */
1402 COSTS_N_INSNS (83)}, /* other */
1403 COSTS_N_INSNS (1), /* cost of movsx */
1404 COSTS_N_INSNS (1), /* cost of movzx */
1405 8, /* "large" insn */
1406 9, /* MOVE_RATIO */
1407 4, /* cost for loading QImode using movzbl */
1408 {3, 4, 3}, /* cost of loading integer registers
1409 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */
1411 {3, 4, 3}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */
1413 {4, 4, 12}, /* cost of loading fp registers
1414 in SFmode, DFmode and XFmode */
1415 {6, 6, 8}, /* cost of storing fp registers
1416 in SFmode, DFmode and XFmode */
1417 2, /* cost of moving MMX register */
1418 {3, 3}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {4, 4}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, /* cost of moving SSE register */
1423 {4, 4, 3}, /* cost of loading SSE registers
1424 in SImode, DImode and TImode */
1425 {4, 4, 5}, /* cost of storing SSE registers
1426 in SImode, DImode and TImode */
1427 3, /* MMX or SSE register to integer */
1428 /* On K8:
1429 MOVD reg64, xmmreg Double FSTORE 4
1430 MOVD reg32, xmmreg Double FSTORE 4
1431 On AMDFAM10:
1432 MOVD reg64, xmmreg Double FADD 3
1433 1/1 1/1
1434 MOVD reg32, xmmreg Double FADD 3
1435 1/1 1/1 */
1436 32, /* size of l1 cache. */
1437 2048, /* size of l2 cache. */
1438 64, /* size of prefetch block */
1439 100, /* number of parallel prefetches */
1440 2, /* Branch cost */
1441 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1442 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1443 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1444 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1445 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1446 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1447 btver2_memcpy,
1448 btver2_memset,
1449 4, /* scalar_stmt_cost. */
1450 2, /* scalar load_cost. */
1451 2, /* scalar_store_cost. */
1452 6, /* vec_stmt_cost. */
1453 0, /* vec_to_scalar_cost. */
1454 2, /* scalar_to_vec_cost. */
1455 2, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 2, /* vec_store_cost. */
1458 2, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1462 static stringop_algs pentium4_memcpy[2] = {
1463 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1464 DUMMY_STRINGOP_ALGS};
1465 static stringop_algs pentium4_memset[2] = {
1466 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1467 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 DUMMY_STRINGOP_ALGS};
1470 static const
1471 struct processor_costs pentium4_cost = {
1472 COSTS_N_INSNS (1), /* cost of an add instruction */
1473 COSTS_N_INSNS (3), /* cost of a lea instruction */
1474 COSTS_N_INSNS (4), /* variable shift costs */
1475 COSTS_N_INSNS (4), /* constant shift costs */
1476 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1477 COSTS_N_INSNS (15), /* HI */
1478 COSTS_N_INSNS (15), /* SI */
1479 COSTS_N_INSNS (15), /* DI */
1480 COSTS_N_INSNS (15)}, /* other */
1481 0, /* cost of multiply per each bit set */
1482 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1483 COSTS_N_INSNS (56), /* HI */
1484 COSTS_N_INSNS (56), /* SI */
1485 COSTS_N_INSNS (56), /* DI */
1486 COSTS_N_INSNS (56)}, /* other */
1487 COSTS_N_INSNS (1), /* cost of movsx */
1488 COSTS_N_INSNS (1), /* cost of movzx */
1489 16, /* "large" insn */
1490 6, /* MOVE_RATIO */
1491 2, /* cost for loading QImode using movzbl */
1492 {4, 5, 4}, /* cost of loading integer registers
1493 in QImode, HImode and SImode.
1494 Relative to reg-reg move (2). */
1495 {2, 3, 2}, /* cost of storing integer registers */
1496 2, /* cost of reg,reg fld/fst */
1497 {2, 2, 6}, /* cost of loading fp registers
1498 in SFmode, DFmode and XFmode */
1499 {4, 4, 6}, /* cost of storing fp registers
1500 in SFmode, DFmode and XFmode */
1501 2, /* cost of moving MMX register */
1502 {2, 2}, /* cost of loading MMX registers
1503 in SImode and DImode */
1504 {2, 2}, /* cost of storing MMX registers
1505 in SImode and DImode */
1506 12, /* cost of moving SSE register */
1507 {12, 12, 12}, /* cost of loading SSE registers
1508 in SImode, DImode and TImode */
1509 {2, 2, 8}, /* cost of storing SSE registers
1510 in SImode, DImode and TImode */
1511 10, /* MMX or SSE register to integer */
1512 8, /* size of l1 cache. */
1513 256, /* size of l2 cache. */
1514 64, /* size of prefetch block */
1515 6, /* number of parallel prefetches */
1516 2, /* Branch cost */
1517 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1518 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1519 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1520 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1521 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1522 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1523 pentium4_memcpy,
1524 pentium4_memset,
1525 1, /* scalar_stmt_cost. */
1526 1, /* scalar load_cost. */
1527 1, /* scalar_store_cost. */
1528 1, /* vec_stmt_cost. */
1529 1, /* vec_to_scalar_cost. */
1530 1, /* scalar_to_vec_cost. */
1531 1, /* vec_align_load_cost. */
1532 2, /* vec_unalign_load_cost. */
1533 1, /* vec_store_cost. */
1534 3, /* cond_taken_branch_cost. */
1535 1, /* cond_not_taken_branch_cost. */
1538 static stringop_algs nocona_memcpy[2] = {
1539 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1540 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1541 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1543 static stringop_algs nocona_memset[2] = {
1544 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1545 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1546 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1547 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1549 static const
1550 struct processor_costs nocona_cost = {
1551 COSTS_N_INSNS (1), /* cost of an add instruction */
1552 COSTS_N_INSNS (1), /* cost of a lea instruction */
1553 COSTS_N_INSNS (1), /* variable shift costs */
1554 COSTS_N_INSNS (1), /* constant shift costs */
1555 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1556 COSTS_N_INSNS (10), /* HI */
1557 COSTS_N_INSNS (10), /* SI */
1558 COSTS_N_INSNS (10), /* DI */
1559 COSTS_N_INSNS (10)}, /* other */
1560 0, /* cost of multiply per each bit set */
1561 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1562 COSTS_N_INSNS (66), /* HI */
1563 COSTS_N_INSNS (66), /* SI */
1564 COSTS_N_INSNS (66), /* DI */
1565 COSTS_N_INSNS (66)}, /* other */
1566 COSTS_N_INSNS (1), /* cost of movsx */
1567 COSTS_N_INSNS (1), /* cost of movzx */
1568 16, /* "large" insn */
1569 17, /* MOVE_RATIO */
1570 4, /* cost for loading QImode using movzbl */
1571 {4, 4, 4}, /* cost of loading integer registers
1572 in QImode, HImode and SImode.
1573 Relative to reg-reg move (2). */
1574 {4, 4, 4}, /* cost of storing integer registers */
1575 3, /* cost of reg,reg fld/fst */
1576 {12, 12, 12}, /* cost of loading fp registers
1577 in SFmode, DFmode and XFmode */
1578 {4, 4, 4}, /* cost of storing fp registers
1579 in SFmode, DFmode and XFmode */
1580 6, /* cost of moving MMX register */
1581 {12, 12}, /* cost of loading MMX registers
1582 in SImode and DImode */
1583 {12, 12}, /* cost of storing MMX registers
1584 in SImode and DImode */
1585 6, /* cost of moving SSE register */
1586 {12, 12, 12}, /* cost of loading SSE registers
1587 in SImode, DImode and TImode */
1588 {12, 12, 12}, /* cost of storing SSE registers
1589 in SImode, DImode and TImode */
1590 8, /* MMX or SSE register to integer */
1591 8, /* size of l1 cache. */
1592 1024, /* size of l2 cache. */
1593 64, /* size of prefetch block */
1594 8, /* number of parallel prefetches */
1595 1, /* Branch cost */
1596 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1597 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1598 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1599 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1600 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1601 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1602 nocona_memcpy,
1603 nocona_memset,
1604 1, /* scalar_stmt_cost. */
1605 1, /* scalar load_cost. */
1606 1, /* scalar_store_cost. */
1607 1, /* vec_stmt_cost. */
1608 1, /* vec_to_scalar_cost. */
1609 1, /* scalar_to_vec_cost. */
1610 1, /* vec_align_load_cost. */
1611 2, /* vec_unalign_load_cost. */
1612 1, /* vec_store_cost. */
1613 3, /* cond_taken_branch_cost. */
1614 1, /* cond_not_taken_branch_cost. */
1617 static stringop_algs atom_memcpy[2] = {
1618 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1619 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1620 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1621 static stringop_algs atom_memset[2] = {
1622 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1623 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1624 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1625 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1626 static const
1627 struct processor_costs atom_cost = {
1628 COSTS_N_INSNS (1), /* cost of an add instruction */
1629 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1630 COSTS_N_INSNS (1), /* variable shift costs */
1631 COSTS_N_INSNS (1), /* constant shift costs */
1632 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1633 COSTS_N_INSNS (4), /* HI */
1634 COSTS_N_INSNS (3), /* SI */
1635 COSTS_N_INSNS (4), /* DI */
1636 COSTS_N_INSNS (2)}, /* other */
1637 0, /* cost of multiply per each bit set */
1638 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1639 COSTS_N_INSNS (26), /* HI */
1640 COSTS_N_INSNS (42), /* SI */
1641 COSTS_N_INSNS (74), /* DI */
1642 COSTS_N_INSNS (74)}, /* other */
1643 COSTS_N_INSNS (1), /* cost of movsx */
1644 COSTS_N_INSNS (1), /* cost of movzx */
1645 8, /* "large" insn */
1646 17, /* MOVE_RATIO */
1647 4, /* cost for loading QImode using movzbl */
1648 {4, 4, 4}, /* cost of loading integer registers
1649 in QImode, HImode and SImode.
1650 Relative to reg-reg move (2). */
1651 {4, 4, 4}, /* cost of storing integer registers */
1652 4, /* cost of reg,reg fld/fst */
1653 {12, 12, 12}, /* cost of loading fp registers
1654 in SFmode, DFmode and XFmode */
1655 {6, 6, 8}, /* cost of storing fp registers
1656 in SFmode, DFmode and XFmode */
1657 2, /* cost of moving MMX register */
1658 {8, 8}, /* cost of loading MMX registers
1659 in SImode and DImode */
1660 {8, 8}, /* cost of storing MMX registers
1661 in SImode and DImode */
1662 2, /* cost of moving SSE register */
1663 {8, 8, 8}, /* cost of loading SSE registers
1664 in SImode, DImode and TImode */
1665 {8, 8, 8}, /* cost of storing SSE registers
1666 in SImode, DImode and TImode */
1667 5, /* MMX or SSE register to integer */
1668 32, /* size of l1 cache. */
1669 256, /* size of l2 cache. */
1670 64, /* size of prefetch block */
1671 6, /* number of parallel prefetches */
1672 3, /* Branch cost */
1673 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1674 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1675 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1676 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1677 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1678 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1679 atom_memcpy,
1680 atom_memset,
1681 1, /* scalar_stmt_cost. */
1682 1, /* scalar load_cost. */
1683 1, /* scalar_store_cost. */
1684 1, /* vec_stmt_cost. */
1685 1, /* vec_to_scalar_cost. */
1686 1, /* scalar_to_vec_cost. */
1687 1, /* vec_align_load_cost. */
1688 2, /* vec_unalign_load_cost. */
1689 1, /* vec_store_cost. */
1690 3, /* cond_taken_branch_cost. */
1691 1, /* cond_not_taken_branch_cost. */
1694 static stringop_algs slm_memcpy[2] = {
1695 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1696 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1697 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1698 static stringop_algs slm_memset[2] = {
1699 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1700 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1701 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1702 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1703 static const
1704 struct processor_costs slm_cost = {
1705 COSTS_N_INSNS (1), /* cost of an add instruction */
1706 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1707 COSTS_N_INSNS (1), /* variable shift costs */
1708 COSTS_N_INSNS (1), /* constant shift costs */
1709 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1710 COSTS_N_INSNS (3), /* HI */
1711 COSTS_N_INSNS (3), /* SI */
1712 COSTS_N_INSNS (4), /* DI */
1713 COSTS_N_INSNS (2)}, /* other */
1714 0, /* cost of multiply per each bit set */
1715 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1716 COSTS_N_INSNS (26), /* HI */
1717 COSTS_N_INSNS (42), /* SI */
1718 COSTS_N_INSNS (74), /* DI */
1719 COSTS_N_INSNS (74)}, /* other */
1720 COSTS_N_INSNS (1), /* cost of movsx */
1721 COSTS_N_INSNS (1), /* cost of movzx */
1722 8, /* "large" insn */
1723 17, /* MOVE_RATIO */
1724 4, /* cost for loading QImode using movzbl */
1725 {4, 4, 4}, /* cost of loading integer registers
1726 in QImode, HImode and SImode.
1727 Relative to reg-reg move (2). */
1728 {4, 4, 4}, /* cost of storing integer registers */
1729 4, /* cost of reg,reg fld/fst */
1730 {12, 12, 12}, /* cost of loading fp registers
1731 in SFmode, DFmode and XFmode */
1732 {6, 6, 8}, /* cost of storing fp registers
1733 in SFmode, DFmode and XFmode */
1734 2, /* cost of moving MMX register */
1735 {8, 8}, /* cost of loading MMX registers
1736 in SImode and DImode */
1737 {8, 8}, /* cost of storing MMX registers
1738 in SImode and DImode */
1739 2, /* cost of moving SSE register */
1740 {8, 8, 8}, /* cost of loading SSE registers
1741 in SImode, DImode and TImode */
1742 {8, 8, 8}, /* cost of storing SSE registers
1743 in SImode, DImode and TImode */
1744 5, /* MMX or SSE register to integer */
1745 32, /* size of l1 cache. */
1746 256, /* size of l2 cache. */
1747 64, /* size of prefetch block */
1748 6, /* number of parallel prefetches */
1749 3, /* Branch cost */
1750 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1751 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1752 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1753 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1754 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1755 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1756 slm_memcpy,
1757 slm_memset,
1758 1, /* scalar_stmt_cost. */
1759 1, /* scalar load_cost. */
1760 1, /* scalar_store_cost. */
1761 1, /* vec_stmt_cost. */
1762 4, /* vec_to_scalar_cost. */
1763 1, /* scalar_to_vec_cost. */
1764 1, /* vec_align_load_cost. */
1765 2, /* vec_unalign_load_cost. */
1766 1, /* vec_store_cost. */
1767 3, /* cond_taken_branch_cost. */
1768 1, /* cond_not_taken_branch_cost. */
1771 static stringop_algs intel_memcpy[2] = {
1772 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1773 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1774 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1775 static stringop_algs intel_memset[2] = {
1776 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1777 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1778 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1779 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1780 static const
1781 struct processor_costs intel_cost = {
1782 COSTS_N_INSNS (1), /* cost of an add instruction */
1783 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1784 COSTS_N_INSNS (1), /* variable shift costs */
1785 COSTS_N_INSNS (1), /* constant shift costs */
1786 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1787 COSTS_N_INSNS (3), /* HI */
1788 COSTS_N_INSNS (3), /* SI */
1789 COSTS_N_INSNS (4), /* DI */
1790 COSTS_N_INSNS (2)}, /* other */
1791 0, /* cost of multiply per each bit set */
1792 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1793 COSTS_N_INSNS (26), /* HI */
1794 COSTS_N_INSNS (42), /* SI */
1795 COSTS_N_INSNS (74), /* DI */
1796 COSTS_N_INSNS (74)}, /* other */
1797 COSTS_N_INSNS (1), /* cost of movsx */
1798 COSTS_N_INSNS (1), /* cost of movzx */
1799 8, /* "large" insn */
1800 17, /* MOVE_RATIO */
1801 4, /* cost for loading QImode using movzbl */
1802 {4, 4, 4}, /* cost of loading integer registers
1803 in QImode, HImode and SImode.
1804 Relative to reg-reg move (2). */
1805 {4, 4, 4}, /* cost of storing integer registers */
1806 4, /* cost of reg,reg fld/fst */
1807 {12, 12, 12}, /* cost of loading fp registers
1808 in SFmode, DFmode and XFmode */
1809 {6, 6, 8}, /* cost of storing fp registers
1810 in SFmode, DFmode and XFmode */
1811 2, /* cost of moving MMX register */
1812 {8, 8}, /* cost of loading MMX registers
1813 in SImode and DImode */
1814 {8, 8}, /* cost of storing MMX registers
1815 in SImode and DImode */
1816 2, /* cost of moving SSE register */
1817 {8, 8, 8}, /* cost of loading SSE registers
1818 in SImode, DImode and TImode */
1819 {8, 8, 8}, /* cost of storing SSE registers
1820 in SImode, DImode and TImode */
1821 5, /* MMX or SSE register to integer */
1822 32, /* size of l1 cache. */
1823 256, /* size of l2 cache. */
1824 64, /* size of prefetch block */
1825 6, /* number of parallel prefetches */
1826 3, /* Branch cost */
1827 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1828 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1829 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1830 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1831 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1832 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1833 intel_memcpy,
1834 intel_memset,
1835 1, /* scalar_stmt_cost. */
1836 1, /* scalar load_cost. */
1837 1, /* scalar_store_cost. */
1838 1, /* vec_stmt_cost. */
1839 4, /* vec_to_scalar_cost. */
1840 1, /* scalar_to_vec_cost. */
1841 1, /* vec_align_load_cost. */
1842 2, /* vec_unalign_load_cost. */
1843 1, /* vec_store_cost. */
1844 3, /* cond_taken_branch_cost. */
1845 1, /* cond_not_taken_branch_cost. */
1848 /* Generic should produce code tuned for Core-i7 (and newer chips)
1849 and btver1 (and newer chips). */
1851 static stringop_algs generic_memcpy[2] = {
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1853 {-1, libcall, false}}},
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1855 {-1, libcall, false}}}};
1856 static stringop_algs generic_memset[2] = {
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1858 {-1, libcall, false}}},
1859 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1860 {-1, libcall, false}}}};
1861 static const
1862 struct processor_costs generic_cost = {
1863 COSTS_N_INSNS (1), /* cost of an add instruction */
1864 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1865 this cost, however, our current implementation of synth_mult results in
1866 the use of unnecessary temporary registers, causing regressions on several
1867 SPECfp benchmarks. */
1868 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1869 COSTS_N_INSNS (1), /* variable shift costs */
1870 COSTS_N_INSNS (1), /* constant shift costs */
1871 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1872 COSTS_N_INSNS (4), /* HI */
1873 COSTS_N_INSNS (3), /* SI */
1874 COSTS_N_INSNS (4), /* DI */
1875 COSTS_N_INSNS (2)}, /* other */
1876 0, /* cost of multiply per each bit set */
1877 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1878 COSTS_N_INSNS (26), /* HI */
1879 COSTS_N_INSNS (42), /* SI */
1880 COSTS_N_INSNS (74), /* DI */
1881 COSTS_N_INSNS (74)}, /* other */
1882 COSTS_N_INSNS (1), /* cost of movsx */
1883 COSTS_N_INSNS (1), /* cost of movzx */
1884 8, /* "large" insn */
1885 17, /* MOVE_RATIO */
1886 4, /* cost for loading QImode using movzbl */
1887 {4, 4, 4}, /* cost of loading integer registers
1888 in QImode, HImode and SImode.
1889 Relative to reg-reg move (2). */
1890 {4, 4, 4}, /* cost of storing integer registers */
1891 4, /* cost of reg,reg fld/fst */
1892 {12, 12, 12}, /* cost of loading fp registers
1893 in SFmode, DFmode and XFmode */
1894 {6, 6, 8}, /* cost of storing fp registers
1895 in SFmode, DFmode and XFmode */
1896 2, /* cost of moving MMX register */
1897 {8, 8}, /* cost of loading MMX registers
1898 in SImode and DImode */
1899 {8, 8}, /* cost of storing MMX registers
1900 in SImode and DImode */
1901 2, /* cost of moving SSE register */
1902 {8, 8, 8}, /* cost of loading SSE registers
1903 in SImode, DImode and TImode */
1904 {8, 8, 8}, /* cost of storing SSE registers
1905 in SImode, DImode and TImode */
1906 5, /* MMX or SSE register to integer */
1907 32, /* size of l1 cache. */
1908 512, /* size of l2 cache. */
1909 64, /* size of prefetch block */
1910 6, /* number of parallel prefetches */
1911 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1912 value is increased to the perhaps more appropriate value of 5. */
1913 3, /* Branch cost */
1914 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1915 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1916 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1917 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1918 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1919 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1920 generic_memcpy,
1921 generic_memset,
1922 1, /* scalar_stmt_cost. */
1923 1, /* scalar load_cost. */
1924 1, /* scalar_store_cost. */
1925 1, /* vec_stmt_cost. */
1926 1, /* vec_to_scalar_cost. */
1927 1, /* scalar_to_vec_cost. */
1928 1, /* vec_align_load_cost. */
1929 2, /* vec_unalign_load_cost. */
1930 1, /* vec_store_cost. */
1931 3, /* cond_taken_branch_cost. */
1932 1, /* cond_not_taken_branch_cost. */
1935 /* core_cost should produce code tuned for the Core family of CPUs. */
1936 static stringop_algs core_memcpy[2] = {
1937 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940 static stringop_algs core_memset[2] = {
1941 {libcall, {{6, loop_1_byte, true},
1942 {24, loop, true},
1943 {8192, rep_prefix_4_byte, true},
1944 {-1, libcall, false}}},
1945 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1946 {-1, libcall, false}}}};
1948 static const
1949 struct processor_costs core_cost = {
1950 COSTS_N_INSNS (1), /* cost of an add instruction */
1951 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1952 this cost, however, our current implementation of synth_mult results in
1953 the use of unnecessary temporary registers, causing regressions on several
1954 SPECfp benchmarks. */
1955 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1956 COSTS_N_INSNS (1), /* variable shift costs */
1957 COSTS_N_INSNS (1), /* constant shift costs */
1958 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1959 COSTS_N_INSNS (4), /* HI */
1960 COSTS_N_INSNS (3), /* SI */
1961 COSTS_N_INSNS (4), /* DI */
1962 COSTS_N_INSNS (2)}, /* other */
1963 0, /* cost of multiply per each bit set */
1964 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1965 COSTS_N_INSNS (26), /* HI */
1966 COSTS_N_INSNS (42), /* SI */
1967 COSTS_N_INSNS (74), /* DI */
1968 COSTS_N_INSNS (74)}, /* other */
1969 COSTS_N_INSNS (1), /* cost of movsx */
1970 COSTS_N_INSNS (1), /* cost of movzx */
1971 8, /* "large" insn */
1972 17, /* MOVE_RATIO */
1973 4, /* cost for loading QImode using movzbl */
1974 {4, 4, 4}, /* cost of loading integer registers
1975 in QImode, HImode and SImode.
1976 Relative to reg-reg move (2). */
1977 {4, 4, 4}, /* cost of storing integer registers */
1978 4, /* cost of reg,reg fld/fst */
1979 {12, 12, 12}, /* cost of loading fp registers
1980 in SFmode, DFmode and XFmode */
1981 {6, 6, 8}, /* cost of storing fp registers
1982 in SFmode, DFmode and XFmode */
1983 2, /* cost of moving MMX register */
1984 {8, 8}, /* cost of loading MMX registers
1985 in SImode and DImode */
1986 {8, 8}, /* cost of storing MMX registers
1987 in SImode and DImode */
1988 2, /* cost of moving SSE register */
1989 {8, 8, 8}, /* cost of loading SSE registers
1990 in SImode, DImode and TImode */
1991 {8, 8, 8}, /* cost of storing SSE registers
1992 in SImode, DImode and TImode */
1993 5, /* MMX or SSE register to integer */
1994 64, /* size of l1 cache. */
1995 512, /* size of l2 cache. */
1996 64, /* size of prefetch block */
1997 6, /* number of parallel prefetches */
1998 /* FIXME perhaps more appropriate value is 5. */
1999 3, /* Branch cost */
2000 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2001 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2002 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2003 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2004 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2005 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2006 core_memcpy,
2007 core_memset,
2008 1, /* scalar_stmt_cost. */
2009 1, /* scalar load_cost. */
2010 1, /* scalar_store_cost. */
2011 1, /* vec_stmt_cost. */
2012 1, /* vec_to_scalar_cost. */
2013 1, /* scalar_to_vec_cost. */
2014 1, /* vec_align_load_cost. */
2015 2, /* vec_unalign_load_cost. */
2016 1, /* vec_store_cost. */
2017 3, /* cond_taken_branch_cost. */
2018 1, /* cond_not_taken_branch_cost. */
2022 /* Set by -mtune. */
2023 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2025 /* Set by -mtune or -Os. */
2026 const struct processor_costs *ix86_cost = &pentium_cost;
2028 /* Processor feature/optimization bitmasks. */
2029 #define m_386 (1<<PROCESSOR_I386)
2030 #define m_486 (1<<PROCESSOR_I486)
2031 #define m_PENT (1<<PROCESSOR_PENTIUM)
2032 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2033 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2034 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2035 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2036 #define m_CORE2 (1<<PROCESSOR_CORE2)
2037 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2038 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2039 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2040 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2041 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2042 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2043 #define m_KNL (1<<PROCESSOR_KNL)
2044 #define m_INTEL (1<<PROCESSOR_INTEL)
2046 #define m_GEODE (1<<PROCESSOR_GEODE)
2047 #define m_K6 (1<<PROCESSOR_K6)
2048 #define m_K6_GEODE (m_K6 | m_GEODE)
2049 #define m_K8 (1<<PROCESSOR_K8)
2050 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2051 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2052 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2053 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2054 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2055 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2056 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2057 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2058 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2059 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2060 #define m_BTVER (m_BTVER1 | m_BTVER2)
2061 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2063 #define m_GENERIC (1<<PROCESSOR_GENERIC)
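/* Illustration (hypothetical tuning entry; the real selectors live in
   x86-tune.def): a definition such as

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_GENERIC)

   marks the feature as enabled for the active -mtune CPU exactly when

     (m_CORE_ALL | m_GENERIC) & (1u << ix86_tune)

   is nonzero, which is the test performed by set_ix86_tune_features
   further below.  */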
2065 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2066 #undef DEF_TUNE
2067 #define DEF_TUNE(tune, name, selector) name,
2068 #include "x86-tune.def"
2069 #undef DEF_TUNE
2072 /* Feature tests against the various tunings. */
2073 unsigned char ix86_tune_features[X86_TUNE_LAST];
2075 /* Feature tests against the various tunings used to create ix86_tune_features
2076 based on the processor mask. */
2077 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2078 #undef DEF_TUNE
2079 #define DEF_TUNE(tune, name, selector) selector,
2080 #include "x86-tune.def"
2081 #undef DEF_TUNE
2084 /* Feature tests against the various architecture variations. */
2085 unsigned char ix86_arch_features[X86_ARCH_LAST];
2087 /* Feature tests against the various architecture variations, used to create
2088 ix86_arch_features based on the processor mask. */
2089 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2090 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2091 ~(m_386 | m_486 | m_PENT | m_K6),
2093 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2094 ~m_386,
2096 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2097 ~(m_386 | m_486),
2099 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2100 ~m_386,
2102 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2103 ~m_386,
2106 /* In case the average insn count for a single function invocation is
2107 lower than this constant, emit fast (but longer) prologue and
2108 epilogue code. */
2109 #define FAST_PROLOGUE_INSN_COUNT 20
2111 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2112 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2113 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2114 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2116 /* Array of the smallest class containing reg number REGNO, indexed by
2117 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2119 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2121 /* ax, dx, cx, bx */
2122 AREG, DREG, CREG, BREG,
2123 /* si, di, bp, sp */
2124 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2125 /* FP registers */
2126 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2127 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2128 /* arg pointer */
2129 NON_Q_REGS,
2130 /* flags, fpsr, fpcr, frame */
2131 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2132 /* SSE registers */
2133 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2134 SSE_REGS, SSE_REGS,
2135 /* MMX registers */
2136 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2137 MMX_REGS, MMX_REGS,
2138 /* REX registers */
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2141 /* SSE REX registers */
2142 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2143 SSE_REGS, SSE_REGS,
2144 /* AVX-512 SSE registers */
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2149 /* Mask registers. */
2150 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2152 /* MPX bound registers */
2153 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2156 /* The "default" register map used in 32bit mode. */
2158 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2160 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2161 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2162 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2163 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2164 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2168 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2169 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2170 101, 102, 103, 104, /* bound registers */
2173 /* The "default" register map used in 64bit mode. */
2175 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2177 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2178 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2179 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2180 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2181 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2182 8,9,10,11,12,13,14,15, /* extended integer registers */
2183 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2184 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2185 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2186 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2187 126, 127, 128, 129, /* bound registers */
2190 /* Define the register numbers to be used in Dwarf debugging information.
2191 The SVR4 reference port C compiler uses the following register numbers
2192 in its Dwarf output code:
2193 0 for %eax (gcc regno = 0)
2194 1 for %ecx (gcc regno = 2)
2195 2 for %edx (gcc regno = 1)
2196 3 for %ebx (gcc regno = 3)
2197 4 for %esp (gcc regno = 7)
2198 5 for %ebp (gcc regno = 6)
2199 6 for %esi (gcc regno = 4)
2200 7 for %edi (gcc regno = 5)
2201 The following three DWARF register numbers are never generated by
2202 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2203 believes these numbers have these meanings.
2204 8 for %eip (no gcc equivalent)
2205 9 for %eflags (gcc regno = 17)
2206 10 for %trapno (no gcc equivalent)
2207 It is not at all clear how we should number the FP stack registers
2208 for the x86 architecture. If the version of SDB on x86/svr4 were
2209 a bit less brain dead with respect to floating-point then we would
2210 have a precedent to follow with respect to DWARF register numbers
2211 for x86 FP registers, but the SDB on x86/svr4 is so completely
2212 broken with respect to FP registers that it is hardly worth thinking
2213 of it as something to strive for compatibility with.
2214 The version of x86/svr4 SDB I have at the moment does (partially)
2215 seem to believe that DWARF register number 11 is associated with
2216 the x86 register %st(0), but that's about all. Higher DWARF
2217 register numbers don't seem to be associated with anything in
2218 particular, and even for DWARF regno 11, SDB only seems to under-
2219 stand that it should say that a variable lives in %st(0) (when
2220 asked via an `=' command) if we said it was in DWARF regno 11,
2221 but SDB still prints garbage when asked for the value of the
2222 variable in question (via a `/' command).
2223 (Also note that the labels SDB prints for various FP stack regs
2224 when doing an `x' command are all wrong.)
2225 Note that these problems generally don't affect the native SVR4
2226 C compiler because it doesn't allow the use of -O with -g and
2227 because when it is *not* optimizing, it allocates a memory
2228 location for each floating-point variable, and the memory
2229 location is what gets described in the DWARF AT_location
2230 attribute for the variable in question.
2231 Regardless of the severe mental illness of the x86/svr4 SDB, we
2232 do something sensible here and we use the following DWARF
2233 register numbers. Note that these are all stack-top-relative
2234 numbers.
2235 11 for %st(0) (gcc regno = 8)
2236 12 for %st(1) (gcc regno = 9)
2237 13 for %st(2) (gcc regno = 10)
2238 14 for %st(3) (gcc regno = 11)
2239 15 for %st(4) (gcc regno = 12)
2240 16 for %st(5) (gcc regno = 13)
2241 17 for %st(6) (gcc regno = 14)
2242 18 for %st(7) (gcc regno = 15)
2244 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2246 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2247 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2248 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2249 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2250 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2254 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2255 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2256 101, 102, 103, 104, /* bound registers */
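/* For example, with the mapping above svr4_dbx_register_map[2] == 1:
   gcc register number 2 is %ecx, which the SVR4 DWARF numbering calls
   register 1, matching the table in the comment above.  */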
2259 /* Define parameter passing and return registers. */
2261 static int const x86_64_int_parameter_registers[6] =
2263 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2266 static int const x86_64_ms_abi_int_parameter_registers[4] =
2268 CX_REG, DX_REG, R8_REG, R9_REG
2271 static int const x86_64_int_return_registers[4] =
2273 AX_REG, DX_REG, DI_REG, SI_REG
2276 /* Additional registers that are clobbered by SYSV calls. */
2278 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2280 SI_REG, DI_REG,
2281 XMM6_REG, XMM7_REG,
2282 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2283 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
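/* For example, with the tables above a SysV call passes its first two
   integer arguments in %rdi and %rsi (DI_REG, SI_REG) while an MS-ABI
   call passes them in %rcx and %rdx (CX_REG, DX_REG); a SysV callee may
   additionally clobber %rsi, %rdi and XMM6-XMM15, all of which are
   call-saved under the MS ABI, hence the extra clobber list above.  */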
2286 /* Define the structure for the machine field in struct function. */
2288 struct GTY(()) stack_local_entry {
2289 unsigned short mode;
2290 unsigned short n;
2291 rtx rtl;
2292 struct stack_local_entry *next;
2295 /* Structure describing stack frame layout.
2296 Stack grows downward:
2298 [arguments]
2299 <- ARG_POINTER
2300 saved pc
2302 saved static chain if ix86_static_chain_on_stack
2304 saved frame pointer if frame_pointer_needed
2305 <- HARD_FRAME_POINTER
2306 [saved regs]
2307 <- regs_save_offset
2308 [padding0]
2310 [saved SSE regs]
2311 <- sse_regs_save_offset
2312 [padding1] |
2313 | <- FRAME_POINTER
2314 [va_arg registers] |
2316 [frame] |
2318 [padding2] | = to_allocate
2319 <- STACK_POINTER
2321 struct ix86_frame
2323 int nsseregs;
2324 int nregs;
2325 int va_arg_size;
2326 int red_zone_size;
2327 int outgoing_arguments_size;
2329 /* The offsets relative to ARG_POINTER. */
2330 HOST_WIDE_INT frame_pointer_offset;
2331 HOST_WIDE_INT hard_frame_pointer_offset;
2332 HOST_WIDE_INT stack_pointer_offset;
2333 HOST_WIDE_INT hfp_save_offset;
2334 HOST_WIDE_INT reg_save_offset;
2335 HOST_WIDE_INT sse_reg_save_offset;
2337 /* When save_regs_using_mov is set, emit prologue using
2338 move instead of push instructions. */
2339 bool save_regs_using_mov;
2342 /* Which cpu are we scheduling for. */
2343 enum attr_cpu ix86_schedule;
2345 /* Which cpu are we optimizing for. */
2346 enum processor_type ix86_tune;
2348 /* Which instruction set architecture to use. */
2349 enum processor_type ix86_arch;
2351 /* True if processor has SSE prefetch instruction. */
2352 unsigned char x86_prefetch_sse;
2354 /* -mstackrealign option */
2355 static const char ix86_force_align_arg_pointer_string[]
2356 = "force_align_arg_pointer";
2358 static rtx (*ix86_gen_leave) (void);
2359 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2361 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2362 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2363 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2365 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2366 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2369 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2371 /* Preferred alignment for stack boundary in bits. */
2372 unsigned int ix86_preferred_stack_boundary;
2374 /* Alignment for incoming stack boundary in bits specified at
2375 command line. */
2376 static unsigned int ix86_user_incoming_stack_boundary;
2378 /* Default alignment for incoming stack boundary in bits. */
2379 static unsigned int ix86_default_incoming_stack_boundary;
2381 /* Alignment for incoming stack boundary in bits. */
2382 unsigned int ix86_incoming_stack_boundary;
2384 /* Calling abi specific va_list type nodes. */
2385 static GTY(()) tree sysv_va_list_type_node;
2386 static GTY(()) tree ms_va_list_type_node;
2388 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2389 char internal_label_prefix[16];
2390 int internal_label_prefix_len;
2392 /* Fence to use after loop using movnt. */
2393 tree x86_mfence;
2395 /* Register class used for passing a given 64-bit part of the argument.
2396 These represent classes as documented by the psABI, with the exception
2397 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2398 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2400 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2401 whenever possible (the upper half then contains only padding). */
2402 enum x86_64_reg_class
2404 X86_64_NO_CLASS,
2405 X86_64_INTEGER_CLASS,
2406 X86_64_INTEGERSI_CLASS,
2407 X86_64_SSE_CLASS,
2408 X86_64_SSESF_CLASS,
2409 X86_64_SSEDF_CLASS,
2410 X86_64_SSEUP_CLASS,
2411 X86_64_X87_CLASS,
2412 X86_64_X87UP_CLASS,
2413 X86_64_COMPLEX_X87_CLASS,
2414 X86_64_MEMORY_CLASS
2417 #define MAX_CLASSES 8
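/* Informal illustration of the classification above (per the x86-64 psABI;
   not used directly by the code): a plain `double' argument is classified
   X86_64_SSEDF_CLASS and passed in an XMM register, while
   `struct { long a; long b; }' is split into two eightbytes of
   X86_64_INTEGER_CLASS and passed in two integer registers.  */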
2419 /* Table of constants used by fldpi, fldln2, etc.... */
2420 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2421 static bool ext_80387_constants_init = 0;
2424 static struct machine_function * ix86_init_machine_status (void);
2425 static rtx ix86_function_value (const_tree, const_tree, bool);
2426 static bool ix86_function_value_regno_p (const unsigned int);
2427 static unsigned int ix86_function_arg_boundary (machine_mode,
2428 const_tree);
2429 static rtx ix86_static_chain (const_tree, bool);
2430 static int ix86_function_regparm (const_tree, const_tree);
2431 static void ix86_compute_frame_layout (struct ix86_frame *);
2432 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2433 rtx, rtx, int);
2434 static void ix86_add_new_builtins (HOST_WIDE_INT);
2435 static tree ix86_canonical_va_list_type (tree);
2436 static void predict_jump (int);
2437 static unsigned int split_stack_prologue_scratch_regno (void);
2438 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2440 enum ix86_function_specific_strings
2442 IX86_FUNCTION_SPECIFIC_ARCH,
2443 IX86_FUNCTION_SPECIFIC_TUNE,
2444 IX86_FUNCTION_SPECIFIC_MAX
2447 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2448 const char *, enum fpmath_unit, bool);
2449 static void ix86_function_specific_save (struct cl_target_option *,
2450 struct gcc_options *opts);
2451 static void ix86_function_specific_restore (struct gcc_options *opts,
2452 struct cl_target_option *);
2453 static void ix86_function_specific_print (FILE *, int,
2454 struct cl_target_option *);
2455 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2456 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2457 struct gcc_options *,
2458 struct gcc_options *,
2459 struct gcc_options *);
2460 static bool ix86_can_inline_p (tree, tree);
2461 static void ix86_set_current_function (tree);
2462 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2464 static enum calling_abi ix86_function_abi (const_tree);
2467 #ifndef SUBTARGET32_DEFAULT_CPU
2468 #define SUBTARGET32_DEFAULT_CPU "i386"
2469 #endif
2471 /* Whether -mtune= or -march= were specified */
2472 static int ix86_tune_defaulted;
2473 static int ix86_arch_specified;
2475 /* Vectorization library interface and handlers. */
2476 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2479 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2481 /* Processor target table, indexed by processor number */
2482 struct ptt
2484 const char *const name; /* processor name */
2485 const struct processor_costs *cost; /* Processor costs */
2486 const int align_loop; /* Default alignments. */
2487 const int align_loop_max_skip;
2488 const int align_jump;
2489 const int align_jump_max_skip;
2490 const int align_func;
2493 /* This table must be in sync with enum processor_type in i386.h. */
2494 static const struct ptt processor_target_table[PROCESSOR_max] =
2496 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2497 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2498 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2499 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2500 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2501 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2502 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2503 {"core2", &core_cost, 16, 10, 16, 10, 16},
2504 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2505 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2506 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2507 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2508 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2509 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2510 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2511 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2512 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2513 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2514 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2515 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2516 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2517 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2518 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2519 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2520 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2521 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2524 static unsigned int
2525 rest_of_handle_insert_vzeroupper (void)
2527 int i;
2529 /* vzeroupper instructions are inserted immediately after reload to
2530 account for possible spills from 256-bit registers. The pass
2531 reuses the mode switching infrastructure by re-running the mode insertion
2532 pass, so disable entities that have already been processed. */
2533 for (i = 0; i < MAX_386_ENTITIES; i++)
2534 ix86_optimize_mode_switching[i] = 0;
2536 ix86_optimize_mode_switching[AVX_U128] = 1;
2538 /* Call optimize_mode_switching. */
2539 g->get_passes ()->execute_pass_mode_switching ();
2540 return 0;
2543 namespace {
2545 const pass_data pass_data_insert_vzeroupper =
2547 RTL_PASS, /* type */
2548 "vzeroupper", /* name */
2549 OPTGROUP_NONE, /* optinfo_flags */
2550 TV_NONE, /* tv_id */
2551 0, /* properties_required */
2552 0, /* properties_provided */
2553 0, /* properties_destroyed */
2554 0, /* todo_flags_start */
2555 TODO_df_finish, /* todo_flags_finish */
2558 class pass_insert_vzeroupper : public rtl_opt_pass
2560 public:
2561 pass_insert_vzeroupper(gcc::context *ctxt)
2562 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2565 /* opt_pass methods: */
2566 virtual bool gate (function *)
2568 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2571 virtual unsigned int execute (function *)
2573 return rest_of_handle_insert_vzeroupper ();
2576 }; // class pass_insert_vzeroupper
2578 } // anon namespace
2580 rtl_opt_pass *
2581 make_pass_insert_vzeroupper (gcc::context *ctxt)
2583 return new pass_insert_vzeroupper (ctxt);
2586 /* Return true if a red-zone is in use. */
2588 static inline bool
2589 ix86_using_red_zone (void)
2591 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2594 /* Return a string that documents the current -m options. The caller is
2595 responsible for freeing the string. */
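/* Illustrative only (the exact contents depend on the active options and on
   the tables below): the returned string might look roughly like
   "-march=foo -mtune=foo -m64 -msse4.2 -msse4.1 ... -mfpmath=sse".  */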
2597 static char *
2598 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2599 const char *tune, enum fpmath_unit fpmath,
2600 bool add_nl_p)
2602 struct ix86_target_opts
2604 const char *option; /* option string */
2605 HOST_WIDE_INT mask; /* isa mask options */
2608 /* This table is ordered so that options like -msse4.2, which imply
2609 preceding options, are matched first. */
2610 static struct ix86_target_opts isa_opts[] =
2612 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2613 { "-mfma", OPTION_MASK_ISA_FMA },
2614 { "-mxop", OPTION_MASK_ISA_XOP },
2615 { "-mlwp", OPTION_MASK_ISA_LWP },
2616 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2617 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2618 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2619 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2620 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2621 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2622 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2623 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2624 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2625 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2626 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2627 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2628 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2629 { "-msse3", OPTION_MASK_ISA_SSE3 },
2630 { "-msse2", OPTION_MASK_ISA_SSE2 },
2631 { "-msse", OPTION_MASK_ISA_SSE },
2632 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2633 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2634 { "-mmmx", OPTION_MASK_ISA_MMX },
2635 { "-mabm", OPTION_MASK_ISA_ABM },
2636 { "-mbmi", OPTION_MASK_ISA_BMI },
2637 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2638 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2639 { "-mhle", OPTION_MASK_ISA_HLE },
2640 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2641 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2642 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2643 { "-madx", OPTION_MASK_ISA_ADX },
2644 { "-mtbm", OPTION_MASK_ISA_TBM },
2645 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2646 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2647 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2648 { "-maes", OPTION_MASK_ISA_AES },
2649 { "-msha", OPTION_MASK_ISA_SHA },
2650 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2651 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2652 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2653 { "-mf16c", OPTION_MASK_ISA_F16C },
2654 { "-mrtm", OPTION_MASK_ISA_RTM },
2655 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2656 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2657 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2658 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2659 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2660 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2661 { "-mmpx", OPTION_MASK_ISA_MPX },
2662 { "-mclwb", OPTION_MASK_ISA_CLWB },
2663 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2666 /* Flag options. */
2667 static struct ix86_target_opts flag_opts[] =
2669 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2670 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2671 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2672 { "-m80387", MASK_80387 },
2673 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2674 { "-malign-double", MASK_ALIGN_DOUBLE },
2675 { "-mcld", MASK_CLD },
2676 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2677 { "-mieee-fp", MASK_IEEE_FP },
2678 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2679 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2680 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2681 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2682 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2683 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2684 { "-mno-red-zone", MASK_NO_RED_ZONE },
2685 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2686 { "-mrecip", MASK_RECIP },
2687 { "-mrtd", MASK_RTD },
2688 { "-msseregparm", MASK_SSEREGPARM },
2689 { "-mstack-arg-probe", MASK_STACK_PROBE },
2690 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2691 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2692 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2693 { "-mvzeroupper", MASK_VZEROUPPER },
2694 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2695 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2696 { "-mprefer-avx128", MASK_PREFER_AVX128},
2699 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2701 char isa_other[40];
2702 char target_other[40];
2703 unsigned num = 0;
2704 unsigned i, j;
2705 char *ret;
2706 char *ptr;
2707 size_t len;
2708 size_t line_len;
2709 size_t sep_len;
2710 const char *abi;
2712 memset (opts, '\0', sizeof (opts));
2714 /* Add -march= option. */
2715 if (arch)
2717 opts[num][0] = "-march=";
2718 opts[num++][1] = arch;
2721 /* Add -mtune= option. */
2722 if (tune)
2724 opts[num][0] = "-mtune=";
2725 opts[num++][1] = tune;
2728 /* Add -m32/-m64/-mx32. */
2729 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2731 if ((isa & OPTION_MASK_ABI_64) != 0)
2732 abi = "-m64";
2733 else
2734 abi = "-mx32";
2735 isa &= ~ (OPTION_MASK_ISA_64BIT
2736 | OPTION_MASK_ABI_64
2737 | OPTION_MASK_ABI_X32);
2739 else
2740 abi = "-m32";
2741 opts[num++][0] = abi;
2743 /* Pick out the options in isa options. */
2744 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2746 if ((isa & isa_opts[i].mask) != 0)
2748 opts[num++][0] = isa_opts[i].option;
2749 isa &= ~ isa_opts[i].mask;
2753 if (isa && add_nl_p)
2755 opts[num++][0] = isa_other;
2756 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2757 isa);
2760 /* Add flag options. */
2761 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2763 if ((flags & flag_opts[i].mask) != 0)
2765 opts[num++][0] = flag_opts[i].option;
2766 flags &= ~ flag_opts[i].mask;
2770 if (flags && add_nl_p)
2772 opts[num++][0] = target_other;
2773 sprintf (target_other, "(other flags: %#x)", flags);
2776 /* Add -fpmath= option. */
2777 if (fpmath)
2779 opts[num][0] = "-mfpmath=";
2780 switch ((int) fpmath)
2782 case FPMATH_387:
2783 opts[num++][1] = "387";
2784 break;
2786 case FPMATH_SSE:
2787 opts[num++][1] = "sse";
2788 break;
2790 case FPMATH_387 | FPMATH_SSE:
2791 opts[num++][1] = "sse+387";
2792 break;
2794 default:
2795 gcc_unreachable ();
2799 /* Any options? */
2800 if (num == 0)
2801 return NULL;
2803 gcc_assert (num < ARRAY_SIZE (opts));
2805 /* Size the string. */
2806 len = 0;
2807 sep_len = (add_nl_p) ? 3 : 1;
2808 for (i = 0; i < num; i++)
2810 len += sep_len;
2811 for (j = 0; j < 2; j++)
2812 if (opts[i][j])
2813 len += strlen (opts[i][j]);
2816 /* Build the string. */
2817 ret = ptr = (char *) xmalloc (len);
2818 line_len = 0;
2820 for (i = 0; i < num; i++)
2822 size_t len2[2];
2824 for (j = 0; j < 2; j++)
2825 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2827 if (i != 0)
2829 *ptr++ = ' ';
2830 line_len++;
2832 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2834 *ptr++ = '\\';
2835 *ptr++ = '\n';
2836 line_len = 0;
2840 for (j = 0; j < 2; j++)
2841 if (opts[i][j])
2843 memcpy (ptr, opts[i][j], len2[j]);
2844 ptr += len2[j];
2845 line_len += len2[j];
2849 *ptr = '\0';
2850 gcc_assert (ret + len >= ptr);
2852 return ret;
2855 /* Return true if profiling code should be emitted before the
2856 prologue, otherwise false.
2857 Note: For x86 this is the case when "hotfix"-style -mfentry instrumentation is in use. */
2858 static bool
2859 ix86_profile_before_prologue (void)
2861 return flag_fentry != 0;
2864 /* Function that is callable from the debugger to print the current
2865 options. */
2866 void ATTRIBUTE_UNUSED
2867 ix86_debug_options (void)
2869 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2870 ix86_arch_string, ix86_tune_string,
2871 ix86_fpmath, true);
2873 if (opts)
2875 fprintf (stderr, "%s\n\n", opts);
2876 free (opts);
2878 else
2879 fputs ("<no options>\n\n", stderr);
2881 return;
2884 static const char *stringop_alg_names[] = {
2885 #define DEF_ENUM
2886 #define DEF_ALG(alg, name) #name,
2887 #include "stringop.def"
2888 #undef DEF_ENUM
2889 #undef DEF_ALG
2892 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2893 The string is of the following form (or a comma-separated list of such entries):
2895 strategy_alg:max_size:[align|noalign]
2897 where the full size range for the strategy is either [0, max_size] or
2898 [min_size, max_size], in which min_size is the max_size + 1 of the
2899 preceding range. The last size range must have max_size == -1.
2901 Examples:
2904 -mmemcpy-strategy=libcall:-1:noalign
2906 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2910 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2912 This tells the compiler to use the following strategy for memset:
2913 1) when the expected size is between [1, 16], use rep_8byte strategy;
2914 2) when the size is between [17, 2048], use vector_loop;
2915 3) when the size is > 2048, use libcall. */
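/* As a sketch of how the -mmemset-strategy example above is represented
   internally (assuming the parsing below succeeds), its three entries become

     input_ranges[0]: max =   16, alg = "rep_8byte",   noalign = true
     input_ranges[1]: max = 2048, alg = "vector_loop", noalign = false
     input_ranges[2]: max =   -1, alg = "libcall",     noalign = true

   where each alg is the stringop_alg value looked up by that name, and the
   result then overwrites the default stringop_algs size table.  */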
2917 struct stringop_size_range
2919 int max;
2920 stringop_alg alg;
2921 bool noalign;
2924 static void
2925 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2927 const struct stringop_algs *default_algs;
2928 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2929 char *curr_range_str, *next_range_str;
2930 int i = 0, n = 0;
2932 if (is_memset)
2933 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2934 else
2935 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2937 curr_range_str = strategy_str;
2941 int maxs;
2942 char alg_name[128];
2943 char align[16];
2944 next_range_str = strchr (curr_range_str, ',');
2945 if (next_range_str)
2946 *next_range_str++ = '\0';
2948 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2949 alg_name, &maxs, align))
2951 error ("wrong arg %s to option %s", curr_range_str,
2952 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2953 return;
2956 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2958 error ("size ranges of option %s should be increasing",
2959 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2960 return;
2963 for (i = 0; i < last_alg; i++)
2964 if (!strcmp (alg_name, stringop_alg_names[i]))
2965 break;
2967 if (i == last_alg)
2969 error ("wrong stringop strategy name %s specified for option %s",
2970 alg_name,
2971 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2972 return;
2975 input_ranges[n].max = maxs;
2976 input_ranges[n].alg = (stringop_alg) i;
2977 if (!strcmp (align, "align"))
2978 input_ranges[n].noalign = false;
2979 else if (!strcmp (align, "noalign"))
2980 input_ranges[n].noalign = true;
2981 else
2983 error ("unknown alignment %s specified for option %s",
2984 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2985 return;
2987 n++;
2988 curr_range_str = next_range_str;
2990 while (curr_range_str);
2992 if (input_ranges[n - 1].max != -1)
2994 error ("the max value for the last size range should be -1"
2995 " for option %s",
2996 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2997 return;
3000 if (n > MAX_STRINGOP_ALGS)
3002 error ("too many size ranges specified in option %s",
3003 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3004 return;
3007 /* Now override the default algs array. */
3008 for (i = 0; i < n; i++)
3010 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3011 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3012 = input_ranges[i].alg;
3013 *const_cast<int *>(&default_algs->size[i].noalign)
3014 = input_ranges[i].noalign;
3019 /* Parse the -mtune-ctrl= option. When DUMP is true,
3020 print the features that are explicitly set. */
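/* Illustrative example (the feature names are taken from x86-tune.def and
   are assumed to exist there): given

     -mtune-ctrl=use_leave,^avoid_mem_opnd_for_cmove

   the loop below sets the "use_leave" feature and, because of the leading
   '^', clears "avoid_mem_opnd_for_cmove", overriding the defaults picked
   for the current -mtune processor.  */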
3022 static void
3023 parse_mtune_ctrl_str (bool dump)
3025 if (!ix86_tune_ctrl_string)
3026 return;
3028 char *next_feature_string = NULL;
3029 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3030 char *orig = curr_feature_string;
3031 int i;
3034 bool clear = false;
3036 next_feature_string = strchr (curr_feature_string, ',');
3037 if (next_feature_string)
3038 *next_feature_string++ = '\0';
3039 if (*curr_feature_string == '^')
3041 curr_feature_string++;
3042 clear = true;
3044 for (i = 0; i < X86_TUNE_LAST; i++)
3046 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3048 ix86_tune_features[i] = !clear;
3049 if (dump)
3050 fprintf (stderr, "Explicitly %s feature %s\n",
3051 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3052 break;
3055 if (i == X86_TUNE_LAST)
3056 error ("Unknown parameter to option -mtune-ctrl: %s",
3057 clear ? curr_feature_string - 1 : curr_feature_string);
3058 curr_feature_string = next_feature_string;
3060 while (curr_feature_string);
3061 free (orig);
3064 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3065 processor type. */
3067 static void
3068 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3070 unsigned int ix86_tune_mask = 1u << ix86_tune;
3071 int i;
3073 for (i = 0; i < X86_TUNE_LAST; ++i)
3075 if (ix86_tune_no_default)
3076 ix86_tune_features[i] = 0;
3077 else
3078 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3081 if (dump)
3083 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3084 for (i = 0; i < X86_TUNE_LAST; i++)
3085 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3086 ix86_tune_features[i] ? "on" : "off");
3089 parse_mtune_ctrl_str (dump);
3093 /* Override various settings based on options. If MAIN_ARGS_P, the
3094 options are from the command line, otherwise they are from
3095 attributes. */
3097 static void
3098 ix86_option_override_internal (bool main_args_p,
3099 struct gcc_options *opts,
3100 struct gcc_options *opts_set)
3102 int i;
3103 unsigned int ix86_arch_mask;
3104 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3105 const char *prefix;
3106 const char *suffix;
3107 const char *sw;
3109 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3110 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3111 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3112 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3113 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3114 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3115 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3116 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3117 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3118 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3119 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3120 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3121 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3122 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3123 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3124 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3125 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3126 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3127 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3128 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3129 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3130 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3131 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3132 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3133 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3134 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3135 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3136 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3137 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3138 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3139 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3140 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3141 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3142 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3143 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3144 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3145 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3146 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3147 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3148 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3149 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3150 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3151 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3152 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3153 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3154 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3155 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3156 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3157 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3158 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3159 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3160 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3161 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3162 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3163 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3164 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3165 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3167 #define PTA_CORE2 \
3168 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3169 | PTA_CX16 | PTA_FXSR)
3170 #define PTA_NEHALEM \
3171 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3172 #define PTA_WESTMERE \
3173 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3174 #define PTA_SANDYBRIDGE \
3175 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3176 #define PTA_IVYBRIDGE \
3177 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3178 #define PTA_HASWELL \
3179 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3180 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3181 #define PTA_BROADWELL \
3182 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3183 #define PTA_KNL \
3184 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3185 #define PTA_BONNELL \
3186 (PTA_CORE2 | PTA_MOVBE)
3187 #define PTA_SILVERMONT \
3188 (PTA_WESTMERE | PTA_MOVBE)
3190 /* If this reaches 64, the flags field in struct pta below needs to be widened. */
3192 static struct pta
3194 const char *const name; /* processor name or nickname. */
3195 const enum processor_type processor;
3196 const enum attr_cpu schedule;
3197 const unsigned HOST_WIDE_INT flags;
3199 const processor_alias_table[] =
3201 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3202 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3203 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3204 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3205 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3206 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3207 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3208 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3209 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3210 PTA_MMX | PTA_SSE | PTA_FXSR},
3211 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3212 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3213 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3214 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3215 PTA_MMX | PTA_SSE | PTA_FXSR},
3216 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3217 PTA_MMX | PTA_SSE | PTA_FXSR},
3218 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3219 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3220 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3221 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3222 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3223 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3224 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3225 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3226 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3227 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3228 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3229 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3230 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3231 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3232 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3233 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3234 PTA_SANDYBRIDGE},
3235 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3236 PTA_SANDYBRIDGE},
3237 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3238 PTA_IVYBRIDGE},
3239 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3240 PTA_IVYBRIDGE},
3241 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3242 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3243 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3244 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3245 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3246 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3247 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3248 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3249 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3250 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3252 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3253 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3254 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3255 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3256 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3257 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3258 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3259 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3260 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3261 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3262 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3263 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3264 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3265 {"x86-64", PROCESSOR_K8, CPU_K8,
3266 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3267 {"k8", PROCESSOR_K8, CPU_K8,
3268 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3269 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3270 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3271 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3272 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3273 {"opteron", PROCESSOR_K8, CPU_K8,
3274 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3275 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3276 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3277 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3278 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3279 {"athlon64", PROCESSOR_K8, CPU_K8,
3280 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3281 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3282 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3283 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3284 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3285 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3286 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3287 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3288 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3289 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3290 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3291 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3292 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3293 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3294 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3295 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3296 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3297 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3298 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3299 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3300 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3301 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3302 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3303 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3304 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3305 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3306 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3307 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3308 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3309 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3310 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3311 | PTA_XSAVEOPT | PTA_FSGSBASE},
3312 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3313 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3314 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3315 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3316 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3317 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3318 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3319 | PTA_MOVBE},
3320 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3321 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3322 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3323 | PTA_FXSR | PTA_XSAVE},
3324 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3325 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3326 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3327 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3328 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3329 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3331 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3332 PTA_64BIT
3333 | PTA_HLE /* Flags are only used for the -march switch. */ },
3336 /* -mrecip options. */
3337 static struct
3339 const char *string; /* option name */
3340 unsigned int mask; /* mask bits to set */
3342 const recip_options[] =
3344 { "all", RECIP_MASK_ALL },
3345 { "none", RECIP_MASK_NONE },
3346 { "div", RECIP_MASK_DIV },
3347 { "sqrt", RECIP_MASK_SQRT },
3348 { "vec-div", RECIP_MASK_VEC_DIV },
3349 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3352 int const pta_size = ARRAY_SIZE (processor_alias_table);
3354 /* Set up prefix/suffix so the error messages refer to either the command
3355 line argument, or the attribute(target). */
3356 if (main_args_p)
3358 prefix = "-m";
3359 suffix = "";
3360 sw = "switch";
3362 else
3364 prefix = "option(\"";
3365 suffix = "\")";
3366 sw = "attribute";
3369 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3370 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3371 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3372 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3373 #ifdef TARGET_BI_ARCH
3374 else
3376 #if TARGET_BI_ARCH == 1
3377 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3378 is on and OPTION_MASK_ABI_X32 is off. We turn off
3379 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3380 -mx32. */
3381 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3382 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3383 #else
3384 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3385 on and OPTION_MASK_ABI_64 is off. We turn off
3386 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3387 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3388 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3389 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3390 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3391 #endif
3393 #endif
3395 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3397 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3398 OPTION_MASK_ABI_64 for TARGET_X32. */
3399 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3400 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3402 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3403 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3404 | OPTION_MASK_ABI_X32
3405 | OPTION_MASK_ABI_64);
3406 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3408 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3409 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3410 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3411 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3414 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3415 SUBTARGET_OVERRIDE_OPTIONS;
3416 #endif
3418 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3419 SUBSUBTARGET_OVERRIDE_OPTIONS;
3420 #endif
3422 /* -fPIC is the default for 64-bit Mach-O (Darwin) targets. */
3423 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3424 opts->x_flag_pic = 2;
3426 /* Need to check -mtune=generic first. */
3427 if (opts->x_ix86_tune_string)
3429 /* As special support for cross compilers we read -mtune=native
3430 as -mtune=generic. With native compilers we won't see the
3431 -mtune=native, as it was changed by the driver. */
3432 if (!strcmp (opts->x_ix86_tune_string, "native"))
3434 opts->x_ix86_tune_string = "generic";
3436 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3437 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3438 "%stune=k8%s or %stune=generic%s instead as appropriate",
3439 prefix, suffix, prefix, suffix, prefix, suffix);
3441 else
3443 if (opts->x_ix86_arch_string)
3444 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3445 if (!opts->x_ix86_tune_string)
3447 opts->x_ix86_tune_string
3448 = processor_target_table[TARGET_CPU_DEFAULT].name;
3449 ix86_tune_defaulted = 1;
3452 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3453 or defaulted. We need to use a sensible tune option. */
3454 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3456 opts->x_ix86_tune_string = "generic";
3460 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3461 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3463 /* rep; movq isn't available in 32-bit code. */
3464 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3465 opts->x_ix86_stringop_alg = no_stringop;
3468 if (!opts->x_ix86_arch_string)
3469 opts->x_ix86_arch_string
3470 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3471 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3472 else
3473 ix86_arch_specified = 1;
3475 if (opts_set->x_ix86_pmode)
3477 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3478 && opts->x_ix86_pmode == PMODE_SI)
3479 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3480 && opts->x_ix86_pmode == PMODE_DI))
3481 error ("address mode %qs not supported in the %s bit mode",
3482 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3483 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3485 else
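/* No address mode was specified; default to DImode pointers for LP64 and SImode pointers for x32 and 32-bit code. */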
3486 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3487 ? PMODE_DI : PMODE_SI;
3489 if (!opts_set->x_ix86_abi)
3490 opts->x_ix86_abi = DEFAULT_ABI;
3492 /* For targets using the MS ABI, enable ms-extensions if not
3493 explicitly turned off. For non-MS ABI targets we turn this
3494 option off. */
3495 if (!opts_set->x_flag_ms_extensions)
3496 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3498 if (opts_set->x_ix86_cmodel)
3500 switch (opts->x_ix86_cmodel)
3502 case CM_SMALL:
3503 case CM_SMALL_PIC:
3504 if (opts->x_flag_pic)
3505 opts->x_ix86_cmodel = CM_SMALL_PIC;
3506 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3507 error ("code model %qs not supported in the %s bit mode",
3508 "small", "32");
3509 break;
3511 case CM_MEDIUM:
3512 case CM_MEDIUM_PIC:
3513 if (opts->x_flag_pic)
3514 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3515 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3516 error ("code model %qs not supported in the %s bit mode",
3517 "medium", "32");
3518 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3519 error ("code model %qs not supported in x32 mode",
3520 "medium");
3521 break;
3523 case CM_LARGE:
3524 case CM_LARGE_PIC:
3525 if (opts->x_flag_pic)
3526 opts->x_ix86_cmodel = CM_LARGE_PIC;
3527 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3528 error ("code model %qs not supported in the %s bit mode",
3529 "large", "32");
3530 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3531 error ("code model %qs not supported in x32 mode",
3532 "large");
3533 break;
3535 case CM_32:
3536 if (opts->x_flag_pic)
3537 error ("code model %s does not support PIC mode", "32");
3538 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3539 error ("code model %qs not supported in the %s bit mode",
3540 "32", "64");
3541 break;
3543 case CM_KERNEL:
3544 if (opts->x_flag_pic)
3546 error ("code model %s does not support PIC mode", "kernel");
3547 opts->x_ix86_cmodel = CM_32;
3549 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3550 error ("code model %qs not supported in the %s bit mode",
3551 "kernel", "32");
3552 break;
3554 default:
3555 gcc_unreachable ();
3558 else
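/* No code model was specified; pick a default based on the target. */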
3560 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3561 use of rip-relative addressing. This eliminates fixups that
3562 would otherwise be needed if this object is to be placed in a
3563 DLL, and is essentially just as efficient as direct addressing. */
3564 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3565 && (TARGET_RDOS || TARGET_PECOFF))
3566 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3567 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3568 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3569 else
3570 opts->x_ix86_cmodel = CM_32;
3572 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3574 error ("-masm=intel not supported in this configuration");
3575 opts->x_ix86_asm_dialect = ASM_ATT;
3577 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3578 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3579 sorry ("%i-bit mode not compiled in",
3580 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3582 for (i = 0; i < pta_size; i++)
3583 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3585 ix86_schedule = processor_alias_table[i].schedule;
3586 ix86_arch = processor_alias_table[i].processor;
3587 /* Default cpu tuning to the architecture. */
3588 ix86_tune = ix86_arch;
3590 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3591 && !(processor_alias_table[i].flags & PTA_64BIT))
3592 error ("CPU you selected does not support x86-64 "
3593 "instruction set");
3595 if (processor_alias_table[i].flags & PTA_MMX
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3598 if (processor_alias_table[i].flags & PTA_3DNOW
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3601 if (processor_alias_table[i].flags & PTA_3DNOW_A
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3604 if (processor_alias_table[i].flags & PTA_SSE
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3607 if (processor_alias_table[i].flags & PTA_SSE2
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3610 if (processor_alias_table[i].flags & PTA_SSE3
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3613 if (processor_alias_table[i].flags & PTA_SSSE3
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3616 if (processor_alias_table[i].flags & PTA_SSE4_1
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3619 if (processor_alias_table[i].flags & PTA_SSE4_2
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3622 if (processor_alias_table[i].flags & PTA_AVX
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3625 if (processor_alias_table[i].flags & PTA_AVX2
3626 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3627 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3628 if (processor_alias_table[i].flags & PTA_FMA
3629 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3630 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3631 if (processor_alias_table[i].flags & PTA_SSE4A
3632 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3633 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3634 if (processor_alias_table[i].flags & PTA_FMA4
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3637 if (processor_alias_table[i].flags & PTA_XOP
3638 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3639 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3640 if (processor_alias_table[i].flags & PTA_LWP
3641 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3642 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3643 if (processor_alias_table[i].flags & PTA_ABM
3644 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3645 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3646 if (processor_alias_table[i].flags & PTA_BMI
3647 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3648 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3649 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3650 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3651 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3652 if (processor_alias_table[i].flags & PTA_TBM
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3655 if (processor_alias_table[i].flags & PTA_BMI2
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3658 if (processor_alias_table[i].flags & PTA_CX16
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3661 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3664 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3665 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3668 if (processor_alias_table[i].flags & PTA_MOVBE
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3671 if (processor_alias_table[i].flags & PTA_AES
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3674 if (processor_alias_table[i].flags & PTA_SHA
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3677 if (processor_alias_table[i].flags & PTA_PCLMUL
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3680 if (processor_alias_table[i].flags & PTA_FSGSBASE
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3683 if (processor_alias_table[i].flags & PTA_RDRND
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3686 if (processor_alias_table[i].flags & PTA_F16C
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3689 if (processor_alias_table[i].flags & PTA_RTM
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3692 if (processor_alias_table[i].flags & PTA_HLE
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3695 if (processor_alias_table[i].flags & PTA_PRFCHW
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3698 if (processor_alias_table[i].flags & PTA_RDSEED
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3701 if (processor_alias_table[i].flags & PTA_ADX
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3704 if (processor_alias_table[i].flags & PTA_FXSR
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3707 if (processor_alias_table[i].flags & PTA_XSAVE
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3710 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3713 if (processor_alias_table[i].flags & PTA_AVX512F
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3716 if (processor_alias_table[i].flags & PTA_AVX512ER
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3719 if (processor_alias_table[i].flags & PTA_AVX512PF
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3722 if (processor_alias_table[i].flags & PTA_AVX512CD
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3725 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3728 if (processor_alias_table[i].flags & PTA_PCOMMIT
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3731 if (processor_alias_table[i].flags & PTA_CLWB
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3734 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3737 if (processor_alias_table[i].flags & PTA_XSAVEC
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3740 if (processor_alias_table[i].flags & PTA_XSAVES
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3743 if (processor_alias_table[i].flags & PTA_AVX512DQ
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3746 if (processor_alias_table[i].flags & PTA_AVX512BW
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3749 if (processor_alias_table[i].flags & PTA_AVX512VL
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3752 if (processor_alias_table[i].flags & PTA_MPX
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3755 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3758 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3761 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3762 x86_prefetch_sse = true;
3764 break;
3767 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3768 error ("Intel MPX does not support x32");
3773 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3774 error ("generic CPU can be used only for %stune=%s %s",
3775 prefix, suffix, sw);
3776 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3777 error ("intel CPU can be used only for %stune=%s %s",
3778 prefix, suffix, sw);
3779 else if (i == pta_size)
3780 error ("bad value (%s) for %sarch=%s %s",
3781 opts->x_ix86_arch_string, prefix, suffix, sw);
3783 ix86_arch_mask = 1u << ix86_arch;
3784 for (i = 0; i < X86_ARCH_LAST; ++i)
3785 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3787 for (i = 0; i < pta_size; i++)
3788 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3790 ix86_schedule = processor_alias_table[i].schedule;
3791 ix86_tune = processor_alias_table[i].processor;
3792 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3794 if (!(processor_alias_table[i].flags & PTA_64BIT))
3796 if (ix86_tune_defaulted)
3798 opts->x_ix86_tune_string = "x86-64";
3799 for (i = 0; i < pta_size; i++)
3800 if (! strcmp (opts->x_ix86_tune_string,
3801 processor_alias_table[i].name))
3802 break;
3803 ix86_schedule = processor_alias_table[i].schedule;
3804 ix86_tune = processor_alias_table[i].processor;
3806 else
3807 error ("CPU you selected does not support x86-64 "
3808 "instruction set");
3811 /* Intel CPUs have always interpreted SSE prefetch instructions as
3812 NOPs; so, we can enable SSE prefetch instructions even when
3813 -mtune (rather than -march) points us to a processor that has them.
3814 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3815 higher processors. */
3816 if (TARGET_CMOV
3817 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3818 x86_prefetch_sse = true;
3819 break;
3822 if (ix86_tune_specified && i == pta_size)
3823 error ("bad value (%s) for %stune=%s %s",
3824 opts->x_ix86_tune_string, prefix, suffix, sw);
3826 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3828 #ifndef USE_IX86_FRAME_POINTER
3829 #define USE_IX86_FRAME_POINTER 0
3830 #endif
3832 #ifndef USE_X86_64_FRAME_POINTER
3833 #define USE_X86_64_FRAME_POINTER 0
3834 #endif
3836 /* Set the default values for switches whose default depends on TARGET_64BIT
3837 in case they weren't overwritten by command line options. */
3838 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3840 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3841 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3842 if (opts->x_flag_asynchronous_unwind_tables
3843 && !opts_set->x_flag_unwind_tables
3844 && TARGET_64BIT_MS_ABI)
3845 opts->x_flag_unwind_tables = 1;
3846 if (opts->x_flag_asynchronous_unwind_tables == 2)
3847 opts->x_flag_unwind_tables
3848 = opts->x_flag_asynchronous_unwind_tables = 1;
3849 if (opts->x_flag_pcc_struct_return == 2)
3850 opts->x_flag_pcc_struct_return = 0;
3852 else
3854 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3855 opts->x_flag_omit_frame_pointer
3856 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3857 if (opts->x_flag_asynchronous_unwind_tables == 2)
3858 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3859 if (opts->x_flag_pcc_struct_return == 2)
3860 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3863 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3864 if (opts->x_optimize_size)
3865 ix86_cost = &ix86_size_cost;
3866 else
3867 ix86_cost = ix86_tune_cost;
3869 /* Arrange to set up i386_stack_locals for all functions. */
3870 init_machine_status = ix86_init_machine_status;
3872 /* Validate -mregparm= value. */
3873 if (opts_set->x_ix86_regparm)
3875 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3876 warning (0, "-mregparm is ignored in 64-bit mode");
3877 if (opts->x_ix86_regparm > REGPARM_MAX)
3879 error ("-mregparm=%d is not between 0 and %d",
3880 opts->x_ix86_regparm, REGPARM_MAX);
3881 opts->x_ix86_regparm = 0;
3884 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3885 opts->x_ix86_regparm = REGPARM_MAX;
3887 /* Default align_* from the processor table. */
3888 if (opts->x_align_loops == 0)
3890 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3891 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3893 if (opts->x_align_jumps == 0)
3895 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3896 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3898 if (opts->x_align_functions == 0)
3900 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3903 /* Provide default for -mbranch-cost= value. */
3904 if (!opts_set->x_ix86_branch_cost)
3905 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3907 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3909 opts->x_target_flags
3910 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3912 /* Enable by default the SSE and MMX builtins. Do allow the user to
3913 explicitly disable any of these. In particular, disabling SSE and
3914 MMX for kernel code is extremely useful. */
3915 if (!ix86_arch_specified)
3916 opts->x_ix86_isa_flags
3917 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3918 | TARGET_SUBTARGET64_ISA_DEFAULT)
3919 & ~opts->x_ix86_isa_flags_explicit);
3921 if (TARGET_RTD_P (opts->x_target_flags))
3922 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3924 else
3926 opts->x_target_flags
3927 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3929 if (!ix86_arch_specified)
3930 opts->x_ix86_isa_flags
3931 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3933 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3934 when the programmer takes care to keep the stack from being destroyed. */
3935 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3936 opts->x_target_flags |= MASK_NO_RED_ZONE;
3939 /* Keep nonleaf frame pointers. */
3940 if (opts->x_flag_omit_frame_pointer)
3941 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3942 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3943 opts->x_flag_omit_frame_pointer = 1;
3945 /* If we're doing fast math, we don't care about comparison order
3946 wrt NaNs. This lets us use a shorter comparison sequence. */
3947 if (opts->x_flag_finite_math_only)
3948 opts->x_target_flags &= ~MASK_IEEE_FP;
3950 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3951 since the insns won't need emulation. */
3952 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3953 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3955 /* Likewise, if the target doesn't have a 387, or we've specified
3956 software floating point, don't use 387 inline intrinsics. */
3957 if (!TARGET_80387_P (opts->x_target_flags))
3958 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3960 /* Turn on MMX builtins for -msse. */
3961 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3962 opts->x_ix86_isa_flags
3963 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3965 /* Enable SSE prefetch. */
3966 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3967 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3968 x86_prefetch_sse = true;
3970 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3971 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3972 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3973 opts->x_ix86_isa_flags
3974 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3976 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3977 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3978 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3979 opts->x_ix86_isa_flags
3980 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3982 /* Enable lzcnt instruction for -mabm. */
3983 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3984 opts->x_ix86_isa_flags
3985 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3987 /* Validate -mpreferred-stack-boundary= value or default it to
3988 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3989 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3990 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3992 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3993 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3994 int max = (TARGET_SEH ? 4 : 12);
3996 if (opts->x_ix86_preferred_stack_boundary_arg < min
3997 || opts->x_ix86_preferred_stack_boundary_arg > max)
3999 if (min == max)
4000 error ("-mpreferred-stack-boundary is not supported "
4001 "for this target");
4002 else
4003 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4004 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4006 else
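/* The option argument is the log2 of the boundary in bytes; convert it to bits. */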
4007 ix86_preferred_stack_boundary
4008 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4011 /* Set the default value for -mstackrealign. */
4012 if (opts->x_ix86_force_align_arg_pointer == -1)
4013 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4015 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4017 /* Validate -mincoming-stack-boundary= value or default it to
4018 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4019 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4020 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4022 if (opts->x_ix86_incoming_stack_boundary_arg
4023 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4024 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4025 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4026 opts->x_ix86_incoming_stack_boundary_arg,
4027 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4028 else
4030 ix86_user_incoming_stack_boundary
4031 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4032 ix86_incoming_stack_boundary
4033 = ix86_user_incoming_stack_boundary;
4037 #ifndef NO_PROFILE_COUNTERS
4038 if (flag_nop_mcount)
4039 error ("-mnop-mcount is not compatible with this target");
4040 #endif
4041 if (flag_nop_mcount && flag_pic)
4042 error ("-mnop-mcount is not implemented for -fPIC");
4044 /* Accept -msseregparm only if at least SSE support is enabled. */
4045 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4046 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4047 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4049 if (opts_set->x_ix86_fpmath)
4051 if (opts->x_ix86_fpmath & FPMATH_SSE)
4053 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4055 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4056 opts->x_ix86_fpmath = FPMATH_387;
4058 else if ((opts->x_ix86_fpmath & FPMATH_387)
4059 && !TARGET_80387_P (opts->x_target_flags))
4061 warning (0, "387 instruction set disabled, using SSE arithmetics");
4062 opts->x_ix86_fpmath = FPMATH_SSE;
4066 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4067 fpmath=387. The latter is nevertheless the default on many targets,
4068 since the extra 80-bit precision of temporaries is considered part
4069 of the ABI. Override the default at least for -ffast-math.
4070 TODO: -mfpmath=both seems to produce code of the same performance
4071 with slightly smaller binaries. It is however not clear whether
4072 register allocation is ready for this setting.
4073 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4074 codegen. We may switch to 387 with -ffast-math for size-optimized
4075 functions. */
4076 else if (fast_math_flags_set_p (&global_options)
4077 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4078 opts->x_ix86_fpmath = FPMATH_SSE;
4079 else
4080 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4082 /* If the i387 is disabled, then do not return values in it. */
4083 if (!TARGET_80387_P (opts->x_target_flags))
4084 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4086 /* Use an external vectorized library when vectorizing intrinsics. */
4087 if (opts_set->x_ix86_veclibabi_type)
4088 switch (opts->x_ix86_veclibabi_type)
4090 case ix86_veclibabi_type_svml:
4091 ix86_veclib_handler = ix86_veclibabi_svml;
4092 break;
4094 case ix86_veclibabi_type_acml:
4095 ix86_veclib_handler = ix86_veclibabi_acml;
4096 break;
4098 default:
4099 gcc_unreachable ();
4102 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4103 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4104 && !opts->x_optimize_size)
4105 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4107 /* If stack probes are required, the space used for large function
4108 arguments on the stack must also be probed, so enable
4109 -maccumulate-outgoing-args so this happens in the prologue. */
4110 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4111 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4113 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4114 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4115 "for correctness", prefix, suffix);
4116 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4119 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4121 char *p;
4122 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4123 p = strchr (internal_label_prefix, 'X');
4124 internal_label_prefix_len = p - internal_label_prefix;
4125 *p = '\0';
4128 /* When a scheduling description is not available, disable the scheduler
4129 passes so they won't slow down compilation and make x87 code slower. */
4130 if (!TARGET_SCHEDULE)
4131 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4133 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4134 ix86_tune_cost->simultaneous_prefetches,
4135 opts->x_param_values,
4136 opts_set->x_param_values);
4137 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4138 ix86_tune_cost->prefetch_block,
4139 opts->x_param_values,
4140 opts_set->x_param_values);
4141 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4142 ix86_tune_cost->l1_cache_size,
4143 opts->x_param_values,
4144 opts_set->x_param_values);
4145 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4146 ix86_tune_cost->l2_cache_size,
4147 opts->x_param_values,
4148 opts_set->x_param_values);
4150 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4151 if (opts->x_flag_prefetch_loop_arrays < 0
4152 && HAVE_prefetch
4153 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4154 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4155 opts->x_flag_prefetch_loop_arrays = 1;
4157 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4158 can be optimized to ap = __builtin_next_arg (0). */
4159 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4160 targetm.expand_builtin_va_start = NULL;
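/* Select the RTL generator functions that match the target word size. */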
4162 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4164 ix86_gen_leave = gen_leave_rex64;
4165 if (Pmode == DImode)
4167 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4168 ix86_gen_tls_local_dynamic_base_64
4169 = gen_tls_local_dynamic_base_64_di;
4171 else
4173 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4174 ix86_gen_tls_local_dynamic_base_64
4175 = gen_tls_local_dynamic_base_64_si;
4178 else
4179 ix86_gen_leave = gen_leave;
4181 if (Pmode == DImode)
4183 ix86_gen_add3 = gen_adddi3;
4184 ix86_gen_sub3 = gen_subdi3;
4185 ix86_gen_sub3_carry = gen_subdi3_carry;
4186 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4187 ix86_gen_andsp = gen_anddi3;
4188 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4189 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4190 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4191 ix86_gen_monitor = gen_sse3_monitor_di;
4193 else
4195 ix86_gen_add3 = gen_addsi3;
4196 ix86_gen_sub3 = gen_subsi3;
4197 ix86_gen_sub3_carry = gen_subsi3_carry;
4198 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4199 ix86_gen_andsp = gen_andsi3;
4200 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4201 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4202 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4203 ix86_gen_monitor = gen_sse3_monitor_si;
4206 #ifdef USE_IX86_CLD
4207 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4208 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4209 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4210 #endif
4212 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4214 if (opts->x_flag_fentry > 0)
4215 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4216 "with -fpic");
4217 opts->x_flag_fentry = 0;
4219 else if (TARGET_SEH)
4221 if (opts->x_flag_fentry == 0)
4222 sorry ("-mno-fentry isn%'t compatible with SEH");
4223 opts->x_flag_fentry = 1;
4225 else if (opts->x_flag_fentry < 0)
4227 #if defined(PROFILE_BEFORE_PROLOGUE)
4228 opts->x_flag_fentry = 1;
4229 #else
4230 opts->x_flag_fentry = 0;
4231 #endif
4234 /* When not optimizing for size, enable the vzeroupper optimization for
4235 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4236 AVX unaligned loads/stores. */
4237 if (!opts->x_optimize_size)
4239 if (flag_expensive_optimizations
4240 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4241 opts->x_target_flags |= MASK_VZEROUPPER;
4242 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4243 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4244 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4245 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4246 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4247 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4248 /* Enable 128-bit AVX instruction generation
4249 for the auto-vectorizer. */
4250 if (TARGET_AVX128_OPTIMAL
4251 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4252 opts->x_target_flags |= MASK_PREFER_AVX128;
4255 if (opts->x_ix86_recip_name)
4257 char *p = ASTRDUP (opts->x_ix86_recip_name);
4258 char *q;
4259 unsigned int mask, i;
4260 bool invert;
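/* -mrecip= takes a comma-separated list of option names; a name prefixed with '!' disables that bit, and "default" selects all of them. */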
4262 while ((q = strtok (p, ",")) != NULL)
4264 p = NULL;
4265 if (*q == '!')
4267 invert = true;
4268 q++;
4270 else
4271 invert = false;
4273 if (!strcmp (q, "default"))
4274 mask = RECIP_MASK_ALL;
4275 else
4277 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4278 if (!strcmp (q, recip_options[i].string))
4280 mask = recip_options[i].mask;
4281 break;
4284 if (i == ARRAY_SIZE (recip_options))
4286 error ("unknown option for -mrecip=%s", q);
4287 invert = false;
4288 mask = RECIP_MASK_NONE;
4292 opts->x_recip_mask_explicit |= mask;
4293 if (invert)
4294 opts->x_recip_mask &= ~mask;
4295 else
4296 opts->x_recip_mask |= mask;
4300 if (TARGET_RECIP_P (opts->x_target_flags))
4301 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4302 else if (opts_set->x_target_flags & MASK_RECIP)
4303 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4305 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4306 for 64-bit Bionic. */
4307 if (TARGET_HAS_BIONIC
4308 && !(opts_set->x_target_flags
4309 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4310 opts->x_target_flags |= (TARGET_64BIT
4311 ? MASK_LONG_DOUBLE_128
4312 : MASK_LONG_DOUBLE_64);
4314 /* Only one of them can be active. */
4315 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4316 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4318 /* Save the initial options in case the user does function specific
4319 options. */
4320 if (main_args_p)
4321 target_option_default_node = target_option_current_node
4322 = build_target_option_node (opts);
4324 /* Handle stack protector */
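/* Default to the global guard symbol for Bionic targets and the TLS-based guard otherwise. */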
4325 if (!opts_set->x_ix86_stack_protector_guard)
4326 opts->x_ix86_stack_protector_guard
4327 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4329 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4330 if (opts->x_ix86_tune_memcpy_strategy)
4332 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4333 ix86_parse_stringop_strategy_string (str, false);
4334 free (str);
4337 if (opts->x_ix86_tune_memset_strategy)
4339 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4340 ix86_parse_stringop_strategy_string (str, true);
4341 free (str);
4345 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4347 static void
4348 ix86_option_override (void)
4350 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4351 struct register_pass_info insert_vzeroupper_info
4352 = { pass_insert_vzeroupper, "reload",
4353 1, PASS_POS_INSERT_AFTER
4356 ix86_option_override_internal (true, &global_options, &global_options_set);
4359 /* This needs to be done at start up. It's convenient to do it here. */
4360 register_pass (&insert_vzeroupper_info);
4363 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4364 static char *
4365 ix86_offload_options (void)
4367 if (TARGET_LP64)
4368 return xstrdup ("-foffload-abi=lp64");
4369 return xstrdup ("-foffload-abi=ilp32");
4372 /* Update register usage after having seen the compiler flags. */
4374 static void
4375 ix86_conditional_register_usage (void)
4377 int i, c_mask;
4378 unsigned int j;
4380 /* The PIC register, if it exists, is fixed. */
4381 j = PIC_OFFSET_TABLE_REGNUM;
4382 if (j != INVALID_REGNUM)
4383 fixed_regs[j] = call_used_regs[j] = 1;
4385 /* For 32-bit targets, squash the REX registers. */
4386 if (! TARGET_64BIT)
4388 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4389 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4390 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4391 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4392 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4393 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4396 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4397 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4398 : TARGET_64BIT ? (1 << 2)
4399 : (1 << 1));
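/* Pick the CALL_USED_REGISTERS column for this ABI: bit 3 for the 64-bit MS ABI, bit 2 for other 64-bit targets, bit 1 for 32-bit targets. */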
4401 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4403 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4405 /* Set/reset conditionally defined registers from
4406 CALL_USED_REGISTERS initializer. */
4407 if (call_used_regs[i] > 1)
4408 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4410 /* Calculate registers of CLOBBERED_REGS register set
4411 as call used registers from GENERAL_REGS register set. */
4412 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4413 && call_used_regs[i])
4414 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4417 /* If MMX is disabled, squash the registers. */
4418 if (! TARGET_MMX)
4419 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4420 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4421 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4423 /* If SSE is disabled, squash the registers. */
4424 if (! TARGET_SSE)
4425 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4426 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4427 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4429 /* If the FPU is disabled, squash the registers. */
4430 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4431 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4432 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4433 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4435 /* If AVX512F is disabled, squash the registers. */
4436 if (! TARGET_AVX512F)
4438 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4439 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4441 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4442 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4445 /* If MPX is disabled, squash the registers. */
4446 if (! TARGET_MPX)
4447 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4448 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4452 /* Save the current options */
4454 static void
4455 ix86_function_specific_save (struct cl_target_option *ptr,
4456 struct gcc_options *opts)
4458 ptr->arch = ix86_arch;
4459 ptr->schedule = ix86_schedule;
4460 ptr->tune = ix86_tune;
4461 ptr->branch_cost = ix86_branch_cost;
4462 ptr->tune_defaulted = ix86_tune_defaulted;
4463 ptr->arch_specified = ix86_arch_specified;
4464 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4465 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4466 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4467 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4468 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4469 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4470 ptr->x_ix86_abi = opts->x_ix86_abi;
4471 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4472 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4473 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4474 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4475 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4476 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4477 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4478 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4479 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4480 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4481 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4482 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4483 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4484 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4485 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4486 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4487 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4488 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4489 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4490 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4492 /* The fields are char but the variables are not; make sure the
4493 values fit in the fields. */
4494 gcc_assert (ptr->arch == ix86_arch);
4495 gcc_assert (ptr->schedule == ix86_schedule);
4496 gcc_assert (ptr->tune == ix86_tune);
4497 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4500 /* Restore the current options */
4502 static void
4503 ix86_function_specific_restore (struct gcc_options *opts,
4504 struct cl_target_option *ptr)
4506 enum processor_type old_tune = ix86_tune;
4507 enum processor_type old_arch = ix86_arch;
4508 unsigned int ix86_arch_mask;
4509 int i;
4511 /* We don't change -fPIC. */
4512 opts->x_flag_pic = flag_pic;
4514 ix86_arch = (enum processor_type) ptr->arch;
4515 ix86_schedule = (enum attr_cpu) ptr->schedule;
4516 ix86_tune = (enum processor_type) ptr->tune;
4517 opts->x_ix86_branch_cost = ptr->branch_cost;
4518 ix86_tune_defaulted = ptr->tune_defaulted;
4519 ix86_arch_specified = ptr->arch_specified;
4520 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4521 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4522 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4523 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4524 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4525 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4526 opts->x_ix86_abi = ptr->x_ix86_abi;
4527 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4528 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4529 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4530 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4531 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4532 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4533 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4534 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4535 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4536 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4537 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4538 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4539 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4540 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4541 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4542 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4543 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4544 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4545 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4546 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4548 /* Recreate the arch feature tests if the arch changed */
4549 if (old_arch != ix86_arch)
4551 ix86_arch_mask = 1u << ix86_arch;
4552 for (i = 0; i < X86_ARCH_LAST; ++i)
4553 ix86_arch_features[i]
4554 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4557 /* Recreate the tune optimization tests */
4558 if (old_tune != ix86_tune)
4559 set_ix86_tune_features (ix86_tune, false);
4562 /* Print the current options */
4564 static void
4565 ix86_function_specific_print (FILE *file, int indent,
4566 struct cl_target_option *ptr)
4568 char *target_string
4569 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4570 NULL, NULL, ptr->x_ix86_fpmath, false);
4572 gcc_assert (ptr->arch < PROCESSOR_max);
4573 fprintf (file, "%*sarch = %d (%s)\n",
4574 indent, "",
4575 ptr->arch, processor_target_table[ptr->arch].name);
4577 gcc_assert (ptr->tune < PROCESSOR_max);
4578 fprintf (file, "%*stune = %d (%s)\n",
4579 indent, "",
4580 ptr->tune, processor_target_table[ptr->tune].name);
4582 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4584 if (target_string)
4586 fprintf (file, "%*s%s\n", indent, "", target_string);
4587 free (target_string);
4592 /* Inner function to process the attribute((target(...))), take an argument and
4593 set the current options from the argument. If we have a list, recursively go
4594 over the list. */
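/* For illustration (FOO is a hypothetical name used only here), a
declaration such as
     int foo (int) __attribute__ ((target ("avx2,no-sse4a")));
reaches this routine with ARGS holding the string "avx2,no-sse4a";
the string is split at commas, "avx2" switches its ISA option on and
the "no-" prefix switches "sse4a" off.  */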
4596 static bool
4597 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4598 struct gcc_options *opts,
4599 struct gcc_options *opts_set,
4600 struct gcc_options *enum_opts_set)
4602 char *next_optstr;
4603 bool ret = true;
4605 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4606 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4607 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4608 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4609 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4611 enum ix86_opt_type
4613 ix86_opt_unknown,
4614 ix86_opt_yes,
4615 ix86_opt_no,
4616 ix86_opt_str,
4617 ix86_opt_enum,
4618 ix86_opt_isa
4621 static const struct
4623 const char *string;
4624 size_t len;
4625 enum ix86_opt_type type;
4626 int opt;
4627 int mask;
4628 } attrs[] = {
4629 /* isa options */
4630 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4631 IX86_ATTR_ISA ("abm", OPT_mabm),
4632 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4633 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4634 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4635 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4636 IX86_ATTR_ISA ("aes", OPT_maes),
4637 IX86_ATTR_ISA ("sha", OPT_msha),
4638 IX86_ATTR_ISA ("avx", OPT_mavx),
4639 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4640 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4641 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4642 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4643 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4644 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4645 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4646 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4647 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4648 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4649 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4650 IX86_ATTR_ISA ("sse", OPT_msse),
4651 IX86_ATTR_ISA ("sse2", OPT_msse2),
4652 IX86_ATTR_ISA ("sse3", OPT_msse3),
4653 IX86_ATTR_ISA ("sse4", OPT_msse4),
4654 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4655 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4656 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4657 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4658 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4659 IX86_ATTR_ISA ("fma", OPT_mfma),
4660 IX86_ATTR_ISA ("xop", OPT_mxop),
4661 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4662 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4663 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4664 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4665 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4666 IX86_ATTR_ISA ("hle", OPT_mhle),
4667 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4668 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4669 IX86_ATTR_ISA ("adx", OPT_madx),
4670 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4671 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4672 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4673 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4674 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4675 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4676 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4677 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4678 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4679 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4680 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4682 /* enum options */
4683 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4685 /* string options */
4686 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4687 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4689 /* flag options */
4690 IX86_ATTR_YES ("cld",
4691 OPT_mcld,
4692 MASK_CLD),
4694 IX86_ATTR_NO ("fancy-math-387",
4695 OPT_mfancy_math_387,
4696 MASK_NO_FANCY_MATH_387),
4698 IX86_ATTR_YES ("ieee-fp",
4699 OPT_mieee_fp,
4700 MASK_IEEE_FP),
4702 IX86_ATTR_YES ("inline-all-stringops",
4703 OPT_minline_all_stringops,
4704 MASK_INLINE_ALL_STRINGOPS),
4706 IX86_ATTR_YES ("inline-stringops-dynamically",
4707 OPT_minline_stringops_dynamically,
4708 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4710 IX86_ATTR_NO ("align-stringops",
4711 OPT_mno_align_stringops,
4712 MASK_NO_ALIGN_STRINGOPS),
4714 IX86_ATTR_YES ("recip",
4715 OPT_mrecip,
4716 MASK_RECIP),
4720 /* If this is a list, recurse to get the options. */
4721 if (TREE_CODE (args) == TREE_LIST)
4723 bool ret = true;
4725 for (; args; args = TREE_CHAIN (args))
4726 if (TREE_VALUE (args)
4727 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4728 p_strings, opts, opts_set,
4729 enum_opts_set))
4730 ret = false;
4732 return ret;
4735 else if (TREE_CODE (args) != STRING_CST)
4737 error ("attribute %<target%> argument not a string");
4738 return false;
4741 /* Handle multiple arguments separated by commas. */
4742 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4744 while (next_optstr && *next_optstr != '\0')
4746 char *p = next_optstr;
4747 char *orig_p = p;
4748 char *comma = strchr (next_optstr, ',');
4749 const char *opt_string;
4750 size_t len, opt_len;
4751 int opt;
4752 bool opt_set_p;
4753 char ch;
4754 unsigned i;
4755 enum ix86_opt_type type = ix86_opt_unknown;
4756 int mask = 0;
4758 if (comma)
4760 *comma = '\0';
4761 len = comma - next_optstr;
4762 next_optstr = comma + 1;
4764 else
4766 len = strlen (p);
4767 next_optstr = NULL;
4770 /* Recognize no-xxx. */
4771 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4773 opt_set_p = false;
4774 p += 3;
4775 len -= 3;
4777 else
4778 opt_set_p = true;
4780 /* Find the option. */
4781 ch = *p;
4782 opt = N_OPTS;
4783 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4785 type = attrs[i].type;
4786 opt_len = attrs[i].len;
4787 if (ch == attrs[i].string[0]
4788 && ((type != ix86_opt_str && type != ix86_opt_enum)
4789 ? len == opt_len
4790 : len > opt_len)
4791 && memcmp (p, attrs[i].string, opt_len) == 0)
4793 opt = attrs[i].opt;
4794 mask = attrs[i].mask;
4795 opt_string = attrs[i].string;
4796 break;
4800 /* Process the option. */
4801 if (opt == N_OPTS)
4803 error ("attribute(target(\"%s\")) is unknown", orig_p);
4804 ret = false;
4807 else if (type == ix86_opt_isa)
4809 struct cl_decoded_option decoded;
4811 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4812 ix86_handle_option (opts, opts_set,
4813 &decoded, input_location);
4816 else if (type == ix86_opt_yes || type == ix86_opt_no)
4818 if (type == ix86_opt_no)
4819 opt_set_p = !opt_set_p;
4821 if (opt_set_p)
4822 opts->x_target_flags |= mask;
4823 else
4824 opts->x_target_flags &= ~mask;
4827 else if (type == ix86_opt_str)
4829 if (p_strings[opt])
4831 error ("option(\"%s\") was already specified", opt_string);
4832 ret = false;
4834 else
4835 p_strings[opt] = xstrdup (p + opt_len);
4838 else if (type == ix86_opt_enum)
4840 bool arg_ok;
4841 int value;
4843 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4844 if (arg_ok)
4845 set_option (opts, enum_opts_set, opt, value,
4846 p + opt_len, DK_UNSPECIFIED, input_location,
4847 global_dc);
4848 else
4850 error ("attribute(target(\"%s\")) is unknown", orig_p);
4851 ret = false;
4855 else
4856 gcc_unreachable ();
4859 return ret;
4862 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4864 tree
4865 ix86_valid_target_attribute_tree (tree args,
4866 struct gcc_options *opts,
4867 struct gcc_options *opts_set)
4869 const char *orig_arch_string = opts->x_ix86_arch_string;
4870 const char *orig_tune_string = opts->x_ix86_tune_string;
4871 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4872 int orig_tune_defaulted = ix86_tune_defaulted;
4873 int orig_arch_specified = ix86_arch_specified;
4874 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4875 tree t = NULL_TREE;
4876 int i;
4877 struct cl_target_option *def
4878 = TREE_TARGET_OPTION (target_option_default_node);
4879 struct gcc_options enum_opts_set;
4881 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4883 /* Process each of the options on the chain. */
4884 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4885 opts_set, &enum_opts_set))
4886 return error_mark_node;
4888 /* If the changed options are different from the default, rerun
4889 ix86_option_override_internal, and then save the options away.
4890 The string options are attribute options, and will be undone
4891 when we copy the save structure. */
4892 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4893 || opts->x_target_flags != def->x_target_flags
4894 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4895 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4896 || enum_opts_set.x_ix86_fpmath)
4898 /* If we are using the default tune= or arch=, undo the string assigned,
4899 and use the default. */
4900 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4901 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4902 else if (!orig_arch_specified)
4903 opts->x_ix86_arch_string = NULL;
4905 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4906 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4907 else if (orig_tune_defaulted)
4908 opts->x_ix86_tune_string = NULL;
4910 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4911 if (enum_opts_set.x_ix86_fpmath)
4912 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4913 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4914 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4916 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4917 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4920 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4921 ix86_option_override_internal (false, opts, opts_set);
4923 /* Add any builtin functions with the new isa if any. */
4924 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4926 /* Save the current options unless we are validating options for
4927 #pragma. */
4928 t = build_target_option_node (opts);
4930 opts->x_ix86_arch_string = orig_arch_string;
4931 opts->x_ix86_tune_string = orig_tune_string;
4932 opts_set->x_ix86_fpmath = orig_fpmath_set;
4934 /* Free up memory allocated to hold the strings */
4935 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4936 free (option_strings[i]);
4939 return t;
4942 /* Hook to validate attribute((target("string"))). */
4944 static bool
4945 ix86_valid_target_attribute_p (tree fndecl,
4946 tree ARG_UNUSED (name),
4947 tree args,
4948 int ARG_UNUSED (flags))
4950 struct gcc_options func_options;
4951 tree new_target, new_optimize;
4952 bool ret = true;
4954 /* attribute((target("default"))) does nothing, beyond
4955 affecting multi-versioning. */
4956 if (TREE_VALUE (args)
4957 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4958 && TREE_CHAIN (args) == NULL_TREE
4959 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4960 return true;
4962 tree old_optimize = build_optimization_node (&global_options);
4964 /* Get the optimization options of the current function. */
4965 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4967 if (!func_optimize)
4968 func_optimize = old_optimize;
4970 /* Init func_options. */
4971 memset (&func_options, 0, sizeof (func_options));
4972 init_options_struct (&func_options, NULL);
4973 lang_hooks.init_options_struct (&func_options);
4975 cl_optimization_restore (&func_options,
4976 TREE_OPTIMIZATION (func_optimize));
4978 /* Initialize func_options to the default before its target options can
4979 be set. */
4980 cl_target_option_restore (&func_options,
4981 TREE_TARGET_OPTION (target_option_default_node));
4983 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4984 &global_options_set);
4986 new_optimize = build_optimization_node (&func_options);
4988 if (new_target == error_mark_node)
4989 ret = false;
4991 else if (fndecl && new_target)
4993 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4995 if (old_optimize != new_optimize)
4996 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4999 return ret;
5003 /* Hook to determine if one function can safely inline another. */
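/* For example, under the subset check below a callee compiled with
target ("sse2") can typically be inlined into a caller compiled with
target ("avx"), but not the other way around, because the caller's ISA
flags must contain all of the callee's.  */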
5005 static bool
5006 ix86_can_inline_p (tree caller, tree callee)
5008 bool ret = false;
5009 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5010 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5012 /* If callee has no option attributes, then it is ok to inline. */
5013 if (!callee_tree)
5014 ret = true;
5016 /* If caller has no option attributes, but callee does then it is not ok to
5017 inline. */
5018 else if (!caller_tree)
5019 ret = false;
5021 else
5023 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5024 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5026 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5027 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5028 function. */
5029 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5030 != callee_opts->x_ix86_isa_flags)
5031 ret = false;
5033 /* See if we have the same non-isa options. */
5034 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5035 ret = false;
5037 /* See if arch, tune, etc. are the same. */
5038 else if (caller_opts->arch != callee_opts->arch)
5039 ret = false;
5041 else if (caller_opts->tune != callee_opts->tune)
5042 ret = false;
5044 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5045 ret = false;
5047 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5048 ret = false;
5050 else
5051 ret = true;
5054 return ret;
5058 /* Remember the last target of ix86_set_current_function. */
5059 static GTY(()) tree ix86_previous_fndecl;
5061 /* Set target globals to default. */
5063 static void
5064 ix86_reset_to_default_globals (void)
5066 tree old_tree = (ix86_previous_fndecl
5067 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5068 : NULL_TREE);
5070 if (old_tree)
5072 tree new_tree = target_option_current_node;
5073 cl_target_option_restore (&global_options,
5074 TREE_TARGET_OPTION (new_tree));
5075 if (TREE_TARGET_GLOBALS (new_tree))
5076 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5077 else if (new_tree == target_option_default_node)
5078 restore_target_globals (&default_target_globals);
5079 else
5080 TREE_TARGET_GLOBALS (new_tree)
5081 = save_target_globals_default_opts ();
5085 /* Invalidate ix86_previous_fndecl cache. */
5086 void
5087 ix86_reset_previous_fndecl (void)
5089 ix86_reset_to_default_globals ();
5090 ix86_previous_fndecl = NULL_TREE;
5093 /* Establish appropriate back-end context for processing the function
5094 FNDECL. The argument might be NULL to indicate processing at top
5095 level, outside of any function scope. */
5096 static void
5097 ix86_set_current_function (tree fndecl)
5099 /* Only change the context if the function changes. This hook is called
5100 several times in the course of compiling a function, and we don't want to
5101 slow things down too much or call target_reinit when it isn't safe. */
5102 if (fndecl && fndecl != ix86_previous_fndecl)
5104 tree old_tree = (ix86_previous_fndecl
5105 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5106 : NULL_TREE);
5108 tree new_tree = (fndecl
5109 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5110 : NULL_TREE);
5112 if (old_tree == new_tree)
5115 else if (new_tree && new_tree != target_option_default_node)
5117 cl_target_option_restore (&global_options,
5118 TREE_TARGET_OPTION (new_tree));
5119 if (TREE_TARGET_GLOBALS (new_tree))
5120 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5121 else
5122 TREE_TARGET_GLOBALS (new_tree)
5123 = save_target_globals_default_opts ();
5126 else if (old_tree && old_tree != target_option_default_node)
5127 ix86_reset_to_default_globals ();
5128 ix86_previous_fndecl = fndecl;
5133 /* Return true if this goes in large data/bss. */
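/* Roughly: only with -mcmodel=medium (or medium PIC) do objects count as
large data, namely those explicitly placed in .ldata/.lbss or those whose
size exceeds the -mlarge-data-threshold value held in
ix86_section_threshold; see the checks below.  */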
5135 static bool
5136 ix86_in_large_data_p (tree exp)
5138 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5139 return false;
5141 /* Functions are never large data. */
5142 if (TREE_CODE (exp) == FUNCTION_DECL)
5143 return false;
5145 /* Automatic variables are never large data. */
5146 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5147 return false;
5149 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5151 const char *section = DECL_SECTION_NAME (exp);
5152 if (strcmp (section, ".ldata") == 0
5153 || strcmp (section, ".lbss") == 0)
5154 return true;
5155 return false;
5157 else
5159 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5161 /* If this is an incomplete type with size 0, then we can't put it
5162 in data because it might be too big when completed. Also,
5163 int_size_in_bytes returns -1 if the size can vary or is larger than
5164 an integer, in which case it is also safer to assume that it goes in
5165 large data. */
5166 if (size <= 0 || size > ix86_section_threshold)
5167 return true;
5170 return false;
5173 /* Switch to the appropriate section for output of DECL.
5174 DECL is either a `VAR_DECL' node or a constant of some sort.
5175 RELOC indicates whether forming the initial value of DECL requires
5176 link-time relocations. */
5178 ATTRIBUTE_UNUSED static section *
5179 x86_64_elf_select_section (tree decl, int reloc,
5180 unsigned HOST_WIDE_INT align)
5182 if (ix86_in_large_data_p (decl))
5184 const char *sname = NULL;
5185 unsigned int flags = SECTION_WRITE;
5186 switch (categorize_decl_for_section (decl, reloc))
5188 case SECCAT_DATA:
5189 sname = ".ldata";
5190 break;
5191 case SECCAT_DATA_REL:
5192 sname = ".ldata.rel";
5193 break;
5194 case SECCAT_DATA_REL_LOCAL:
5195 sname = ".ldata.rel.local";
5196 break;
5197 case SECCAT_DATA_REL_RO:
5198 sname = ".ldata.rel.ro";
5199 break;
5200 case SECCAT_DATA_REL_RO_LOCAL:
5201 sname = ".ldata.rel.ro.local";
5202 break;
5203 case SECCAT_BSS:
5204 sname = ".lbss";
5205 flags |= SECTION_BSS;
5206 break;
5207 case SECCAT_RODATA:
5208 case SECCAT_RODATA_MERGE_STR:
5209 case SECCAT_RODATA_MERGE_STR_INIT:
5210 case SECCAT_RODATA_MERGE_CONST:
5211 sname = ".lrodata";
5212 flags = 0;
5213 break;
5214 case SECCAT_SRODATA:
5215 case SECCAT_SDATA:
5216 case SECCAT_SBSS:
5217 gcc_unreachable ();
5218 case SECCAT_TEXT:
5219 case SECCAT_TDATA:
5220 case SECCAT_TBSS:
5221 /* We don't split these for the medium model. Place them into
5222 default sections and hope for the best. */
5223 break;
5225 if (sname)
5227 /* We might get called with string constants, but get_named_section
5228 doesn't like them as they are not DECLs. Also, we need to set
5229 flags in that case. */
5230 if (!DECL_P (decl))
5231 return get_section (sname, flags, NULL);
5232 return get_named_section (decl, sname, reloc);
5235 return default_elf_select_section (decl, reloc, align);
5238 /* Select a set of attributes for section NAME based on the properties
5239 of DECL and whether or not RELOC indicates that DECL's initializer
5240 might contain runtime relocations. */
5242 static unsigned int ATTRIBUTE_UNUSED
5243 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5245 unsigned int flags = default_section_type_flags (decl, name, reloc);
5247 if (decl == NULL_TREE
5248 && (strcmp (name, ".ldata.rel.ro") == 0
5249 || strcmp (name, ".ldata.rel.ro.local") == 0))
5250 flags |= SECTION_RELRO;
5252 if (strcmp (name, ".lbss") == 0
5253 || strncmp (name, ".lbss.", 6) == 0
5254 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5255 flags |= SECTION_BSS;
5257 return flags;
5260 /* Build up a unique section name, expressed as a
5261 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5262 RELOC indicates whether the initial value of EXP requires
5263 link-time relocations. */
5265 static void ATTRIBUTE_UNUSED
5266 x86_64_elf_unique_section (tree decl, int reloc)
5268 if (ix86_in_large_data_p (decl))
5270 const char *prefix = NULL;
5271 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5272 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5274 switch (categorize_decl_for_section (decl, reloc))
5276 case SECCAT_DATA:
5277 case SECCAT_DATA_REL:
5278 case SECCAT_DATA_REL_LOCAL:
5279 case SECCAT_DATA_REL_RO:
5280 case SECCAT_DATA_REL_RO_LOCAL:
5281 prefix = one_only ? ".ld" : ".ldata";
5282 break;
5283 case SECCAT_BSS:
5284 prefix = one_only ? ".lb" : ".lbss";
5285 break;
5286 case SECCAT_RODATA:
5287 case SECCAT_RODATA_MERGE_STR:
5288 case SECCAT_RODATA_MERGE_STR_INIT:
5289 case SECCAT_RODATA_MERGE_CONST:
5290 prefix = one_only ? ".lr" : ".lrodata";
5291 break;
5292 case SECCAT_SRODATA:
5293 case SECCAT_SDATA:
5294 case SECCAT_SBSS:
5295 gcc_unreachable ();
5296 case SECCAT_TEXT:
5297 case SECCAT_TDATA:
5298 case SECCAT_TBSS:
5299 /* We don't split these for the medium model. Place them into
5300 default sections and hope for the best. */
5301 break;
5303 if (prefix)
5305 const char *name, *linkonce;
5306 char *string;
5308 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5309 name = targetm.strip_name_encoding (name);
5311 /* If we're using one_only, then there needs to be a .gnu.linkonce
5312 prefix to the section name. */
5313 linkonce = one_only ? ".gnu.linkonce" : "";
5315 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5317 set_decl_section_name (decl, string);
5318 return;
5321 default_unique_section (decl, reloc);
5324 #ifdef COMMON_ASM_OP
5325 /* This says how to output assembler code to declare an
5326 uninitialized external linkage data object.
5328 For medium model x86-64 we need to use the .largecomm directive for
5329 large objects. */
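/* For instance (BUF is a made-up name), a 128 KiB common object compiled
with -mcmodel=medium and the default threshold comes out roughly as
     .largecomm buf,131072,32
whereas smaller objects keep using the ordinary COMMON_ASM_OP (".comm")
directive; the trailing value is the alignment in bytes.  */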
5330 void
5331 x86_elf_aligned_common (FILE *file,
5332 const char *name, unsigned HOST_WIDE_INT size,
5333 int align)
5335 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5336 && size > (unsigned int)ix86_section_threshold)
5337 fputs ("\t.largecomm\t", file);
5338 else
5339 fputs (COMMON_ASM_OP, file);
5340 assemble_name (file, name);
5341 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5342 size, align / BITS_PER_UNIT);
5344 #endif
5346 /* Utility function for targets to use in implementing
5347 ASM_OUTPUT_ALIGNED_BSS. */
5349 void
5350 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5351 unsigned HOST_WIDE_INT size, int align)
5353 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5354 && size > (unsigned int)ix86_section_threshold)
5355 switch_to_section (get_named_section (decl, ".lbss", 0));
5356 else
5357 switch_to_section (bss_section);
5358 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5359 #ifdef ASM_DECLARE_OBJECT_NAME
5360 last_assemble_variable_decl = decl;
5361 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5362 #else
5363 /* The standard thing is just to output a label for the object. */
5364 ASM_OUTPUT_LABEL (file, name);
5365 #endif /* ASM_DECLARE_OBJECT_NAME */
5366 ASM_OUTPUT_SKIP (file, size ? size : 1);
5369 /* Decide whether we must probe the stack before any space allocation
5370 on this target. It's essentially TARGET_STACK_PROBE except when
5371 -fstack-check causes the stack to be already probed differently. */
5373 bool
5374 ix86_target_stack_probe (void)
5376 /* Do not probe the stack twice if static stack checking is enabled. */
5377 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5378 return false;
5380 return TARGET_STACK_PROBE;
5383 /* Decide whether we can make a sibling call to a function. DECL is the
5384 declaration of the function being targeted by the call and EXP is the
5385 CALL_EXPR representing the call. */
5387 static bool
5388 ix86_function_ok_for_sibcall (tree decl, tree exp)
5390 tree type, decl_or_type;
5391 rtx a, b;
5393 /* If we are generating position-independent code, we cannot sibcall
5394 optimize any indirect call, or a direct call to a global function,
5395 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5396 if (!TARGET_MACHO
5397 && !TARGET_64BIT
5398 && flag_pic
5399 && (!decl || !targetm.binds_local_p (decl)))
5400 return false;
5402 /* If we need to align the outgoing stack, then sibcalling would
5403 unalign the stack, which may break the called function. */
5404 if (ix86_minimum_incoming_stack_boundary (true)
5405 < PREFERRED_STACK_BOUNDARY)
5406 return false;
5408 if (decl)
5410 decl_or_type = decl;
5411 type = TREE_TYPE (decl);
5413 else
5415 /* We're looking at the CALL_EXPR, we need the type of the function. */
5416 type = CALL_EXPR_FN (exp); /* pointer expression */
5417 type = TREE_TYPE (type); /* pointer type */
5418 type = TREE_TYPE (type); /* function type */
5419 decl_or_type = type;
5422 /* Check that the return value locations are the same. Like
5423 if we are returning floats on the 80387 register stack, we cannot
5424 make a sibcall from a function that doesn't return a float to a
5425 function that does or, conversely, from a function that does return
5426 a float to a function that doesn't; the necessary stack adjustment
5427 would not be executed. This is also the place we notice
5428 differences in the return value ABI. Note that it is ok for one
5429 of the functions to have void return type as long as the return
5430 value of the other is passed in a register. */
5431 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5432 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5433 cfun->decl, false);
5434 if (STACK_REG_P (a) || STACK_REG_P (b))
5436 if (!rtx_equal_p (a, b))
5437 return false;
5439 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5441 else if (!rtx_equal_p (a, b))
5442 return false;
5444 if (TARGET_64BIT)
5446 /* The SYSV ABI has more call-clobbered registers;
5447 disallow sibcalls from MS to SYSV. */
5448 if (cfun->machine->call_abi == MS_ABI
5449 && ix86_function_type_abi (type) == SYSV_ABI)
5450 return false;
5452 else
5454 /* If this call is indirect, we'll need to be able to use a
5455 call-clobbered register for the address of the target function.
5456 Make sure that all such registers are not used for passing
5457 parameters. Note that DLLIMPORT functions are indirect. */
5458 if (!decl
5459 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5461 if (ix86_function_regparm (type, NULL) >= 3)
5463 /* ??? Need to count the actual number of registers to be used,
5464 not the possible number of registers. Fix later. */
5465 return false;
5470 /* Otherwise okay. That also includes certain types of indirect calls. */
5471 return true;
5474 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5475 and "sseregparm" calling convention attributes;
5476 arguments as in struct attribute_spec.handler. */
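/* For example (BAR is a hypothetical name), the declaration
     void bar (int) __attribute__ ((fastcall, regparm (2)));
is rejected below with "fastcall and regparm attributes are not
compatible", whereas combining regparm with sseregparm is accepted.  */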
5478 static tree
5479 ix86_handle_cconv_attribute (tree *node, tree name,
5480 tree args,
5481 int,
5482 bool *no_add_attrs)
5484 if (TREE_CODE (*node) != FUNCTION_TYPE
5485 && TREE_CODE (*node) != METHOD_TYPE
5486 && TREE_CODE (*node) != FIELD_DECL
5487 && TREE_CODE (*node) != TYPE_DECL)
5489 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5490 name);
5491 *no_add_attrs = true;
5492 return NULL_TREE;
5495 /* Can combine regparm with all attributes but fastcall and thiscall. */
5496 if (is_attribute_p ("regparm", name))
5498 tree cst;
5500 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5502 error ("fastcall and regparm attributes are not compatible");
5505 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5507 error ("regparam and thiscall attributes are not compatible");
5510 cst = TREE_VALUE (args);
5511 if (TREE_CODE (cst) != INTEGER_CST)
5513 warning (OPT_Wattributes,
5514 "%qE attribute requires an integer constant argument",
5515 name);
5516 *no_add_attrs = true;
5518 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5520 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5521 name, REGPARM_MAX);
5522 *no_add_attrs = true;
5525 return NULL_TREE;
5528 if (TARGET_64BIT)
5530 /* Do not warn when emulating the MS ABI. */
5531 if ((TREE_CODE (*node) != FUNCTION_TYPE
5532 && TREE_CODE (*node) != METHOD_TYPE)
5533 || ix86_function_type_abi (*node) != MS_ABI)
5534 warning (OPT_Wattributes, "%qE attribute ignored",
5535 name);
5536 *no_add_attrs = true;
5537 return NULL_TREE;
5540 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5541 if (is_attribute_p ("fastcall", name))
5543 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5545 error ("fastcall and cdecl attributes are not compatible");
5547 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5549 error ("fastcall and stdcall attributes are not compatible");
5551 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5553 error ("fastcall and regparm attributes are not compatible");
5555 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5557 error ("fastcall and thiscall attributes are not compatible");
5561 /* Can combine stdcall with fastcall (redundant), regparm and
5562 sseregparm. */
5563 else if (is_attribute_p ("stdcall", name))
5565 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5567 error ("stdcall and cdecl attributes are not compatible");
5569 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5571 error ("stdcall and fastcall attributes are not compatible");
5573 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5575 error ("stdcall and thiscall attributes are not compatible");
5579 /* Can combine cdecl with regparm and sseregparm. */
5580 else if (is_attribute_p ("cdecl", name))
5582 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5584 error ("stdcall and cdecl attributes are not compatible");
5586 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5588 error ("fastcall and cdecl attributes are not compatible");
5590 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5592 error ("cdecl and thiscall attributes are not compatible");
5595 else if (is_attribute_p ("thiscall", name))
5597 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5598 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5599 name);
5600 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5602 error ("stdcall and thiscall attributes are not compatible");
5604 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5606 error ("fastcall and thiscall attributes are not compatible");
5608 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5610 error ("cdecl and thiscall attributes are not compatible");
5614 /* Can combine sseregparm with all attributes. */
5616 return NULL_TREE;
5619 /* The transactional memory builtins are implicitly regparm or fastcall
5620 depending on the ABI. Override the generic do-nothing attribute that
5621 these builtins were declared with, and replace it with one of the two
5622 attributes that we expect elsewhere. */
5624 static tree
5625 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5626 int flags, bool *no_add_attrs)
5628 tree alt;
5630 /* In no case do we want to add the placeholder attribute. */
5631 *no_add_attrs = true;
5633 /* The 64-bit ABI is unchanged for transactional memory. */
5634 if (TARGET_64BIT)
5635 return NULL_TREE;
5637 /* ??? Is there a better way to validate 32-bit windows? We have
5638 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5639 if (CHECK_STACK_LIMIT > 0)
5640 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5641 else
5643 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5644 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5646 decl_attributes (node, alt, flags);
5648 return NULL_TREE;
5651 /* This function determines from TYPE the calling-convention. */
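/* For instance, a 32-bit function type carrying the stdcall attribute
yields IX86_CALLCVT_STDCALL, possibly or'ed with IX86_CALLCVT_REGPARM or
IX86_CALLCVT_SSEREGPARM, while for 64-bit targets the result is always
IX86_CALLCVT_CDECL.  */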
5653 unsigned int
5654 ix86_get_callcvt (const_tree type)
5656 unsigned int ret = 0;
5657 bool is_stdarg;
5658 tree attrs;
5660 if (TARGET_64BIT)
5661 return IX86_CALLCVT_CDECL;
5663 attrs = TYPE_ATTRIBUTES (type);
5664 if (attrs != NULL_TREE)
5666 if (lookup_attribute ("cdecl", attrs))
5667 ret |= IX86_CALLCVT_CDECL;
5668 else if (lookup_attribute ("stdcall", attrs))
5669 ret |= IX86_CALLCVT_STDCALL;
5670 else if (lookup_attribute ("fastcall", attrs))
5671 ret |= IX86_CALLCVT_FASTCALL;
5672 else if (lookup_attribute ("thiscall", attrs))
5673 ret |= IX86_CALLCVT_THISCALL;
5675 /* Regparm isn't allowed for thiscall and fastcall. */
5676 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5678 if (lookup_attribute ("regparm", attrs))
5679 ret |= IX86_CALLCVT_REGPARM;
5680 if (lookup_attribute ("sseregparm", attrs))
5681 ret |= IX86_CALLCVT_SSEREGPARM;
5684 if (IX86_BASE_CALLCVT(ret) != 0)
5685 return ret;
5688 is_stdarg = stdarg_p (type);
5689 if (TARGET_RTD && !is_stdarg)
5690 return IX86_CALLCVT_STDCALL | ret;
5692 if (ret != 0
5693 || is_stdarg
5694 || TREE_CODE (type) != METHOD_TYPE
5695 || ix86_function_type_abi (type) != MS_ABI)
5696 return IX86_CALLCVT_CDECL | ret;
5698 return IX86_CALLCVT_THISCALL;
5701 /* Return 0 if the attributes for two types are incompatible, 1 if they
5702 are compatible, and 2 if they are nearly compatible (which causes a
5703 warning to be generated). */
5705 static int
5706 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5708 unsigned int ccvt1, ccvt2;
5710 if (TREE_CODE (type1) != FUNCTION_TYPE
5711 && TREE_CODE (type1) != METHOD_TYPE)
5712 return 1;
5714 ccvt1 = ix86_get_callcvt (type1);
5715 ccvt2 = ix86_get_callcvt (type2);
5716 if (ccvt1 != ccvt2)
5717 return 0;
5718 if (ix86_function_regparm (type1, NULL)
5719 != ix86_function_regparm (type2, NULL))
5720 return 0;
5722 return 1;
5725 /* Return the regparm value for a function with the indicated TYPE and DECL.
5726 DECL may be NULL when calling function indirectly
5727 or considering a libcall. */
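/* For example (BAZ is a hypothetical name),
     void baz (int, int, int) __attribute__ ((regparm (3)));
passes its three integer arguments in registers (EAX, EDX and ECX on
ia32) instead of on the stack; fastcall yields a regparm value of 2 and
thiscall a value of 1, as returned below.  */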
5729 static int
5730 ix86_function_regparm (const_tree type, const_tree decl)
5732 tree attr;
5733 int regparm;
5734 unsigned int ccvt;
5736 if (TARGET_64BIT)
5737 return (ix86_function_type_abi (type) == SYSV_ABI
5738 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5739 ccvt = ix86_get_callcvt (type);
5740 regparm = ix86_regparm;
5742 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5744 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5745 if (attr)
5747 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5748 return regparm;
5751 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5752 return 2;
5753 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5754 return 1;
5756 /* Use register calling convention for local functions when possible. */
5757 if (decl
5758 && TREE_CODE (decl) == FUNCTION_DECL
5759 /* Caller and callee must agree on the calling convention, so
5760 checking just "optimize" here would mean that with
5761 __attribute__((optimize (...))) the caller could use the regparm convention
5762 and the callee not, or vice versa. Instead look at whether the callee
5763 is optimized or not. */
5764 && opt_for_fn (decl, optimize)
5765 && !(profile_flag && !flag_fentry))
5767 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5768 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5769 if (i && i->local && i->can_change_signature)
5771 int local_regparm, globals = 0, regno;
5773 /* Make sure no regparm register is taken by a
5774 fixed register variable. */
5775 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5776 if (fixed_regs[local_regparm])
5777 break;
5779 /* We don't want to use regparm(3) for nested functions as
5780 these use a static chain pointer in the third argument. */
5781 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5782 local_regparm = 2;
5784 /* In 32-bit mode save a register for the split stack. */
5785 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5786 local_regparm = 2;
5788 /* Each fixed register usage increases register pressure,
5789 so fewer registers should be used for argument passing.
5790 This functionality can be overridden by an explicit
5791 regparm value. */
5792 for (regno = AX_REG; regno <= DI_REG; regno++)
5793 if (fixed_regs[regno])
5794 globals++;
5796 local_regparm
5797 = globals < local_regparm ? local_regparm - globals : 0;
5799 if (local_regparm > regparm)
5800 regparm = local_regparm;
5804 return regparm;
5807 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5808 DFmode (2) arguments in SSE registers for a function with the
5809 indicated TYPE and DECL. DECL may be NULL when calling function
5810 indirectly or considering a libcall. Otherwise return 0. */
5812 static int
5813 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5815 gcc_assert (!TARGET_64BIT);
5817 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5818 by the sseregparm attribute. */
5819 if (TARGET_SSEREGPARM
5820 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5822 if (!TARGET_SSE)
5824 if (warn)
5826 if (decl)
5827 error ("calling %qD with attribute sseregparm without "
5828 "SSE/SSE2 enabled", decl);
5829 else
5830 error ("calling %qT with attribute sseregparm without "
5831 "SSE/SSE2 enabled", type);
5833 return 0;
5836 return 2;
5839 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5840 (and DFmode for SSE2) arguments in SSE registers. */
5841 if (decl && TARGET_SSE_MATH && optimize
5842 && !(profile_flag && !flag_fentry))
5844 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5845 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5846 if (i && i->local && i->can_change_signature)
5847 return TARGET_SSE2 ? 2 : 1;
5850 return 0;
5853 /* Return true if EAX is live at the start of the function. Used by
5854 ix86_expand_prologue to determine if we need special help before
5855 calling allocate_stack_worker. */
5857 static bool
5858 ix86_eax_live_at_start_p (void)
5860 /* Cheat. Don't bother working forward from ix86_function_regparm
5861 to the function type to whether an actual argument is located in
5862 eax. Instead just look at cfg info, which is still close enough
5863 to correct at this point. This gives false positives for broken
5864 functions that might use uninitialized data that happens to be
5865 allocated in eax, but who cares? */
5866 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5869 static bool
5870 ix86_keep_aggregate_return_pointer (tree fntype)
5872 tree attr;
5874 if (!TARGET_64BIT)
5876 attr = lookup_attribute ("callee_pop_aggregate_return",
5877 TYPE_ATTRIBUTES (fntype));
5878 if (attr)
5879 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5881 /* For 32-bit MS-ABI the default is to keep aggregate
5882 return pointer. */
5883 if (ix86_function_type_abi (fntype) == MS_ABI)
5884 return true;
5886 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5889 /* Value is the number of bytes of arguments automatically
5890 popped when returning from a subroutine call.
5891 FUNDECL is the declaration node of the function (as a tree),
5892 FUNTYPE is the data type of the function (as a tree),
5893 or for a library call it is an identifier node for the subroutine name.
5894 SIZE is the number of bytes of arguments passed on the stack.
5896 On the 80386, the RTD insn may be used to pop them if the number
5897 of args is fixed, but if the number is variable then the caller
5898 must pop them all. RTD can't be used for library calls now
5899 because the library is compiled with the Unix compiler.
5900 Use of RTD is a selectable option, since it is incompatible with
5901 standard Unix calling sequences. If the option is not selected,
5902 the caller must always pop the args.
5904 The attribute stdcall is equivalent to RTD on a per module basis. */
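/* As an example (QUX is a hypothetical name), for
     void qux (int, int) __attribute__ ((stdcall));
on ia32 this returns 8, so the callee pops both stack arguments with
"ret 8"; for a stdarg function or any 64-bit ABI it returns 0 and the
caller pops.  */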
5906 static int
5907 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5909 unsigned int ccvt;
5911 /* None of the 64-bit ABIs pop arguments. */
5912 if (TARGET_64BIT)
5913 return 0;
5915 ccvt = ix86_get_callcvt (funtype);
5917 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5918 | IX86_CALLCVT_THISCALL)) != 0
5919 && ! stdarg_p (funtype))
5920 return size;
5922 /* Lose any fake structure return argument if it is passed on the stack. */
5923 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5924 && !ix86_keep_aggregate_return_pointer (funtype))
5926 int nregs = ix86_function_regparm (funtype, fundecl);
5927 if (nregs == 0)
5928 return GET_MODE_SIZE (Pmode);
5931 return 0;
5934 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5936 static bool
5937 ix86_legitimate_combined_insn (rtx_insn *insn)
5939 /* Check operand constraints in case hard registers were propagated
5940 into insn pattern. This check prevents combine pass from
5941 generating insn patterns with invalid hard register operands.
5942 These invalid insns can eventually confuse reload to error out
5943 with a spill failure. See also PRs 46829 and 46843. */
5944 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5946 int i;
5948 extract_insn (insn);
5949 preprocess_constraints (insn);
5951 int n_operands = recog_data.n_operands;
5952 int n_alternatives = recog_data.n_alternatives;
5953 for (i = 0; i < n_operands; i++)
5955 rtx op = recog_data.operand[i];
5956 machine_mode mode = GET_MODE (op);
5957 const operand_alternative *op_alt;
5958 int offset = 0;
5959 bool win;
5960 int j;
5962 /* For pre-AVX disallow unaligned loads/stores where the
5963 instructions don't support it. */
5964 if (!TARGET_AVX
5965 && VECTOR_MODE_P (GET_MODE (op))
5966 && misaligned_operand (op, GET_MODE (op)))
5968 int min_align = get_attr_ssememalign (insn);
5969 if (min_align == 0)
5970 return false;
5973 /* A unary operator may be accepted by the predicate, but it
5974 is irrelevant for matching constraints. */
5975 if (UNARY_P (op))
5976 op = XEXP (op, 0);
5978 if (GET_CODE (op) == SUBREG)
5980 if (REG_P (SUBREG_REG (op))
5981 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5982 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5983 GET_MODE (SUBREG_REG (op)),
5984 SUBREG_BYTE (op),
5985 GET_MODE (op));
5986 op = SUBREG_REG (op);
5989 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5990 continue;
5992 op_alt = recog_op_alt;
5994 /* Operand has no constraints, anything is OK. */
5995 win = !n_alternatives;
5997 alternative_mask preferred = get_preferred_alternatives (insn);
5998 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6000 if (!TEST_BIT (preferred, j))
6001 continue;
6002 if (op_alt[i].anything_ok
6003 || (op_alt[i].matches != -1
6004 && operands_match_p
6005 (recog_data.operand[i],
6006 recog_data.operand[op_alt[i].matches]))
6007 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6009 win = true;
6010 break;
6014 if (!win)
6015 return false;
6019 return true;
6022 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
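/* For reference, AddressSanitizer computes the shadow address of a byte
as (addr >> 3) + shadow_offset, so the value returned below (for example
0x7fff8000 on LP64 Linux) is the constant added after the 3-bit shift.  */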
6024 static unsigned HOST_WIDE_INT
6025 ix86_asan_shadow_offset (void)
6027 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6028 : HOST_WIDE_INT_C (0x7fff8000))
6029 : (HOST_WIDE_INT_1 << 29);
6032 /* Argument support functions. */
6034 /* Return true when register may be used to pass function parameters. */
6035 bool
6036 ix86_function_arg_regno_p (int regno)
6038 int i;
6039 const int *parm_regs;
6041 if (!TARGET_64BIT)
6043 if (TARGET_MACHO)
6044 return (regno < REGPARM_MAX
6045 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6046 else
6047 return (regno < REGPARM_MAX
6048 || (TARGET_MMX && MMX_REGNO_P (regno)
6049 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6050 || (TARGET_SSE && SSE_REGNO_P (regno)
6051 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6054 if (TARGET_SSE && SSE_REGNO_P (regno)
6055 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6056 return true;
6058 /* TODO: The function should depend on current function ABI but
6059 builtins.c would need updating then. Therefore we use the
6060 default ABI. */
6062 /* RAX is used as hidden argument to va_arg functions. */
6063 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6064 return true;
6066 if (ix86_abi == MS_ABI)
6067 parm_regs = x86_64_ms_abi_int_parameter_registers;
6068 else
6069 parm_regs = x86_64_int_parameter_registers;
6070 for (i = 0; i < (ix86_abi == MS_ABI
6071 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6072 if (regno == parm_regs[i])
6073 return true;
6074 return false;
6077 /* Return if we do not know how to pass TYPE solely in registers. */
6079 static bool
6080 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6082 if (must_pass_in_stack_var_size_or_pad (mode, type))
6083 return true;
6085 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6086 The layout_type routine is crafty and tries to trick us into passing
6087 currently unsupported vector types on the stack by using TImode. */
6088 return (!TARGET_64BIT && mode == TImode
6089 && type && TREE_CODE (type) != VECTOR_TYPE);
6092 /* Return the size, in bytes, of the area reserved for arguments passed
6093 in registers for the function represented by FNDECL, depending on the
6094 ABI format used. */
6095 int
6096 ix86_reg_parm_stack_space (const_tree fndecl)
6098 enum calling_abi call_abi = SYSV_ABI;
6099 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6100 call_abi = ix86_function_abi (fndecl);
6101 else
6102 call_abi = ix86_function_type_abi (fndecl);
6103 if (TARGET_64BIT && call_abi == MS_ABI)
6104 return 32;
6105 return 0;
6108 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6109 call ABI used. */
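/* For example (FN is a hypothetical name), on a SYSV_ABI default target
     int fn (void) __attribute__ ((ms_abi));
selects MS_ABI for calls through that type, whereas with -mx32 the same
attribute is diagnosed below with "X32 does not support ms_abi
attribute".  */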
6110 enum calling_abi
6111 ix86_function_type_abi (const_tree fntype)
6113 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6115 enum calling_abi abi = ix86_abi;
6116 if (abi == SYSV_ABI)
6118 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6120 if (TARGET_X32)
6122 static bool warned = false;
6123 if (!warned)
6125 error ("X32 does not support ms_abi attribute");
6126 warned = true;
6129 abi = MS_ABI;
6132 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6133 abi = SYSV_ABI;
6134 return abi;
6136 return ix86_abi;
6139 /* We add this as a workaround in order to use libc_has_function
6140 hook in i386.md. */
6141 bool
6142 ix86_libc_has_function (enum function_class fn_class)
6144 return targetm.libc_has_function (fn_class);
6147 static bool
6148 ix86_function_ms_hook_prologue (const_tree fn)
6150 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6152 if (decl_function_context (fn) != NULL_TREE)
6153 error_at (DECL_SOURCE_LOCATION (fn),
6154 "ms_hook_prologue is not compatible with nested function");
6155 else
6156 return true;
6158 return false;
6161 static enum calling_abi
6162 ix86_function_abi (const_tree fndecl)
6164 if (! fndecl)
6165 return ix86_abi;
6166 return ix86_function_type_abi (TREE_TYPE (fndecl));
6169 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6170 call ABI used. */
6171 enum calling_abi
6172 ix86_cfun_abi (void)
6174 if (! cfun)
6175 return ix86_abi;
6176 return cfun->machine->call_abi;
6179 /* Write the extra assembler code needed to declare a function properly. */
6181 void
6182 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6183 tree decl)
6185 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6187 if (is_ms_hook)
6189 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6190 unsigned int filler_cc = 0xcccccccc;
6192 for (i = 0; i < filler_count; i += 4)
6193 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6196 #ifdef SUBTARGET_ASM_UNWIND_INIT
6197 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6198 #endif
6200 ASM_OUTPUT_LABEL (asm_out_file, fname);
6202 /* Output magic byte marker, if hot-patch attribute is set. */
6203 if (is_ms_hook)
6205 if (TARGET_64BIT)
6207 /* leaq [%rsp + 0], %rsp */
6208 asm_fprintf (asm_out_file, ASM_BYTE
6209 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6211 else
6213 /* movl.s %edi, %edi
6214 push %ebp
6215 movl.s %esp, %ebp */
6216 asm_fprintf (asm_out_file, ASM_BYTE
6217 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6222 /* regclass.c */
6223 extern void init_regs (void);
6225 /* Implementation of the call ABI switching target hook. The call
6226 register sets specific to FNDECL are selected. See also
6227 ix86_conditional_register_usage for more details. */
6228 void
6229 ix86_call_abi_override (const_tree fndecl)
6231 if (fndecl == NULL_TREE)
6232 cfun->machine->call_abi = ix86_abi;
6233 else
6234 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6237 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6238 expensive re-initialization of init_regs each time we switch function context
6239 since this is needed only during RTL expansion. */
6240 static void
6241 ix86_maybe_switch_abi (void)
6243 if (TARGET_64BIT &&
6244 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6245 reinit_regs ();
6248 /* Return 1 if pseudo register should be created and used to hold
6249 GOT address for PIC code. */
6250 static bool
6251 ix86_use_pseudo_pic_reg (void)
6253 if ((TARGET_64BIT
6254 && (ix86_cmodel == CM_SMALL_PIC
6255 || TARGET_PECOFF))
6256 || !flag_pic)
6257 return false;
6258 return true;
6261 /* Initialize large model PIC register. */
6263 static void
6264 ix86_init_large_pic_reg (unsigned int tmp_regno)
6266 rtx_code_label *label;
6267 rtx tmp_reg;
6269 gcc_assert (Pmode == DImode);
6270 label = gen_label_rtx ();
6271 emit_label (label);
6272 LABEL_PRESERVE_P (label) = 1;
6273 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6274 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6275 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6276 label));
6277 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6278 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6279 pic_offset_table_rtx, tmp_reg));
6282 /* Create and initialize PIC register if required. */
6283 static void
6284 ix86_init_pic_reg (void)
6286 edge entry_edge;
6287 rtx_insn *seq;
6289 if (!ix86_use_pseudo_pic_reg ())
6290 return;
6292 start_sequence ();
6294 if (TARGET_64BIT)
6296 if (ix86_cmodel == CM_LARGE_PIC)
6297 ix86_init_large_pic_reg (R11_REG);
6298 else
6299 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6301 else
6303 /* If there is a future mcount call in the function, it is more profitable
6304 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6305 rtx reg = crtl->profile
6306 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6307 : pic_offset_table_rtx;
6308 rtx insn = emit_insn (gen_set_got (reg));
6309 RTX_FRAME_RELATED_P (insn) = 1;
6310 if (crtl->profile)
6311 emit_move_insn (pic_offset_table_rtx, reg);
6312 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6315 seq = get_insns ();
6316 end_sequence ();
6318 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6319 insert_insn_on_edge (seq, entry_edge);
6320 commit_one_edge_insertion (entry_edge);
6323 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6324 for a call to a function whose data type is FNTYPE.
6325 For a library call, FNTYPE is 0. */
6327 void
6328 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6329 tree fntype, /* tree ptr for function decl */
6330 rtx libname, /* SYMBOL_REF of library name or 0 */
6331 tree fndecl,
6332 int caller)
6334 struct cgraph_local_info *i;
6336 memset (cum, 0, sizeof (*cum));
6338 if (fndecl)
6340 i = cgraph_node::local_info (fndecl);
6341 cum->call_abi = ix86_function_abi (fndecl);
6343 else
6345 i = NULL;
6346 cum->call_abi = ix86_function_type_abi (fntype);
6349 cum->caller = caller;
6351 /* Set up the number of registers to use for passing arguments. */
6352 cum->nregs = ix86_regparm;
6353 if (TARGET_64BIT)
6355 cum->nregs = (cum->call_abi == SYSV_ABI
6356 ? X86_64_REGPARM_MAX
6357 : X86_64_MS_REGPARM_MAX);
6359 if (TARGET_SSE)
6361 cum->sse_nregs = SSE_REGPARM_MAX;
6362 if (TARGET_64BIT)
6364 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6365 ? X86_64_SSE_REGPARM_MAX
6366 : X86_64_MS_SSE_REGPARM_MAX);
6369 if (TARGET_MMX)
6370 cum->mmx_nregs = MMX_REGPARM_MAX;
6371 cum->warn_avx512f = true;
6372 cum->warn_avx = true;
6373 cum->warn_sse = true;
6374 cum->warn_mmx = true;
6376 /* Because the type might mismatch between caller and callee, we need to
6377 use the actual type of the function for local calls.
6378 FIXME: cgraph_analyze can be told to actually record if function uses
6379 va_start so for local functions maybe_vaarg can be made aggressive
6380 helping K&R code.
6381 FIXME: once the typesystem is fixed, we won't need this code anymore. */
6382 if (i && i->local && i->can_change_signature)
6383 fntype = TREE_TYPE (fndecl);
6384 cum->stdarg = stdarg_p (fntype);
6385 cum->maybe_vaarg = (fntype
6386 ? (!prototype_p (fntype) || stdarg_p (fntype))
6387 : !libname);
6389 cum->bnd_regno = FIRST_BND_REG;
6390 cum->bnds_in_bt = 0;
6391 cum->force_bnd_pass = 0;
6393 if (!TARGET_64BIT)
6395 /* If there are variable arguments, then we won't pass anything
6396 in registers in 32-bit mode. */
6397 if (stdarg_p (fntype))
6399 cum->nregs = 0;
6400 cum->sse_nregs = 0;
6401 cum->mmx_nregs = 0;
6402 cum->warn_avx512f = false;
6403 cum->warn_avx = false;
6404 cum->warn_sse = false;
6405 cum->warn_mmx = false;
6406 return;
6409 /* Use ecx and edx registers if function has fastcall attribute,
6410 else look for regparm information. */
6411 if (fntype)
6413 unsigned int ccvt = ix86_get_callcvt (fntype);
6414 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6416 cum->nregs = 1;
6417 cum->fastcall = 1; /* Same first register as in fastcall. */
6419 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6421 cum->nregs = 2;
6422 cum->fastcall = 1;
6424 else
6425 cum->nregs = ix86_function_regparm (fntype, fndecl);
6428 /* Set up the number of SSE registers used for passing SFmode
6429 and DFmode arguments. Warn for mismatching ABI. */
6430 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6434 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6435 But in the case of vector types, it is some vector mode.
6437 When we have only some of our vector isa extensions enabled, then there
6438 are some modes for which vector_mode_supported_p is false. For these
6439 modes, the generic vector support in gcc will choose some non-vector mode
6440 in order to implement the type. By computing the natural mode, we'll
6441 select the proper ABI location for the operand and not depend on whatever
6442 the middle-end decides to do with these vector types.
6444 The middle-end can't deal with vector types larger than 16 bytes. In this
6445 case, we return the original mode and warn ABI change if CUM isn't
6446 NULL.
6448 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6449 available for function return value. */
6451 static machine_mode
6452 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6453 bool in_return)
6455 machine_mode mode = TYPE_MODE (type);
6457 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6459 HOST_WIDE_INT size = int_size_in_bytes (type);
6460 if ((size == 8 || size == 16 || size == 32 || size == 64)
6461 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6462 && TYPE_VECTOR_SUBPARTS (type) > 1)
6464 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6466 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6467 mode = MIN_MODE_VECTOR_FLOAT;
6468 else
6469 mode = MIN_MODE_VECTOR_INT;
6471 /* Get the mode which has this inner mode and number of units. */
6472 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6473 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6474 && GET_MODE_INNER (mode) == innermode)
6476 if (size == 64 && !TARGET_AVX512F)
6478 static bool warnedavx512f;
6479 static bool warnedavx512f_ret;
6481 if (cum && cum->warn_avx512f && !warnedavx512f)
6483 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6484 "without AVX512F enabled changes the ABI"))
6485 warnedavx512f = true;
6487 else if (in_return && !warnedavx512f_ret)
6489 if (warning (OPT_Wpsabi, "AVX512F vector return "
6490 "without AVX512F enabled changes the ABI"))
6491 warnedavx512f_ret = true;
6494 return TYPE_MODE (type);
6496 else if (size == 32 && !TARGET_AVX)
6498 static bool warnedavx;
6499 static bool warnedavx_ret;
6501 if (cum && cum->warn_avx && !warnedavx)
6503 if (warning (OPT_Wpsabi, "AVX vector argument "
6504 "without AVX enabled changes the ABI"))
6505 warnedavx = true;
6507 else if (in_return && !warnedavx_ret)
6509 if (warning (OPT_Wpsabi, "AVX vector return "
6510 "without AVX enabled changes the ABI"))
6511 warnedavx_ret = true;
6514 return TYPE_MODE (type);
6516 else if (((size == 8 && TARGET_64BIT) || size == 16)
6517 && !TARGET_SSE)
6519 static bool warnedsse;
6520 static bool warnedsse_ret;
6522 if (cum && cum->warn_sse && !warnedsse)
6524 if (warning (OPT_Wpsabi, "SSE vector argument "
6525 "without SSE enabled changes the ABI"))
6526 warnedsse = true;
6528 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6530 if (warning (OPT_Wpsabi, "SSE vector return "
6531 "without SSE enabled changes the ABI"))
6532 warnedsse_ret = true;
6535 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6537 static bool warnedmmx;
6538 static bool warnedmmx_ret;
6540 if (cum && cum->warn_mmx && !warnedmmx)
6542 if (warning (OPT_Wpsabi, "MMX vector argument "
6543 "without MMX enabled changes the ABI"))
6544 warnedmmx = true;
6546 else if (in_return && !warnedmmx_ret)
6548 if (warning (OPT_Wpsabi, "MMX vector return "
6549 "without MMX enabled changes the ABI"))
6550 warnedmmx_ret = true;
6553 return mode;
6556 gcc_unreachable ();
6560 return mode;
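/* Illustrative example (not part of the implementation): for a
   hypothetical user type

     typedef int v8si __attribute__ ((vector_size (32)));

   TYPE_MODE is already V8SImode when AVX is enabled and is returned
   directly.  With -mno-avx the generic vector support picks a
   non-vector mode, so the search above still computes V8SImode, but
   the size == 32 && !TARGET_AVX branch issues the -Wpsabi warning and
   falls back to the original TYPE_MODE.  */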
6563 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6564 this may not agree with the mode that the type system has chosen for the
6565 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6566 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6568 static rtx
6569 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6570 unsigned int regno)
6572 rtx tmp;
6574 if (orig_mode != BLKmode)
6575 tmp = gen_rtx_REG (orig_mode, regno);
6576 else
6578 tmp = gen_rtx_REG (mode, regno);
6579 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6580 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6583 return tmp;
6586 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6587 of this code is to classify each 8bytes of incoming argument by the register
6588 class and assign registers accordingly. */
6590 /* Return the union class of CLASS1 and CLASS2.
6591 See the x86-64 PS ABI for details. */
6593 static enum x86_64_reg_class
6594 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6596 /* Rule #1: If both classes are equal, this is the resulting class. */
6597 if (class1 == class2)
6598 return class1;
6600 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6601 the other class. */
6602 if (class1 == X86_64_NO_CLASS)
6603 return class2;
6604 if (class2 == X86_64_NO_CLASS)
6605 return class1;
6607 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6608 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6609 return X86_64_MEMORY_CLASS;
6611 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6612 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6613 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6614 return X86_64_INTEGERSI_CLASS;
6615 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6616 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6617 return X86_64_INTEGER_CLASS;
6619 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6620 MEMORY is used. */
6621 if (class1 == X86_64_X87_CLASS
6622 || class1 == X86_64_X87UP_CLASS
6623 || class1 == X86_64_COMPLEX_X87_CLASS
6624 || class2 == X86_64_X87_CLASS
6625 || class2 == X86_64_X87UP_CLASS
6626 || class2 == X86_64_COMPLEX_X87_CLASS)
6627 return X86_64_MEMORY_CLASS;
6629 /* Rule #6: Otherwise class SSE is used. */
6630 return X86_64_SSE_CLASS;
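/* Two worked cases of the rules above (illustrative only):

     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS)
       == X86_64_INTEGERSI_CLASS   (rule #4: int and float sharing an
                                    eightbyte go to an integer register)
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)
       == X86_64_MEMORY_CLASS      (rule #5: x87 classes force memory)  */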
6633 /* Classify the argument of type TYPE and mode MODE.
6634 CLASSES will be filled by the register class used to pass each word
6635 of the operand. The number of words is returned. In case the parameter
6636 should be passed in memory, 0 is returned. As a special case for zero
6637 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6639 BIT_OFFSET is used internally for handling records and specifies the
6640 offset in bits modulo 512 to avoid overflow cases.
6642 See the x86-64 PS ABI for details. */
6645 static int
6646 classify_argument (machine_mode mode, const_tree type,
6647 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6649 HOST_WIDE_INT bytes =
6650 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6651 int words
6652 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6654 /* Variable sized entities are always passed/returned in memory. */
6655 if (bytes < 0)
6656 return 0;
6658 if (mode != VOIDmode
6659 && targetm.calls.must_pass_in_stack (mode, type))
6660 return 0;
6662 if (type && AGGREGATE_TYPE_P (type))
6664 int i;
6665 tree field;
6666 enum x86_64_reg_class subclasses[MAX_CLASSES];
6668 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6669 if (bytes > 64)
6670 return 0;
6672 for (i = 0; i < words; i++)
6673 classes[i] = X86_64_NO_CLASS;
6675 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6676 signal the memory class, so handle this as a special case. */
6677 if (!words)
6679 classes[0] = X86_64_NO_CLASS;
6680 return 1;
6683 /* Classify each field of record and merge classes. */
6684 switch (TREE_CODE (type))
6686 case RECORD_TYPE:
6687 /* And now merge the fields of structure. */
6688 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6690 if (TREE_CODE (field) == FIELD_DECL)
6692 int num;
6694 if (TREE_TYPE (field) == error_mark_node)
6695 continue;
6697 /* Bitfields are always classified as integer. Handle them
6698 early, since later code would consider them to be
6699 misaligned integers. */
6700 if (DECL_BIT_FIELD (field))
6702 for (i = (int_bit_position (field)
6703 + (bit_offset % 64)) / 8 / 8;
6704 i < ((int_bit_position (field) + (bit_offset % 64))
6705 + tree_to_shwi (DECL_SIZE (field))
6706 + 63) / 8 / 8; i++)
6707 classes[i] =
6708 merge_classes (X86_64_INTEGER_CLASS,
6709 classes[i]);
6711 else
6713 int pos;
6715 type = TREE_TYPE (field);
6717 /* Flexible array member is ignored. */
6718 if (TYPE_MODE (type) == BLKmode
6719 && TREE_CODE (type) == ARRAY_TYPE
6720 && TYPE_SIZE (type) == NULL_TREE
6721 && TYPE_DOMAIN (type) != NULL_TREE
6722 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6723 == NULL_TREE))
6725 static bool warned;
6727 if (!warned && warn_psabi)
6729 warned = true;
6730 inform (input_location,
6731 "the ABI of passing struct with"
6732 " a flexible array member has"
6733 " changed in GCC 4.4");
6735 continue;
6737 num = classify_argument (TYPE_MODE (type), type,
6738 subclasses,
6739 (int_bit_position (field)
6740 + bit_offset) % 512);
6741 if (!num)
6742 return 0;
6743 pos = (int_bit_position (field)
6744 + (bit_offset % 64)) / 8 / 8;
6745 for (i = 0; i < num && (i + pos) < words; i++)
6746 classes[i + pos] =
6747 merge_classes (subclasses[i], classes[i + pos]);
6751 break;
6753 case ARRAY_TYPE:
6754 /* Arrays are handled as small records. */
6756 int num;
6757 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6758 TREE_TYPE (type), subclasses, bit_offset);
6759 if (!num)
6760 return 0;
6762 /* The partial classes are now full classes. */
6763 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6764 subclasses[0] = X86_64_SSE_CLASS;
6765 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6766 && !((bit_offset % 64) == 0 && bytes == 4))
6767 subclasses[0] = X86_64_INTEGER_CLASS;
6769 for (i = 0; i < words; i++)
6770 classes[i] = subclasses[i % num];
6772 break;
6774 case UNION_TYPE:
6775 case QUAL_UNION_TYPE:
6776 /* Unions are similar to RECORD_TYPE but offset is always 0. */
6778 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6780 if (TREE_CODE (field) == FIELD_DECL)
6782 int num;
6784 if (TREE_TYPE (field) == error_mark_node)
6785 continue;
6787 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6788 TREE_TYPE (field), subclasses,
6789 bit_offset);
6790 if (!num)
6791 return 0;
6792 for (i = 0; i < num && i < words; i++)
6793 classes[i] = merge_classes (subclasses[i], classes[i]);
6796 break;
6798 default:
6799 gcc_unreachable ();
6802 if (words > 2)
6804 /* When the size is larger than 16 bytes, everything is passed in
6805 memory unless the first class is X86_64_SSE_CLASS and all the
6806 remaining classes are X86_64_SSEUP_CLASS. */
6808 if (classes[0] != X86_64_SSE_CLASS)
6809 return 0;
6811 for (i = 1; i < words; i++)
6812 if (classes[i] != X86_64_SSEUP_CLASS)
6813 return 0;
6816 /* Final merger cleanup. */
6817 for (i = 0; i < words; i++)
6819 /* If one class is MEMORY, everything should be passed in
6820 memory. */
6821 if (classes[i] == X86_64_MEMORY_CLASS)
6822 return 0;
6824 /* The X86_64_SSEUP_CLASS should always be preceded by
6825 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6826 if (classes[i] == X86_64_SSEUP_CLASS
6827 && classes[i - 1] != X86_64_SSE_CLASS
6828 && classes[i - 1] != X86_64_SSEUP_CLASS)
6830 /* The first one should never be X86_64_SSEUP_CLASS. */
6831 gcc_assert (i != 0);
6832 classes[i] = X86_64_SSE_CLASS;
6835 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6836 everything should be passed in memory. */
6837 if (classes[i] == X86_64_X87UP_CLASS
6838 && (classes[i - 1] != X86_64_X87_CLASS))
6840 static bool warned;
6842 /* The first one should never be X86_64_X87UP_CLASS. */
6843 gcc_assert (i != 0);
6844 if (!warned && warn_psabi)
6846 warned = true;
6847 inform (input_location,
6848 "the ABI of passing union with long double"
6849 " has changed in GCC 4.4");
6851 return 0;
6854 return words;
6857 /* Compute the alignment needed. We align all types to their natural
6858 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6859 if (mode != VOIDmode && mode != BLKmode)
6861 int mode_alignment = GET_MODE_BITSIZE (mode);
6863 if (mode == XFmode)
6864 mode_alignment = 128;
6865 else if (mode == XCmode)
6866 mode_alignment = 256;
6867 if (COMPLEX_MODE_P (mode))
6868 mode_alignment /= 2;
6869 /* Misaligned fields are always returned in memory. */
6870 if (bit_offset % mode_alignment)
6871 return 0;
6874 /* For V1xx modes, just use the base mode. */
6875 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6876 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6877 mode = GET_MODE_INNER (mode);
6879 /* Classification of atomic types. */
6880 switch (mode)
6882 case SDmode:
6883 case DDmode:
6884 classes[0] = X86_64_SSE_CLASS;
6885 return 1;
6886 case TDmode:
6887 classes[0] = X86_64_SSE_CLASS;
6888 classes[1] = X86_64_SSEUP_CLASS;
6889 return 2;
6890 case DImode:
6891 case SImode:
6892 case HImode:
6893 case QImode:
6894 case CSImode:
6895 case CHImode:
6896 case CQImode:
6898 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6900 /* Analyze last 128 bits only. */
6901 size = (size - 1) & 0x7f;
6903 if (size < 32)
6905 classes[0] = X86_64_INTEGERSI_CLASS;
6906 return 1;
6908 else if (size < 64)
6910 classes[0] = X86_64_INTEGER_CLASS;
6911 return 1;
6913 else if (size < 64+32)
6915 classes[0] = X86_64_INTEGER_CLASS;
6916 classes[1] = X86_64_INTEGERSI_CLASS;
6917 return 2;
6919 else if (size < 64+64)
6921 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6922 return 2;
6924 else
6925 gcc_unreachable ();
6927 case CDImode:
6928 case TImode:
6929 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6930 return 2;
6931 case COImode:
6932 case OImode:
6933 /* OImode shouldn't be used directly. */
6934 gcc_unreachable ();
6935 case CTImode:
6936 return 0;
6937 case SFmode:
6938 if (!(bit_offset % 64))
6939 classes[0] = X86_64_SSESF_CLASS;
6940 else
6941 classes[0] = X86_64_SSE_CLASS;
6942 return 1;
6943 case DFmode:
6944 classes[0] = X86_64_SSEDF_CLASS;
6945 return 1;
6946 case XFmode:
6947 classes[0] = X86_64_X87_CLASS;
6948 classes[1] = X86_64_X87UP_CLASS;
6949 return 2;
6950 case TFmode:
6951 classes[0] = X86_64_SSE_CLASS;
6952 classes[1] = X86_64_SSEUP_CLASS;
6953 return 2;
6954 case SCmode:
6955 classes[0] = X86_64_SSE_CLASS;
6956 if (!(bit_offset % 64))
6957 return 1;
6958 else
6960 static bool warned;
6962 if (!warned && warn_psabi)
6964 warned = true;
6965 inform (input_location,
6966 "the ABI of passing structure with complex float"
6967 " member has changed in GCC 4.4");
6969 classes[1] = X86_64_SSESF_CLASS;
6970 return 2;
6972 case DCmode:
6973 classes[0] = X86_64_SSEDF_CLASS;
6974 classes[1] = X86_64_SSEDF_CLASS;
6975 return 2;
6976 case XCmode:
6977 classes[0] = X86_64_COMPLEX_X87_CLASS;
6978 return 1;
6979 case TCmode:
6980 /* This mode is larger than 16 bytes. */
6981 return 0;
6982 case V8SFmode:
6983 case V8SImode:
6984 case V32QImode:
6985 case V16HImode:
6986 case V4DFmode:
6987 case V4DImode:
6988 classes[0] = X86_64_SSE_CLASS;
6989 classes[1] = X86_64_SSEUP_CLASS;
6990 classes[2] = X86_64_SSEUP_CLASS;
6991 classes[3] = X86_64_SSEUP_CLASS;
6992 return 4;
6993 case V8DFmode:
6994 case V16SFmode:
6995 case V8DImode:
6996 case V16SImode:
6997 case V32HImode:
6998 case V64QImode:
6999 classes[0] = X86_64_SSE_CLASS;
7000 classes[1] = X86_64_SSEUP_CLASS;
7001 classes[2] = X86_64_SSEUP_CLASS;
7002 classes[3] = X86_64_SSEUP_CLASS;
7003 classes[4] = X86_64_SSEUP_CLASS;
7004 classes[5] = X86_64_SSEUP_CLASS;
7005 classes[6] = X86_64_SSEUP_CLASS;
7006 classes[7] = X86_64_SSEUP_CLASS;
7007 return 8;
7008 case V4SFmode:
7009 case V4SImode:
7010 case V16QImode:
7011 case V8HImode:
7012 case V2DFmode:
7013 case V2DImode:
7014 classes[0] = X86_64_SSE_CLASS;
7015 classes[1] = X86_64_SSEUP_CLASS;
7016 return 2;
7017 case V1TImode:
7018 case V1DImode:
7019 case V2SFmode:
7020 case V2SImode:
7021 case V4HImode:
7022 case V8QImode:
7023 classes[0] = X86_64_SSE_CLASS;
7024 return 1;
7025 case BLKmode:
7026 case VOIDmode:
7027 return 0;
7028 default:
7029 gcc_assert (VECTOR_MODE_P (mode));
7031 if (bytes > 16)
7032 return 0;
7034 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7036 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7037 classes[0] = X86_64_INTEGERSI_CLASS;
7038 else
7039 classes[0] = X86_64_INTEGER_CLASS;
7040 classes[1] = X86_64_INTEGER_CLASS;
7041 return 1 + (bytes > 8);
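/* Classification sketch (illustrative only) for a hypothetical

     struct pt { int x; int y; double d; };   16 bytes, two eightbytes

   The two ints share the first eightbyte and merge to
   X86_64_INTEGER_CLASS; the double fills the second eightbyte as
   X86_64_SSEDF_CLASS.  classify_argument therefore returns 2 with
   classes[] = { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS }, so the
   struct travels in one integer and one SSE register.  */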
7045 /* Examine the argument and return set number of register required in each
7046 class. Return true iff parameter should be passed in memory. */
7048 static bool
7049 examine_argument (machine_mode mode, const_tree type, int in_return,
7050 int *int_nregs, int *sse_nregs)
7052 enum x86_64_reg_class regclass[MAX_CLASSES];
7053 int n = classify_argument (mode, type, regclass, 0);
7055 *int_nregs = 0;
7056 *sse_nregs = 0;
7058 if (!n)
7059 return true;
7060 for (n--; n >= 0; n--)
7061 switch (regclass[n])
7063 case X86_64_INTEGER_CLASS:
7064 case X86_64_INTEGERSI_CLASS:
7065 (*int_nregs)++;
7066 break;
7067 case X86_64_SSE_CLASS:
7068 case X86_64_SSESF_CLASS:
7069 case X86_64_SSEDF_CLASS:
7070 (*sse_nregs)++;
7071 break;
7072 case X86_64_NO_CLASS:
7073 case X86_64_SSEUP_CLASS:
7074 break;
7075 case X86_64_X87_CLASS:
7076 case X86_64_X87UP_CLASS:
7077 case X86_64_COMPLEX_X87_CLASS:
7078 if (!in_return)
7079 return true;
7080 break;
7081 case X86_64_MEMORY_CLASS:
7082 gcc_unreachable ();
7085 return false;
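/* Usage sketch (illustrative only): for the 16-byte struct pt above,
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1 and returns
   false (register passing).  A 32-byte struct of four doubles is
   rejected by the words > 2 check in classify_argument (its first
   class is SSEDF, not SSE), so examine_argument returns true and the
   struct is passed in memory.  */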
7088 /* Construct container for the argument used by GCC interface. See
7089 FUNCTION_ARG for the detailed description. */
7091 static rtx
7092 construct_container (machine_mode mode, machine_mode orig_mode,
7093 const_tree type, int in_return, int nintregs, int nsseregs,
7094 const int *intreg, int sse_regno)
7096 /* The following variables hold the static issued_error state. */
7097 static bool issued_sse_arg_error;
7098 static bool issued_sse_ret_error;
7099 static bool issued_x87_ret_error;
7101 machine_mode tmpmode;
7102 int bytes =
7103 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7104 enum x86_64_reg_class regclass[MAX_CLASSES];
7105 int n;
7106 int i;
7107 int nexps = 0;
7108 int needed_sseregs, needed_intregs;
7109 rtx exp[MAX_CLASSES];
7110 rtx ret;
7112 n = classify_argument (mode, type, regclass, 0);
7113 if (!n)
7114 return NULL;
7115 if (examine_argument (mode, type, in_return, &needed_intregs,
7116 &needed_sseregs))
7117 return NULL;
7118 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7119 return NULL;
7121 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7122 some less clueful developer tries to use floating-point anyway. */
7123 if (needed_sseregs && !TARGET_SSE)
7125 if (in_return)
7127 if (!issued_sse_ret_error)
7129 error ("SSE register return with SSE disabled");
7130 issued_sse_ret_error = true;
7133 else if (!issued_sse_arg_error)
7135 error ("SSE register argument with SSE disabled");
7136 issued_sse_arg_error = true;
7138 return NULL;
7141 /* Likewise, error if the ABI requires us to return values in the
7142 x87 registers and the user specified -mno-80387. */
7143 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7144 for (i = 0; i < n; i++)
7145 if (regclass[i] == X86_64_X87_CLASS
7146 || regclass[i] == X86_64_X87UP_CLASS
7147 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7149 if (!issued_x87_ret_error)
7151 error ("x87 register return with x87 disabled");
7152 issued_x87_ret_error = true;
7154 return NULL;
7157 /* First construct simple cases. Avoid SCmode, since we want to use
7158 a single register to pass this type. */
7159 if (n == 1 && mode != SCmode)
7160 switch (regclass[0])
7162 case X86_64_INTEGER_CLASS:
7163 case X86_64_INTEGERSI_CLASS:
7164 return gen_rtx_REG (mode, intreg[0]);
7165 case X86_64_SSE_CLASS:
7166 case X86_64_SSESF_CLASS:
7167 case X86_64_SSEDF_CLASS:
7168 if (mode != BLKmode)
7169 return gen_reg_or_parallel (mode, orig_mode,
7170 SSE_REGNO (sse_regno));
7171 break;
7172 case X86_64_X87_CLASS:
7173 case X86_64_COMPLEX_X87_CLASS:
7174 return gen_rtx_REG (mode, FIRST_STACK_REG);
7175 case X86_64_NO_CLASS:
7176 /* Zero sized array, struct or class. */
7177 return NULL;
7178 default:
7179 gcc_unreachable ();
7181 if (n == 2
7182 && regclass[0] == X86_64_SSE_CLASS
7183 && regclass[1] == X86_64_SSEUP_CLASS
7184 && mode != BLKmode)
7185 return gen_reg_or_parallel (mode, orig_mode,
7186 SSE_REGNO (sse_regno));
7187 if (n == 4
7188 && regclass[0] == X86_64_SSE_CLASS
7189 && regclass[1] == X86_64_SSEUP_CLASS
7190 && regclass[2] == X86_64_SSEUP_CLASS
7191 && regclass[3] == X86_64_SSEUP_CLASS
7192 && mode != BLKmode)
7193 return gen_reg_or_parallel (mode, orig_mode,
7194 SSE_REGNO (sse_regno));
7195 if (n == 8
7196 && regclass[0] == X86_64_SSE_CLASS
7197 && regclass[1] == X86_64_SSEUP_CLASS
7198 && regclass[2] == X86_64_SSEUP_CLASS
7199 && regclass[3] == X86_64_SSEUP_CLASS
7200 && regclass[4] == X86_64_SSEUP_CLASS
7201 && regclass[5] == X86_64_SSEUP_CLASS
7202 && regclass[6] == X86_64_SSEUP_CLASS
7203 && regclass[7] == X86_64_SSEUP_CLASS
7204 && mode != BLKmode)
7205 return gen_reg_or_parallel (mode, orig_mode,
7206 SSE_REGNO (sse_regno));
7207 if (n == 2
7208 && regclass[0] == X86_64_X87_CLASS
7209 && regclass[1] == X86_64_X87UP_CLASS)
7210 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7212 if (n == 2
7213 && regclass[0] == X86_64_INTEGER_CLASS
7214 && regclass[1] == X86_64_INTEGER_CLASS
7215 && (mode == CDImode || mode == TImode)
7216 && intreg[0] + 1 == intreg[1])
7217 return gen_rtx_REG (mode, intreg[0]);
7219 /* Otherwise figure out the entries of the PARALLEL. */
7220 for (i = 0; i < n; i++)
7222 int pos;
7224 switch (regclass[i])
7226 case X86_64_NO_CLASS:
7227 break;
7228 case X86_64_INTEGER_CLASS:
7229 case X86_64_INTEGERSI_CLASS:
7230 /* Merge TImodes on aligned occasions here too. */
7231 if (i * 8 + 8 > bytes)
7232 tmpmode
7233 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7234 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7235 tmpmode = SImode;
7236 else
7237 tmpmode = DImode;
7238 /* We have requested a size for which no integer mode exists
7239 (e.g. 24 bytes).  Use DImode. */
7240 if (tmpmode == BLKmode)
7241 tmpmode = DImode;
7242 exp [nexps++]
7243 = gen_rtx_EXPR_LIST (VOIDmode,
7244 gen_rtx_REG (tmpmode, *intreg),
7245 GEN_INT (i*8));
7246 intreg++;
7247 break;
7248 case X86_64_SSESF_CLASS:
7249 exp [nexps++]
7250 = gen_rtx_EXPR_LIST (VOIDmode,
7251 gen_rtx_REG (SFmode,
7252 SSE_REGNO (sse_regno)),
7253 GEN_INT (i*8));
7254 sse_regno++;
7255 break;
7256 case X86_64_SSEDF_CLASS:
7257 exp [nexps++]
7258 = gen_rtx_EXPR_LIST (VOIDmode,
7259 gen_rtx_REG (DFmode,
7260 SSE_REGNO (sse_regno)),
7261 GEN_INT (i*8));
7262 sse_regno++;
7263 break;
7264 case X86_64_SSE_CLASS:
7265 pos = i;
7266 switch (n)
7268 case 1:
7269 tmpmode = DImode;
7270 break;
7271 case 2:
7272 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7274 tmpmode = TImode;
7275 i++;
7277 else
7278 tmpmode = DImode;
7279 break;
7280 case 4:
7281 gcc_assert (i == 0
7282 && regclass[1] == X86_64_SSEUP_CLASS
7283 && regclass[2] == X86_64_SSEUP_CLASS
7284 && regclass[3] == X86_64_SSEUP_CLASS);
7285 tmpmode = OImode;
7286 i += 3;
7287 break;
7288 case 8:
7289 gcc_assert (i == 0
7290 && regclass[1] == X86_64_SSEUP_CLASS
7291 && regclass[2] == X86_64_SSEUP_CLASS
7292 && regclass[3] == X86_64_SSEUP_CLASS
7293 && regclass[4] == X86_64_SSEUP_CLASS
7294 && regclass[5] == X86_64_SSEUP_CLASS
7295 && regclass[6] == X86_64_SSEUP_CLASS
7296 && regclass[7] == X86_64_SSEUP_CLASS);
7297 tmpmode = XImode;
7298 i += 7;
7299 break;
7300 default:
7301 gcc_unreachable ();
7303 exp [nexps++]
7304 = gen_rtx_EXPR_LIST (VOIDmode,
7305 gen_rtx_REG (tmpmode,
7306 SSE_REGNO (sse_regno)),
7307 GEN_INT (pos*8));
7308 sse_regno++;
7309 break;
7310 default:
7311 gcc_unreachable ();
7315 /* Empty aligned struct, union or class. */
7316 if (nexps == 0)
7317 return NULL;
7319 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7320 for (i = 0; i < nexps; i++)
7321 XVECEXP (ret, 0, i) = exp [i];
7322 return ret;
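/* Result sketch (illustrative only): for the struct pt example above
   passed as the first SYSV_ABI argument, construct_container builds
   roughly

     (parallel:BLK [(expr_list (reg:DI di) (const_int 0))
                    (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. bytes 0-7 travel in %rdi and bytes 8-15 in %xmm0.  */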
7325 /* Update the data in CUM to advance over an argument of mode MODE
7326 and data type TYPE. (TYPE is null for libcalls where that information
7327 may not be available.)
7329 Return the number of integer registers advanced over. */
7331 static int
7332 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7333 const_tree type, HOST_WIDE_INT bytes,
7334 HOST_WIDE_INT words)
7336 int res = 0;
7338 switch (mode)
7340 default:
7341 break;
7343 case BLKmode:
7344 if (bytes < 0)
7345 break;
7346 /* FALLTHRU */
7348 case DImode:
7349 case SImode:
7350 case HImode:
7351 case QImode:
7352 cum->words += words;
7353 cum->nregs -= words;
7354 cum->regno += words;
7355 if (cum->nregs >= 0)
7356 res = words;
7357 if (cum->nregs <= 0)
7359 cum->nregs = 0;
7360 cum->regno = 0;
7362 break;
7364 case OImode:
7365 /* OImode shouldn't be used directly. */
7366 gcc_unreachable ();
7368 case DFmode:
7369 if (cum->float_in_sse < 2)
7370 break;
7371 case SFmode:
7372 if (cum->float_in_sse < 1)
7373 break;
7374 /* FALLTHRU */
7376 case V8SFmode:
7377 case V8SImode:
7378 case V64QImode:
7379 case V32HImode:
7380 case V16SImode:
7381 case V8DImode:
7382 case V16SFmode:
7383 case V8DFmode:
7384 case V32QImode:
7385 case V16HImode:
7386 case V4DFmode:
7387 case V4DImode:
7388 case TImode:
7389 case V16QImode:
7390 case V8HImode:
7391 case V4SImode:
7392 case V2DImode:
7393 case V4SFmode:
7394 case V2DFmode:
7395 if (!type || !AGGREGATE_TYPE_P (type))
7397 cum->sse_words += words;
7398 cum->sse_nregs -= 1;
7399 cum->sse_regno += 1;
7400 if (cum->sse_nregs <= 0)
7402 cum->sse_nregs = 0;
7403 cum->sse_regno = 0;
7406 break;
7408 case V8QImode:
7409 case V4HImode:
7410 case V2SImode:
7411 case V2SFmode:
7412 case V1TImode:
7413 case V1DImode:
7414 if (!type || !AGGREGATE_TYPE_P (type))
7416 cum->mmx_words += words;
7417 cum->mmx_nregs -= 1;
7418 cum->mmx_regno += 1;
7419 if (cum->mmx_nregs <= 0)
7421 cum->mmx_nregs = 0;
7422 cum->mmx_regno = 0;
7425 break;
7428 return res;
7431 static int
7432 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7433 const_tree type, HOST_WIDE_INT words, bool named)
7435 int int_nregs, sse_nregs;
7437 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7438 if (!named && (VALID_AVX512F_REG_MODE (mode)
7439 || VALID_AVX256_REG_MODE (mode)))
7440 return 0;
7442 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7443 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7445 cum->nregs -= int_nregs;
7446 cum->sse_nregs -= sse_nregs;
7447 cum->regno += int_nregs;
7448 cum->sse_regno += sse_nregs;
7449 return int_nregs;
7451 else
7453 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7454 cum->words = (cum->words + align - 1) & ~(align - 1);
7455 cum->words += words;
7456 return 0;
7460 static int
7461 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7462 HOST_WIDE_INT words)
7464 /* Anything not 1, 2, 4 or 8 bytes must have been passed indirectly. */
7465 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7467 cum->words += words;
7468 if (cum->nregs > 0)
7470 cum->nregs -= 1;
7471 cum->regno += 1;
7472 return 1;
7474 return 0;
7477 /* Update the data in CUM to advance over an argument of mode MODE and
7478 data type TYPE. (TYPE is null for libcalls where that information
7479 may not be available.) */
7481 static void
7482 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7483 const_tree type, bool named)
7485 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7486 HOST_WIDE_INT bytes, words;
7487 int nregs;
7489 if (mode == BLKmode)
7490 bytes = int_size_in_bytes (type);
7491 else
7492 bytes = GET_MODE_SIZE (mode);
7493 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7495 if (type)
7496 mode = type_natural_mode (type, NULL, false);
7498 if ((type && POINTER_BOUNDS_TYPE_P (type))
7499 || POINTER_BOUNDS_MODE_P (mode))
7501 /* If we pass bounds in BT then just update the remaining bounds count. */
7502 if (cum->bnds_in_bt)
7504 cum->bnds_in_bt--;
7505 return;
7509 /* Update the remaining number of bounds to force. */
7509 if (cum->force_bnd_pass)
7510 cum->force_bnd_pass--;
7512 cum->bnd_regno++;
7514 return;
7517 /* The first arg not going to Bounds Tables resets this counter. */
7518 cum->bnds_in_bt = 0;
7519 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7520 the passed and received types do not match.  If bounds do not follow an
7521 unnamed arg, still pretend the required number of bounds were passed. */
7522 if (cum->force_bnd_pass)
7524 cum->bnd_regno += cum->force_bnd_pass;
7525 cum->force_bnd_pass = 0;
7528 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7529 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7530 else if (TARGET_64BIT)
7531 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7532 else
7533 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7535 /* For stdarg we expect bounds to be passed for each value passed
7536 in register. */
7537 if (cum->stdarg)
7538 cum->force_bnd_pass = nregs;
7539 /* For pointers passed in memory we expect bounds passed in Bounds
7540 Table. */
7541 if (!nregs)
7542 cum->bnds_in_bt = chkp_type_bounds_count (type);
7545 /* Define where to put the arguments to a function.
7546 Value is zero to push the argument on the stack,
7547 or a hard register in which to store the argument.
7549 MODE is the argument's machine mode.
7550 TYPE is the data type of the argument (as a tree).
7551 This is null for libcalls where that information may
7552 not be available.
7553 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7554 the preceding args and about the function being called.
7555 NAMED is nonzero if this argument is a named parameter
7556 (otherwise it is an extra parameter matching an ellipsis). */
7558 static rtx
7559 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7560 machine_mode orig_mode, const_tree type,
7561 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7563 /* Avoid the AL settings for the Unix64 ABI. */
7564 if (mode == VOIDmode)
7565 return constm1_rtx;
7567 switch (mode)
7569 default:
7570 break;
7572 case BLKmode:
7573 if (bytes < 0)
7574 break;
7575 /* FALLTHRU */
7576 case DImode:
7577 case SImode:
7578 case HImode:
7579 case QImode:
7580 if (words <= cum->nregs)
7582 int regno = cum->regno;
7584 /* Fastcall allocates the first two DWORD (SImode) or
7585 smaller arguments to ECX and EDX if the argument isn't
7586 an aggregate type. */
7587 if (cum->fastcall)
7589 if (mode == BLKmode
7590 || mode == DImode
7591 || (type && AGGREGATE_TYPE_P (type)))
7592 break;
7594 /* ECX not EAX is the first allocated register. */
7595 if (regno == AX_REG)
7596 regno = CX_REG;
7598 return gen_rtx_REG (mode, regno);
7600 break;
7602 case DFmode:
7603 if (cum->float_in_sse < 2)
7604 break;
7605 case SFmode:
7606 if (cum->float_in_sse < 1)
7607 break;
7608 /* FALLTHRU */
7609 case TImode:
7610 /* In 32bit, we pass TImode in xmm registers. */
7611 case V16QImode:
7612 case V8HImode:
7613 case V4SImode:
7614 case V2DImode:
7615 case V4SFmode:
7616 case V2DFmode:
7617 if (!type || !AGGREGATE_TYPE_P (type))
7619 if (cum->sse_nregs)
7620 return gen_reg_or_parallel (mode, orig_mode,
7621 cum->sse_regno + FIRST_SSE_REG);
7623 break;
7625 case OImode:
7626 case XImode:
7627 /* OImode and XImode shouldn't be used directly. */
7628 gcc_unreachable ();
7630 case V64QImode:
7631 case V32HImode:
7632 case V16SImode:
7633 case V8DImode:
7634 case V16SFmode:
7635 case V8DFmode:
7636 case V8SFmode:
7637 case V8SImode:
7638 case V32QImode:
7639 case V16HImode:
7640 case V4DFmode:
7641 case V4DImode:
7642 if (!type || !AGGREGATE_TYPE_P (type))
7644 if (cum->sse_nregs)
7645 return gen_reg_or_parallel (mode, orig_mode,
7646 cum->sse_regno + FIRST_SSE_REG);
7648 break;
7650 case V8QImode:
7651 case V4HImode:
7652 case V2SImode:
7653 case V2SFmode:
7654 case V1TImode:
7655 case V1DImode:
7656 if (!type || !AGGREGATE_TYPE_P (type))
7658 if (cum->mmx_nregs)
7659 return gen_reg_or_parallel (mode, orig_mode,
7660 cum->mmx_regno + FIRST_MMX_REG);
7662 break;
7665 return NULL_RTX;
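/* Example (illustrative only): for the hypothetical 32-bit declaration

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above places a in %ecx (the AX_REG -> CX_REG rewrite),
   b in %edx, and c on the stack once cum->nregs is exhausted.  */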
7668 static rtx
7669 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7670 machine_mode orig_mode, const_tree type, bool named)
7672 /* Handle a hidden AL argument containing number of registers
7673 for varargs x86-64 functions. */
7674 if (mode == VOIDmode)
7675 return GEN_INT (cum->maybe_vaarg
7676 ? (cum->sse_nregs < 0
7677 ? X86_64_SSE_REGPARM_MAX
7678 : cum->sse_regno)
7679 : -1);
7681 switch (mode)
7683 default:
7684 break;
7686 case V8SFmode:
7687 case V8SImode:
7688 case V32QImode:
7689 case V16HImode:
7690 case V4DFmode:
7691 case V4DImode:
7692 case V16SFmode:
7693 case V16SImode:
7694 case V64QImode:
7695 case V32HImode:
7696 case V8DFmode:
7697 case V8DImode:
7698 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7699 if (!named)
7700 return NULL;
7701 break;
7704 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7705 cum->sse_nregs,
7706 &x86_64_int_parameter_registers [cum->regno],
7707 cum->sse_regno);
7710 static rtx
7711 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7712 machine_mode orig_mode, bool named,
7713 HOST_WIDE_INT bytes)
7715 unsigned int regno;
7717 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7718 The value -2 specifies that the current function call uses the MS ABI. */
7719 if (mode == VOIDmode)
7720 return GEN_INT (-2);
7722 /* If we've run out of registers, it goes on the stack. */
7723 if (cum->nregs == 0)
7724 return NULL_RTX;
7726 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7728 /* Only floating point modes are passed in anything but integer regs. */
7729 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7731 if (named)
7732 regno = cum->regno + FIRST_SSE_REG;
7733 else
7735 rtx t1, t2;
7737 /* Unnamed floating parameters are passed in both the
7738 SSE and integer registers. */
7739 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7740 t2 = gen_rtx_REG (mode, regno);
7741 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7742 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7743 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7746 /* Handle aggregate types passed in a register. */
7747 if (orig_mode == BLKmode)
7749 if (bytes > 0 && bytes <= 8)
7750 mode = (bytes > 4 ? DImode : SImode);
7751 if (mode == BLKmode)
7752 mode = DImode;
7755 return gen_reg_or_parallel (mode, orig_mode, regno);
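/* Example (illustrative only): for an MS_ABI varargs call whose third
   argument is an unnamed double, the PARALLEL built above makes the
   value available both in %xmm2 and in %r8, so the callee may read it
   from either place as the Microsoft calling convention requires.  */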
7758 /* Return where to put the arguments to a function.
7759 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7761 MODE is the argument's machine mode. TYPE is the data type of the
7762 argument. It is null for libcalls where that information may not be
7763 available. CUM gives information about the preceding args and about
7764 the function being called. NAMED is nonzero if this argument is a
7765 named parameter (otherwise it is an extra parameter matching an
7766 ellipsis). */
7768 static rtx
7769 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7770 const_tree type, bool named)
7772 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7773 machine_mode mode = omode;
7774 HOST_WIDE_INT bytes, words;
7775 rtx arg;
7777 /* All pointer bounds arguments are handled separately here. */
7778 if ((type && POINTER_BOUNDS_TYPE_P (type))
7779 || POINTER_BOUNDS_MODE_P (mode))
7781 /* Return NULL if bounds are forced to go in Bounds Table. */
7782 if (cum->bnds_in_bt)
7783 arg = NULL;
7784 /* Return the next available bound reg if any. */
7785 else if (cum->bnd_regno <= LAST_BND_REG)
7786 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7787 /* Return the next special slot number otherwise. */
7788 else
7789 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7791 return arg;
7794 if (mode == BLKmode)
7795 bytes = int_size_in_bytes (type);
7796 else
7797 bytes = GET_MODE_SIZE (mode);
7798 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7800 /* To simplify the code below, represent vector types with a vector mode
7801 even if MMX/SSE are not active. */
7802 if (type && TREE_CODE (type) == VECTOR_TYPE)
7803 mode = type_natural_mode (type, cum, false);
7805 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7806 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7807 else if (TARGET_64BIT)
7808 arg = function_arg_64 (cum, mode, omode, type, named);
7809 else
7810 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7812 return arg;
7815 /* A C expression that indicates when an argument must be passed by
7816 reference. If nonzero for an argument, a copy of that argument is
7817 made in memory and a pointer to the argument is passed instead of
7818 the argument itself. The pointer is passed in whatever way is
7819 appropriate for passing a pointer to that type. */
7821 static bool
7822 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7823 const_tree type, bool)
7825 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7827 /* See Windows x64 Software Convention. */
7828 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7830 int msize = (int) GET_MODE_SIZE (mode);
7831 if (type)
7833 /* Arrays are passed by reference. */
7834 if (TREE_CODE (type) == ARRAY_TYPE)
7835 return true;
7837 if (AGGREGATE_TYPE_P (type))
7839 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7840 are passed by reference. */
7841 msize = int_size_in_bytes (type);
7845 /* __m128 is passed by reference. */
7846 switch (msize) {
7847 case 1: case 2: case 4: case 8:
7848 break;
7849 default:
7850 return true;
7853 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7854 return 1;
7856 return 0;
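/* Examples of the MS_ABI rule above (illustrative only):

     struct a { char c[3]; };    3 bytes  -> passed by reference
     struct b { long long x; };  8 bytes  -> passed by value in a GPR
     __m128 (16 bytes)                    -> passed by reference  */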
7859 /* Return true when TYPE should be 128bit aligned for 32bit argument
7860 passing ABI. XXX: This function is obsolete and is only used for
7861 checking psABI compatibility with previous versions of GCC. */
7863 static bool
7864 ix86_compat_aligned_value_p (const_tree type)
7866 machine_mode mode = TYPE_MODE (type);
7867 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7868 || mode == TDmode
7869 || mode == TFmode
7870 || mode == TCmode)
7871 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7872 return true;
7873 if (TYPE_ALIGN (type) < 128)
7874 return false;
7876 if (AGGREGATE_TYPE_P (type))
7878 /* Walk the aggregates recursively. */
7879 switch (TREE_CODE (type))
7881 case RECORD_TYPE:
7882 case UNION_TYPE:
7883 case QUAL_UNION_TYPE:
7885 tree field;
7887 /* Walk all the structure fields. */
7888 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7890 if (TREE_CODE (field) == FIELD_DECL
7891 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7892 return true;
7894 break;
7897 case ARRAY_TYPE:
7898 /* Just in case some languages pass arrays by value. */
7899 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7900 return true;
7901 break;
7903 default:
7904 gcc_unreachable ();
7907 return false;
7910 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7911 XXX: This function is obsolete and is only used for checking psABI
7912 compatibility with previous versions of GCC. */
7914 static unsigned int
7915 ix86_compat_function_arg_boundary (machine_mode mode,
7916 const_tree type, unsigned int align)
7918 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7919 natural boundaries. */
7920 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7922 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7923 make an exception for SSE modes since these require 128bit
7924 alignment.
7926 The handling here differs from field_alignment. ICC aligns MMX
7927 arguments to 4 byte boundaries, while structure fields are aligned
7928 to 8 byte boundaries. */
7929 if (!type)
7931 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7932 align = PARM_BOUNDARY;
7934 else
7936 if (!ix86_compat_aligned_value_p (type))
7937 align = PARM_BOUNDARY;
7940 if (align > BIGGEST_ALIGNMENT)
7941 align = BIGGEST_ALIGNMENT;
7942 return align;
7945 /* Return true when TYPE should be 128bit aligned for 32bit argument
7946 passing ABI. */
7948 static bool
7949 ix86_contains_aligned_value_p (const_tree type)
7951 machine_mode mode = TYPE_MODE (type);
7953 if (mode == XFmode || mode == XCmode)
7954 return false;
7956 if (TYPE_ALIGN (type) < 128)
7957 return false;
7959 if (AGGREGATE_TYPE_P (type))
7961 /* Walk the aggregates recursively. */
7962 switch (TREE_CODE (type))
7964 case RECORD_TYPE:
7965 case UNION_TYPE:
7966 case QUAL_UNION_TYPE:
7968 tree field;
7970 /* Walk all the structure fields. */
7971 for (field = TYPE_FIELDS (type);
7972 field;
7973 field = DECL_CHAIN (field))
7975 if (TREE_CODE (field) == FIELD_DECL
7976 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7977 return true;
7979 break;
7982 case ARRAY_TYPE:
7983 /* Just in case some languages pass arrays by value. */
7984 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7985 return true;
7986 break;
7988 default:
7989 gcc_unreachable ();
7992 else
7993 return TYPE_ALIGN (type) >= 128;
7995 return false;
7998 /* Gives the alignment boundary, in bits, of an argument with the
7999 specified mode and type. */
8001 static unsigned int
8002 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8004 unsigned int align;
8005 if (type)
8007 /* Since the main variant type is used for the call, convert the type
8008 to its main variant. */
8009 type = TYPE_MAIN_VARIANT (type);
8010 align = TYPE_ALIGN (type);
8012 else
8013 align = GET_MODE_ALIGNMENT (mode);
8014 if (align < PARM_BOUNDARY)
8015 align = PARM_BOUNDARY;
8016 else
8018 static bool warned;
8019 unsigned int saved_align = align;
8021 if (!TARGET_64BIT)
8023 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8024 if (!type)
8026 if (mode == XFmode || mode == XCmode)
8027 align = PARM_BOUNDARY;
8029 else if (!ix86_contains_aligned_value_p (type))
8030 align = PARM_BOUNDARY;
8032 if (align < 128)
8033 align = PARM_BOUNDARY;
8036 if (warn_psabi
8037 && !warned
8038 && align != ix86_compat_function_arg_boundary (mode, type,
8039 saved_align))
8041 warned = true;
8042 inform (input_location,
8043 "The ABI for passing parameters with %d-byte"
8044 " alignment has changed in GCC 4.6",
8045 align / BITS_PER_UNIT);
8049 return align;
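/* Examples (illustrative only): a __m256 argument in 64-bit code
   reports a 256-bit boundary here, while a long double (XFmode)
   argument in 32-bit code drops back to PARM_BOUNDARY (32 bits),
   matching the traditional i386 ABI.  */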
8052 /* Return true if N is a possible register number of function value. */
8054 static bool
8055 ix86_function_value_regno_p (const unsigned int regno)
8057 switch (regno)
8059 case AX_REG:
8060 return true;
8061 case DX_REG:
8062 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8063 case DI_REG:
8064 case SI_REG:
8065 return TARGET_64BIT && ix86_abi != MS_ABI;
8067 case FIRST_BND_REG:
8068 return chkp_function_instrumented_p (current_function_decl);
8070 /* Complex values are returned in %st(0)/%st(1) pair. */
8071 case ST0_REG:
8072 case ST1_REG:
8073 /* TODO: The function should depend on current function ABI but
8074 builtins.c would need updating then. Therefore we use the
8075 default ABI. */
8076 if (TARGET_64BIT && ix86_abi == MS_ABI)
8077 return false;
8078 return TARGET_FLOAT_RETURNS_IN_80387;
8080 /* Complex values are returned in %xmm0/%xmm1 pair. */
8081 case XMM0_REG:
8082 case XMM1_REG:
8083 return TARGET_SSE;
8085 case MM0_REG:
8086 if (TARGET_MACHO || TARGET_64BIT)
8087 return false;
8088 return TARGET_MMX;
8091 return false;
8094 /* Define how to find the value returned by a function.
8095 VALTYPE is the data type of the value (as a tree).
8096 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8097 otherwise, FUNC is 0. */
8099 static rtx
8100 function_value_32 (machine_mode orig_mode, machine_mode mode,
8101 const_tree fntype, const_tree fn)
8103 unsigned int regno;
8105 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8106 we normally prevent this case when mmx is not available. However
8107 some ABIs may require the result to be returned like DImode. */
8108 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8109 regno = FIRST_MMX_REG;
8111 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8112 we prevent this case when sse is not available. However some ABIs
8113 may require the result to be returned like integer TImode. */
8114 else if (mode == TImode
8115 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8116 regno = FIRST_SSE_REG;
8118 /* 32-byte vector modes in %ymm0. */
8119 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8120 regno = FIRST_SSE_REG;
8122 /* 64-byte vector modes in %zmm0. */
8123 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8124 regno = FIRST_SSE_REG;
8126 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8127 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8128 regno = FIRST_FLOAT_REG;
8129 else
8130 /* Most things go in %eax. */
8131 regno = AX_REG;
8133 /* Override FP return register with %xmm0 for local functions when
8134 SSE math is enabled or for functions with sseregparm attribute. */
8135 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8137 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8138 if ((sse_level >= 1 && mode == SFmode)
8139 || (sse_level == 2 && mode == DFmode))
8140 regno = FIRST_SSE_REG;
8143 /* OImode shouldn't be used directly. */
8144 gcc_assert (mode != OImode);
8146 return gen_rtx_REG (orig_mode, regno);
8149 static rtx
8150 function_value_64 (machine_mode orig_mode, machine_mode mode,
8151 const_tree valtype)
8153 rtx ret;
8155 /* Handle libcalls, which don't provide a type node. */
8156 if (valtype == NULL)
8158 unsigned int regno;
8160 switch (mode)
8162 case SFmode:
8163 case SCmode:
8164 case DFmode:
8165 case DCmode:
8166 case TFmode:
8167 case SDmode:
8168 case DDmode:
8169 case TDmode:
8170 regno = FIRST_SSE_REG;
8171 break;
8172 case XFmode:
8173 case XCmode:
8174 regno = FIRST_FLOAT_REG;
8175 break;
8176 case TCmode:
8177 return NULL;
8178 default:
8179 regno = AX_REG;
8182 return gen_rtx_REG (mode, regno);
8184 else if (POINTER_TYPE_P (valtype))
8186 /* Pointers are always returned in word_mode. */
8187 mode = word_mode;
8190 ret = construct_container (mode, orig_mode, valtype, 1,
8191 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8192 x86_64_int_return_registers, 0);
8194 /* For zero sized structures, construct_container returns NULL, but we
8195 need to keep the rest of the compiler happy by returning a meaningful value. */
8196 if (!ret)
8197 ret = gen_rtx_REG (orig_mode, AX_REG);
8199 return ret;
8202 static rtx
8203 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8204 const_tree valtype)
8206 unsigned int regno = AX_REG;
8208 if (TARGET_SSE)
8210 switch (GET_MODE_SIZE (mode))
8212 case 16:
8213 if (valtype != NULL_TREE
8214 && !VECTOR_INTEGER_TYPE_P (valtype)
8216 && !INTEGRAL_TYPE_P (valtype)
8217 && !VECTOR_FLOAT_TYPE_P (valtype))
8218 break;
8219 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8220 && !COMPLEX_MODE_P (mode))
8221 regno = FIRST_SSE_REG;
8222 break;
8223 case 8:
8224 case 4:
8225 if (mode == SFmode || mode == DFmode)
8226 regno = FIRST_SSE_REG;
8227 break;
8228 default:
8229 break;
8232 return gen_rtx_REG (orig_mode, regno);
8235 static rtx
8236 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8237 machine_mode orig_mode, machine_mode mode)
8239 const_tree fn, fntype;
8241 fn = NULL_TREE;
8242 if (fntype_or_decl && DECL_P (fntype_or_decl))
8243 fn = fntype_or_decl;
8244 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8246 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8247 || POINTER_BOUNDS_MODE_P (mode))
8248 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8249 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8250 return function_value_ms_64 (orig_mode, mode, valtype);
8251 else if (TARGET_64BIT)
8252 return function_value_64 (orig_mode, mode, valtype);
8253 else
8254 return function_value_32 (orig_mode, mode, fntype, fn);
8257 static rtx
8258 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8260 machine_mode mode, orig_mode;
8262 orig_mode = TYPE_MODE (valtype);
8263 mode = type_natural_mode (valtype, NULL, true);
8264 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8267 /* Return an RTX representing a place where a function returns
8268 or receives pointer bounds, or NULL if no bounds are returned.
8270 VALTYPE is a data type of a value returned by the function.
8272 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8273 or FUNCTION_TYPE of the function.
8275 If OUTGOING is false, return a place in which the caller will
8276 see the return value. Otherwise, return a place where a
8277 function returns a value. */
8279 static rtx
8280 ix86_function_value_bounds (const_tree valtype,
8281 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8282 bool outgoing ATTRIBUTE_UNUSED)
8284 rtx res = NULL_RTX;
8286 if (BOUNDED_TYPE_P (valtype))
8287 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8288 else if (chkp_type_has_pointer (valtype))
8290 bitmap slots;
8291 rtx bounds[2];
8292 bitmap_iterator bi;
8293 unsigned i, bnd_no = 0;
8295 bitmap_obstack_initialize (NULL);
8296 slots = BITMAP_ALLOC (NULL);
8297 chkp_find_bound_slots (valtype, slots);
8299 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8301 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8302 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8303 gcc_assert (bnd_no < 2);
8304 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8307 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8309 BITMAP_FREE (slots);
8310 bitmap_obstack_release (NULL);
8312 else
8313 res = NULL_RTX;
8315 return res;
8318 /* Pointer function arguments and return values are promoted to
8319 word_mode. */
8321 static machine_mode
8322 ix86_promote_function_mode (const_tree type, machine_mode mode,
8323 int *punsignedp, const_tree fntype,
8324 int for_return)
8326 if (type != NULL_TREE && POINTER_TYPE_P (type))
8328 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8329 return word_mode;
8331 return default_promote_function_mode (type, mode, punsignedp, fntype,
8332 for_return);
8335 /* Return true if a structure, union or array with MODE containing FIELD
8336 should be accessed using BLKmode. */
8338 static bool
8339 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8341 /* Union with XFmode must be in BLKmode. */
8342 return (mode == XFmode
8343 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8344 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8348 ix86_libcall_value (machine_mode mode)
8350 return ix86_function_value_1 (NULL, NULL, mode, mode);
8353 /* Return true iff type is returned in memory. */
8355 static bool
8356 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8358 #ifdef SUBTARGET_RETURN_IN_MEMORY
8359 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8360 #else
8361 const machine_mode mode = type_natural_mode (type, NULL, true);
8362 HOST_WIDE_INT size;
8364 if (POINTER_BOUNDS_TYPE_P (type))
8365 return false;
8367 if (TARGET_64BIT)
8369 if (ix86_function_type_abi (fntype) == MS_ABI)
8371 size = int_size_in_bytes (type);
8373 /* __m128 is returned in xmm0. */
8374 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8375 || INTEGRAL_TYPE_P (type)
8376 || VECTOR_FLOAT_TYPE_P (type))
8377 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8378 && !COMPLEX_MODE_P (mode)
8379 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8380 return false;
8382 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8383 return size != 1 && size != 2 && size != 4 && size != 8;
8385 else
8387 int needed_intregs, needed_sseregs;
8389 return examine_argument (mode, type, 1,
8390 &needed_intregs, &needed_sseregs);
8393 else
8395 if (mode == BLKmode)
8396 return true;
8398 size = int_size_in_bytes (type);
8400 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8401 return false;
8403 if (VECTOR_MODE_P (mode) || mode == TImode)
8405 /* User-created vectors small enough to fit in EAX. */
8406 if (size < 8)
8407 return false;
8409 /* Unless the ABI prescribes otherwise,
8410 MMX/3dNow values are returned in MM0 if available. */
8412 if (size == 8)
8413 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8415 /* SSE values are returned in XMM0 if available. */
8416 if (size == 16)
8417 return !TARGET_SSE;
8419 /* AVX values are returned in YMM0 if available. */
8420 if (size == 32)
8421 return !TARGET_AVX;
8423 /* AVX512F values are returned in ZMM0 if available. */
8424 if (size == 64)
8425 return !TARGET_AVX512F;
8428 if (mode == XFmode)
8429 return false;
8431 if (size > 12)
8432 return true;
8434 /* OImode shouldn't be used directly. */
8435 gcc_assert (mode != OImode);
8437 return false;
8439 #endif
8443 /* Create the va_list data type. */
8445 /* Returns the calling convention specific va_list data type.
8446 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8448 static tree
8449 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8451 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8453 /* For i386 we use a plain pointer to the argument area. */
8454 if (!TARGET_64BIT || abi == MS_ABI)
8455 return build_pointer_type (char_type_node);
8457 record = lang_hooks.types.make_type (RECORD_TYPE);
8458 type_decl = build_decl (BUILTINS_LOCATION,
8459 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8461 f_gpr = build_decl (BUILTINS_LOCATION,
8462 FIELD_DECL, get_identifier ("gp_offset"),
8463 unsigned_type_node);
8464 f_fpr = build_decl (BUILTINS_LOCATION,
8465 FIELD_DECL, get_identifier ("fp_offset"),
8466 unsigned_type_node);
8467 f_ovf = build_decl (BUILTINS_LOCATION,
8468 FIELD_DECL, get_identifier ("overflow_arg_area"),
8469 ptr_type_node);
8470 f_sav = build_decl (BUILTINS_LOCATION,
8471 FIELD_DECL, get_identifier ("reg_save_area"),
8472 ptr_type_node);
8474 va_list_gpr_counter_field = f_gpr;
8475 va_list_fpr_counter_field = f_fpr;
8477 DECL_FIELD_CONTEXT (f_gpr) = record;
8478 DECL_FIELD_CONTEXT (f_fpr) = record;
8479 DECL_FIELD_CONTEXT (f_ovf) = record;
8480 DECL_FIELD_CONTEXT (f_sav) = record;
8482 TYPE_STUB_DECL (record) = type_decl;
8483 TYPE_NAME (record) = type_decl;
8484 TYPE_FIELDS (record) = f_gpr;
8485 DECL_CHAIN (f_gpr) = f_fpr;
8486 DECL_CHAIN (f_fpr) = f_ovf;
8487 DECL_CHAIN (f_ovf) = f_sav;
8489 layout_type (record);
8491 /* The correct type is an array type of one element. */
8492 return build_array_type (record, build_index_type (size_zero_node));
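/* Viewed from C, the record built above is the familiar SysV x86-64
   va_list (sketch of the layout produced by the code above):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */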
8495 /* Setup the builtin va_list data type and for 64-bit the additional
8496 calling convention specific va_list data types. */
8498 static tree
8499 ix86_build_builtin_va_list (void)
8501 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8503 /* Initialize abi specific va_list builtin types. */
8504 if (TARGET_64BIT)
8506 tree t;
8507 if (ix86_abi == MS_ABI)
8509 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8510 if (TREE_CODE (t) != RECORD_TYPE)
8511 t = build_variant_type_copy (t);
8512 sysv_va_list_type_node = t;
8514 else
8516 t = ret;
8517 if (TREE_CODE (t) != RECORD_TYPE)
8518 t = build_variant_type_copy (t);
8519 sysv_va_list_type_node = t;
8521 if (ix86_abi != MS_ABI)
8523 t = ix86_build_builtin_va_list_abi (MS_ABI);
8524 if (TREE_CODE (t) != RECORD_TYPE)
8525 t = build_variant_type_copy (t);
8526 ms_va_list_type_node = t;
8528 else
8530 t = ret;
8531 if (TREE_CODE (t) != RECORD_TYPE)
8532 t = build_variant_type_copy (t);
8533 ms_va_list_type_node = t;
8537 return ret;
8540 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8542 static void
8543 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8545 rtx save_area, mem;
8546 alias_set_type set;
8547 int i, max;
8549 /* GPR size of varargs save area. */
8550 if (cfun->va_list_gpr_size)
8551 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8552 else
8553 ix86_varargs_gpr_size = 0;
8555 /* FPR size of varargs save area. We don't need it if we don't pass
8556 anything in SSE registers. */
8557 if (TARGET_SSE && cfun->va_list_fpr_size)
8558 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8559 else
8560 ix86_varargs_fpr_size = 0;
8562 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8563 return;
8565 save_area = frame_pointer_rtx;
8566 set = get_varargs_alias_set ();
8568 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8569 if (max > X86_64_REGPARM_MAX)
8570 max = X86_64_REGPARM_MAX;
8572 for (i = cum->regno; i < max; i++)
8574 mem = gen_rtx_MEM (word_mode,
8575 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8576 MEM_NOTRAP_P (mem) = 1;
8577 set_mem_alias_set (mem, set);
8578 emit_move_insn (mem,
8579 gen_rtx_REG (word_mode,
8580 x86_64_int_parameter_registers[i]));
8583 if (ix86_varargs_fpr_size)
8585 machine_mode smode;
8586 rtx_code_label *label;
8587 rtx test;
8589 /* Now emit code to save SSE registers. The AX parameter contains the number
8590 of SSE parameter registers used to call this function, though all we
8591 actually check here is the zero/non-zero status. */
8593 label = gen_label_rtx ();
8594 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8595 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8596 label));
8598 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8599 we used movdqa (i.e. TImode) instead? Perhaps even better would
8600 be if we could determine the real mode of the data, via a hook
8601 into pass_stdarg. Ignore all that for now. */
8602 smode = V4SFmode;
8603 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8604 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8606 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8607 if (max > X86_64_SSE_REGPARM_MAX)
8608 max = X86_64_SSE_REGPARM_MAX;
8610 for (i = cum->sse_regno; i < max; ++i)
8612 mem = plus_constant (Pmode, save_area,
8613 i * 16 + ix86_varargs_gpr_size);
8614 mem = gen_rtx_MEM (smode, mem);
8615 MEM_NOTRAP_P (mem) = 1;
8616 set_mem_alias_set (mem, set);
8617 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8619 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8622 emit_label (label);
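 /* A sketch of the register save area laid out above for the SysV x86-64 ABI
 (illustrative; assumes X86_64_REGPARM_MAX == 6, X86_64_SSE_REGPARM_MAX == 8
 and the usual parameter-register order):

	save_area +   0:  rdi rsi rdx rcx r8 r9   (6 * 8  =  48 bytes of GPRs)
	save_area +  48:  xmm0 ... xmm7           (8 * 16 = 128 bytes of SSE regs)

 This is why ix86_va_start below seeds gp_offset with n_gpr * 8 and
 fp_offset with n_fpr * 16 + 8 * X86_64_REGPARM_MAX.  */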
8626 static void
8627 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8629 alias_set_type set = get_varargs_alias_set ();
8630 int i;
8632 /* Reset to zero, as there might be a SysV va_arg used
8633 before. */
8634 ix86_varargs_gpr_size = 0;
8635 ix86_varargs_fpr_size = 0;
8637 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8639 rtx reg, mem;
8641 mem = gen_rtx_MEM (Pmode,
8642 plus_constant (Pmode, virtual_incoming_args_rtx,
8643 i * UNITS_PER_WORD));
8644 MEM_NOTRAP_P (mem) = 1;
8645 set_mem_alias_set (mem, set);
8647 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8648 emit_move_insn (mem, reg);
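 /* Illustrative note (based on the Win64 calling convention, not just this
 file): the loop above spills the remaining integer parameter registers
 (rcx, rdx, r8, r9) into the 32-byte register parameter "home" area that the
 caller allocates just above the return address, so the plain char * va_list
 used for MS_ABI can walk every argument as a simple stack array.  */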
8652 static void
8653 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8654 tree type, int *, int no_rtl)
8656 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8657 CUMULATIVE_ARGS next_cum;
8658 tree fntype;
8660 /* This argument doesn't appear to be used anymore, which is good,
8661 because the old code here didn't suppress rtl generation. */
8662 gcc_assert (!no_rtl);
8664 if (!TARGET_64BIT)
8665 return;
8667 fntype = TREE_TYPE (current_function_decl);
8669 /* For varargs, we do not want to skip the dummy va_dcl argument.
8670 For stdargs, we do want to skip the last named argument. */
8671 next_cum = *cum;
8672 if (stdarg_p (fntype))
8673 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8674 true);
8676 if (cum->call_abi == MS_ABI)
8677 setup_incoming_varargs_ms_64 (&next_cum);
8678 else
8679 setup_incoming_varargs_64 (&next_cum);
8682 static void
8683 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8684 enum machine_mode mode,
8685 tree type,
8686 int *pretend_size ATTRIBUTE_UNUSED,
8687 int no_rtl)
8689 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8690 CUMULATIVE_ARGS next_cum;
8691 tree fntype;
8692 rtx save_area;
8693 int bnd_reg, i, max;
8695 gcc_assert (!no_rtl);
8697 /* Do nothing if we use plain pointer to argument area. */
8698 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8699 return;
8701 fntype = TREE_TYPE (current_function_decl);
8703 /* For varargs, we do not want to skip the dummy va_dcl argument.
8704 For stdargs, we do want to skip the last named argument. */
8705 next_cum = *cum;
8706 if (stdarg_p (fntype))
8707 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8708 true);
8709 save_area = frame_pointer_rtx;
8711 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8712 if (max > X86_64_REGPARM_MAX)
8713 max = X86_64_REGPARM_MAX;
8715 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8716 if (chkp_function_instrumented_p (current_function_decl))
8717 for (i = cum->regno; i < max; i++)
8719 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8720 rtx reg = gen_rtx_REG (DImode,
8721 x86_64_int_parameter_registers[i]);
8722 rtx ptr = reg;
8723 rtx bounds;
8725 if (bnd_reg <= LAST_BND_REG)
8726 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8727 else
8729 rtx ldx_addr =
8730 plus_constant (Pmode, arg_pointer_rtx,
8731 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8732 bounds = gen_reg_rtx (BNDmode);
8733 emit_insn (BNDmode == BND64mode
8734 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8735 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8738 emit_insn (BNDmode == BND64mode
8739 ? gen_bnd64_stx (addr, ptr, bounds)
8740 : gen_bnd32_stx (addr, ptr, bounds));
8742 bnd_reg++;
8747 /* Check if TYPE is a va_list of the plain char * kind. */
8749 static bool
8750 is_va_list_char_pointer (tree type)
8752 tree canonic;
8754 /* For 32-bit it is always true. */
8755 if (!TARGET_64BIT)
8756 return true;
8757 canonic = ix86_canonical_va_list_type (type);
8758 return (canonic == ms_va_list_type_node
8759 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8762 /* Implement va_start. */
8764 static void
8765 ix86_va_start (tree valist, rtx nextarg)
8767 HOST_WIDE_INT words, n_gpr, n_fpr;
8768 tree f_gpr, f_fpr, f_ovf, f_sav;
8769 tree gpr, fpr, ovf, sav, t;
8770 tree type;
8771 rtx ovf_rtx;
8773 if (flag_split_stack
8774 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8776 unsigned int scratch_regno;
8778 /* When we are splitting the stack, we can't refer to the stack
8779 arguments using internal_arg_pointer, because they may be on
8780 the old stack. The split stack prologue will arrange to
8781 leave a pointer to the old stack arguments in a scratch
8782 register, which we here copy to a pseudo-register. The split
8783 stack prologue can't set the pseudo-register directly because
8784 it (the prologue) runs before any registers have been saved. */
8786 scratch_regno = split_stack_prologue_scratch_regno ();
8787 if (scratch_regno != INVALID_REGNUM)
8789 rtx reg;
8790 rtx_insn *seq;
8792 reg = gen_reg_rtx (Pmode);
8793 cfun->machine->split_stack_varargs_pointer = reg;
8795 start_sequence ();
8796 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8797 seq = get_insns ();
8798 end_sequence ();
8800 push_topmost_sequence ();
8801 emit_insn_after (seq, entry_of_function ());
8802 pop_topmost_sequence ();
8806 /* Only 64bit target needs something special. */
8807 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8809 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8810 std_expand_builtin_va_start (valist, nextarg);
8811 else
8813 rtx va_r, next;
8815 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8816 next = expand_binop (ptr_mode, add_optab,
8817 cfun->machine->split_stack_varargs_pointer,
8818 crtl->args.arg_offset_rtx,
8819 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8820 convert_move (va_r, next, 0);
8822 /* Store zero bounds for va_list. */
8823 if (chkp_function_instrumented_p (current_function_decl))
8824 chkp_expand_bounds_reset_for_mem (valist,
8825 make_tree (TREE_TYPE (valist),
8826 next));
8829 return;
8832 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8833 f_fpr = DECL_CHAIN (f_gpr);
8834 f_ovf = DECL_CHAIN (f_fpr);
8835 f_sav = DECL_CHAIN (f_ovf);
8837 valist = build_simple_mem_ref (valist);
8838 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8839 /* The following should be folded into the MEM_REF offset. */
8840 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8841 f_gpr, NULL_TREE);
8842 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8843 f_fpr, NULL_TREE);
8844 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8845 f_ovf, NULL_TREE);
8846 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8847 f_sav, NULL_TREE);
8849 /* Count number of gp and fp argument registers used. */
8850 words = crtl->args.info.words;
8851 n_gpr = crtl->args.info.regno;
8852 n_fpr = crtl->args.info.sse_regno;
8854 if (cfun->va_list_gpr_size)
8856 type = TREE_TYPE (gpr);
8857 t = build2 (MODIFY_EXPR, type,
8858 gpr, build_int_cst (type, n_gpr * 8));
8859 TREE_SIDE_EFFECTS (t) = 1;
8860 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8863 if (TARGET_SSE && cfun->va_list_fpr_size)
8865 type = TREE_TYPE (fpr);
8866 t = build2 (MODIFY_EXPR, type, fpr,
8867 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8868 TREE_SIDE_EFFECTS (t) = 1;
8869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8872 /* Find the overflow area. */
8873 type = TREE_TYPE (ovf);
8874 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8875 ovf_rtx = crtl->args.internal_arg_pointer;
8876 else
8877 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8878 t = make_tree (type, ovf_rtx);
8879 if (words != 0)
8880 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8882 /* Store zero bounds for overflow area pointer. */
8883 if (chkp_function_instrumented_p (current_function_decl))
8884 chkp_expand_bounds_reset_for_mem (ovf, t);
8886 t = build2 (MODIFY_EXPR, type, ovf, t);
8887 TREE_SIDE_EFFECTS (t) = 1;
8888 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8890 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8892 /* Find the register save area.
8893 The function prologue saves it right above the stack frame. */
8894 type = TREE_TYPE (sav);
8895 t = make_tree (type, frame_pointer_rtx);
8896 if (!ix86_varargs_gpr_size)
8897 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8899 /* Store zero bounds for save area pointer. */
8900 if (chkp_function_instrumented_p (current_function_decl))
8901 chkp_expand_bounds_reset_for_mem (sav, t);
8903 t = build2 (MODIFY_EXPR, type, sav, t);
8904 TREE_SIDE_EFFECTS (t) = 1;
8905 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8909 /* Implement va_arg. */
8911 static tree
8912 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8913 gimple_seq *post_p)
8915 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8916 tree f_gpr, f_fpr, f_ovf, f_sav;
8917 tree gpr, fpr, ovf, sav, t;
8918 int size, rsize;
8919 tree lab_false, lab_over = NULL_TREE;
8920 tree addr, t2;
8921 rtx container;
8922 int indirect_p = 0;
8923 tree ptrtype;
8924 machine_mode nat_mode;
8925 unsigned int arg_boundary;
8927 /* Only 64bit target needs something special. */
8928 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8929 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8931 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8932 f_fpr = DECL_CHAIN (f_gpr);
8933 f_ovf = DECL_CHAIN (f_fpr);
8934 f_sav = DECL_CHAIN (f_ovf);
8936 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8937 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8938 valist = build_va_arg_indirect_ref (valist);
8939 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8940 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8941 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8943 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8944 if (indirect_p)
8945 type = build_pointer_type (type);
8946 size = int_size_in_bytes (type);
8947 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8949 nat_mode = type_natural_mode (type, NULL, false);
8950 switch (nat_mode)
8952 case V8SFmode:
8953 case V8SImode:
8954 case V32QImode:
8955 case V16HImode:
8956 case V4DFmode:
8957 case V4DImode:
8958 case V16SFmode:
8959 case V16SImode:
8960 case V64QImode:
8961 case V32HImode:
8962 case V8DFmode:
8963 case V8DImode:
8964 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8965 if (!TARGET_64BIT_MS_ABI)
8967 container = NULL;
8968 break;
8971 default:
8972 container = construct_container (nat_mode, TYPE_MODE (type),
8973 type, 0, X86_64_REGPARM_MAX,
8974 X86_64_SSE_REGPARM_MAX, intreg,
8976 break;
8979 /* Pull the value out of the saved registers. */
8981 addr = create_tmp_var (ptr_type_node, "addr");
8983 if (container)
8985 int needed_intregs, needed_sseregs;
8986 bool need_temp;
8987 tree int_addr, sse_addr;
8989 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8990 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8992 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8994 need_temp = (!REG_P (container)
8995 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8996 || TYPE_ALIGN (type) > 128));
8998 /* If we are passing a structure, verify that it occupies a consecutive
8999 block of the register save area. If not, we need to do moves. */
9000 if (!need_temp && !REG_P (container))
9002 /* Verify that all registers are strictly consecutive */
9003 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9005 int i;
9007 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9009 rtx slot = XVECEXP (container, 0, i);
9010 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9011 || INTVAL (XEXP (slot, 1)) != i * 16)
9012 need_temp = 1;
9015 else
9017 int i;
9019 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9021 rtx slot = XVECEXP (container, 0, i);
9022 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9023 || INTVAL (XEXP (slot, 1)) != i * 8)
9024 need_temp = 1;
9028 if (!need_temp)
9030 int_addr = addr;
9031 sse_addr = addr;
9033 else
9035 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9036 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9039 /* First ensure that we fit completely in registers. */
9040 if (needed_intregs)
9042 t = build_int_cst (TREE_TYPE (gpr),
9043 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9044 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9045 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9046 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9047 gimplify_and_add (t, pre_p);
9049 if (needed_sseregs)
9051 t = build_int_cst (TREE_TYPE (fpr),
9052 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9053 + X86_64_REGPARM_MAX * 8);
9054 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9055 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9056 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9057 gimplify_and_add (t, pre_p);
9060 /* Compute index to start of area used for integer regs. */
9061 if (needed_intregs)
9063 /* int_addr = gpr + sav; */
9064 t = fold_build_pointer_plus (sav, gpr);
9065 gimplify_assign (int_addr, t, pre_p);
9067 if (needed_sseregs)
9069 /* sse_addr = fpr + sav; */
9070 t = fold_build_pointer_plus (sav, fpr);
9071 gimplify_assign (sse_addr, t, pre_p);
9073 if (need_temp)
9075 int i, prev_size = 0;
9076 tree temp = create_tmp_var (type, "va_arg_tmp");
9078 /* addr = &temp; */
9079 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9080 gimplify_assign (addr, t, pre_p);
9082 for (i = 0; i < XVECLEN (container, 0); i++)
9084 rtx slot = XVECEXP (container, 0, i);
9085 rtx reg = XEXP (slot, 0);
9086 machine_mode mode = GET_MODE (reg);
9087 tree piece_type;
9088 tree addr_type;
9089 tree daddr_type;
9090 tree src_addr, src;
9091 int src_offset;
9092 tree dest_addr, dest;
9093 int cur_size = GET_MODE_SIZE (mode);
9095 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9096 prev_size = INTVAL (XEXP (slot, 1));
9097 if (prev_size + cur_size > size)
9099 cur_size = size - prev_size;
9100 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9101 if (mode == BLKmode)
9102 mode = QImode;
9104 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9105 if (mode == GET_MODE (reg))
9106 addr_type = build_pointer_type (piece_type);
9107 else
9108 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9109 true);
9110 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9111 true);
9113 if (SSE_REGNO_P (REGNO (reg)))
9115 src_addr = sse_addr;
9116 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9118 else
9120 src_addr = int_addr;
9121 src_offset = REGNO (reg) * 8;
9123 src_addr = fold_convert (addr_type, src_addr);
9124 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9126 dest_addr = fold_convert (daddr_type, addr);
9127 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9128 if (cur_size == GET_MODE_SIZE (mode))
9130 src = build_va_arg_indirect_ref (src_addr);
9131 dest = build_va_arg_indirect_ref (dest_addr);
9133 gimplify_assign (dest, src, pre_p);
9135 else
9137 tree copy
9138 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9139 3, dest_addr, src_addr,
9140 size_int (cur_size));
9141 gimplify_and_add (copy, pre_p);
9143 prev_size += cur_size;
9147 if (needed_intregs)
9149 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9150 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9151 gimplify_assign (gpr, t, pre_p);
9154 if (needed_sseregs)
9156 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9157 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9158 gimplify_assign (fpr, t, pre_p);
9161 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9163 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9166 /* ... otherwise out of the overflow area. */
9168 /* When the caller aligns a parameter on the stack, any parameter
9169 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
9170 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the caller
9171 here. */
9172 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9173 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9174 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9176 /* Care for on-stack alignment if needed. */
9177 if (arg_boundary <= 64 || size == 0)
9178 t = ovf;
9179 else
9181 HOST_WIDE_INT align = arg_boundary / 8;
9182 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9183 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9184 build_int_cst (TREE_TYPE (t), -align));
9187 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9188 gimplify_assign (addr, t, pre_p);
9190 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9191 gimplify_assign (unshare_expr (ovf), t, pre_p);
9193 if (container)
9194 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9196 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9197 addr = fold_convert (ptrtype, addr);
9199 if (indirect_p)
9200 addr = build_va_arg_indirect_ref (addr);
9201 return build_va_arg_indirect_ref (addr);
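 /* For illustration, a rough sketch (not the exact gimple) of what the code
 above emits for va_arg (ap, int) under the SysV x86-64 ABI:

	if (ap->gp_offset >= 6 * 8)		-- all GPR slots consumed
	  goto from_stack;
	addr = ap->reg_save_area + ap->gp_offset;
	ap->gp_offset += 8;
	goto done;
      from_stack:
	addr = ap->overflow_arg_area;		-- possibly aligned first
	ap->overflow_arg_area += 8;
      done:
	result = *(int *) addr;  */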
9204 /* Return true if OPNUM's MEM should be matched
9205 in movabs* patterns. */
9207 bool
9208 ix86_check_movabs (rtx insn, int opnum)
9210 rtx set, mem;
9212 set = PATTERN (insn);
9213 if (GET_CODE (set) == PARALLEL)
9214 set = XVECEXP (set, 0, 0);
9215 gcc_assert (GET_CODE (set) == SET);
9216 mem = XEXP (set, opnum);
9217 while (GET_CODE (mem) == SUBREG)
9218 mem = SUBREG_REG (mem);
9219 gcc_assert (MEM_P (mem));
9220 return volatile_ok || !MEM_VOLATILE_P (mem);
9223 /* Initialize the table of extra 80387 mathematical constants. */
9225 static void
9226 init_ext_80387_constants (void)
9228 static const char * cst[5] =
9230 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9231 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9232 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9233 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9234 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9236 int i;
9238 for (i = 0; i < 5; i++)
9240 real_from_string (&ext_80387_constants_table[i], cst[i]);
9241 /* Ensure each constant is rounded to XFmode precision. */
9242 real_convert (&ext_80387_constants_table[i],
9243 XFmode, &ext_80387_constants_table[i]);
9246 ext_80387_constants_init = 1;
9249 /* Return non-zero if the constant is something that
9250 can be loaded with a special instruction. */
9253 standard_80387_constant_p (rtx x)
9255 machine_mode mode = GET_MODE (x);
9257 REAL_VALUE_TYPE r;
9259 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9260 return -1;
9262 if (x == CONST0_RTX (mode))
9263 return 1;
9264 if (x == CONST1_RTX (mode))
9265 return 2;
9267 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9269 /* For XFmode constants, try to find a special 80387 instruction when
9270 optimizing for size or on those CPUs that benefit from them. */
9271 if (mode == XFmode
9272 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9274 int i;
9276 if (! ext_80387_constants_init)
9277 init_ext_80387_constants ();
9279 for (i = 0; i < 5; i++)
9280 if (real_identical (&r, &ext_80387_constants_table[i]))
9281 return i + 3;
9284 /* Load of the constant -0.0 or -1.0 will be split as
9285 fldz;fchs or fld1;fchs sequence. */
9286 if (real_isnegzero (&r))
9287 return 8;
9288 if (real_identical (&r, &dconstm1))
9289 return 9;
9291 return 0;
9294 /* Return the opcode of the special instruction to be used to load
9295 the constant X. */
9297 const char *
9298 standard_80387_constant_opcode (rtx x)
9300 switch (standard_80387_constant_p (x))
9302 case 1:
9303 return "fldz";
9304 case 2:
9305 return "fld1";
9306 case 3:
9307 return "fldlg2";
9308 case 4:
9309 return "fldln2";
9310 case 5:
9311 return "fldl2e";
9312 case 6:
9313 return "fldl2t";
9314 case 7:
9315 return "fldpi";
9316 case 8:
9317 case 9:
9318 return "#";
9319 default:
9320 gcc_unreachable ();
9324 /* Return the CONST_DOUBLE representing the 80387 constant that is
9325 loaded by the specified special instruction. The argument IDX
9326 matches the return value from standard_80387_constant_p. */
9329 standard_80387_constant_rtx (int idx)
9331 int i;
9333 if (! ext_80387_constants_init)
9334 init_ext_80387_constants ();
9336 switch (idx)
9338 case 3:
9339 case 4:
9340 case 5:
9341 case 6:
9342 case 7:
9343 i = idx - 3;
9344 break;
9346 default:
9347 gcc_unreachable ();
9350 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9351 XFmode);
9354 /* Return 1 if X is all zeros and 2 if X is all ones,
9355 in a supported SSE/AVX vector mode. */
9358 standard_sse_constant_p (rtx x)
9360 machine_mode mode = GET_MODE (x);
9362 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9363 return 1;
9364 if (vector_all_ones_operand (x, mode))
9365 switch (mode)
9367 case V16QImode:
9368 case V8HImode:
9369 case V4SImode:
9370 case V2DImode:
9371 if (TARGET_SSE2)
9372 return 2;
9373 case V32QImode:
9374 case V16HImode:
9375 case V8SImode:
9376 case V4DImode:
9377 if (TARGET_AVX2)
9378 return 2;
9379 case V64QImode:
9380 case V32HImode:
9381 case V16SImode:
9382 case V8DImode:
9383 if (TARGET_AVX512F)
9384 return 2;
9385 default:
9386 break;
9389 return 0;
9392 /* Return the opcode of the special instruction to be used to load
9393 the constant X. */
9395 const char *
9396 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9398 switch (standard_sse_constant_p (x))
9400 case 1:
9401 switch (get_attr_mode (insn))
9403 case MODE_XI:
9404 return "vpxord\t%g0, %g0, %g0";
9405 case MODE_V16SF:
9406 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9407 : "vpxord\t%g0, %g0, %g0";
9408 case MODE_V8DF:
9409 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9410 : "vpxorq\t%g0, %g0, %g0";
9411 case MODE_TI:
9412 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9413 : "%vpxor\t%0, %d0";
9414 case MODE_V2DF:
9415 return "%vxorpd\t%0, %d0";
9416 case MODE_V4SF:
9417 return "%vxorps\t%0, %d0";
9419 case MODE_OI:
9420 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9421 : "vpxor\t%x0, %x0, %x0";
9422 case MODE_V4DF:
9423 return "vxorpd\t%x0, %x0, %x0";
9424 case MODE_V8SF:
9425 return "vxorps\t%x0, %x0, %x0";
9427 default:
9428 break;
9431 case 2:
9432 if (TARGET_AVX512VL
9433 || get_attr_mode (insn) == MODE_XI
9434 || get_attr_mode (insn) == MODE_V8DF
9435 || get_attr_mode (insn) == MODE_V16SF)
9436 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9437 if (TARGET_AVX)
9438 return "vpcmpeqd\t%0, %0, %0";
9439 else
9440 return "pcmpeqd\t%0, %0";
9442 default:
9443 break;
9445 gcc_unreachable ();
9448 /* Return true if OP contains a symbol reference. */
9450 bool
9451 symbolic_reference_mentioned_p (rtx op)
9453 const char *fmt;
9454 int i;
9456 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9457 return true;
9459 fmt = GET_RTX_FORMAT (GET_CODE (op));
9460 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9462 if (fmt[i] == 'E')
9464 int j;
9466 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9467 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9468 return true;
9471 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9472 return true;
9475 return false;
9478 /* Return true if it is appropriate to emit `ret' instructions in the
9479 body of a function. Do this only if the epilogue is simple, needing a
9480 couple of insns. Prior to reloading, we can't tell how many registers
9481 must be saved, so return false then. Return false if there is no frame
9482 marker to de-allocate. */
9484 bool
9485 ix86_can_use_return_insn_p (void)
9487 struct ix86_frame frame;
9489 if (! reload_completed || frame_pointer_needed)
9490 return 0;
9492 /* Don't allow more than 32k pop, since that's all we can do
9493 with one instruction. */
9494 if (crtl->args.pops_args && crtl->args.size >= 32768)
9495 return 0;
9497 ix86_compute_frame_layout (&frame);
9498 return (frame.stack_pointer_offset == UNITS_PER_WORD
9499 && (frame.nregs + frame.nsseregs) == 0);
9502 /* Value should be nonzero if functions must have frame pointers.
9503 Zero means the frame pointer need not be set up (and parms may
9504 be accessed via the stack pointer) in functions that seem suitable. */
9506 static bool
9507 ix86_frame_pointer_required (void)
9509 /* If we accessed previous frames, then the generated code expects
9510 to be able to access the saved ebp value in our frame. */
9511 if (cfun->machine->accesses_prev_frame)
9512 return true;
9514 /* Several x86 OSes need a frame pointer for other reasons,
9515 usually pertaining to setjmp. */
9516 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9517 return true;
9519 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9520 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9521 return true;
9523 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
9524 stack allocation is 4GB. */
9525 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9526 return true;
9528 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9529 turns off the frame pointer by default. Turn it back on now if
9530 we've not got a leaf function. */
9531 if (TARGET_OMIT_LEAF_FRAME_POINTER
9532 && (!crtl->is_leaf
9533 || ix86_current_function_calls_tls_descriptor))
9534 return true;
9536 if (crtl->profile && !flag_fentry)
9537 return true;
9539 return false;
9542 /* Record that the current function accesses previous call frames. */
9544 void
9545 ix86_setup_frame_addresses (void)
9547 cfun->machine->accesses_prev_frame = 1;
9550 #ifndef USE_HIDDEN_LINKONCE
9551 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9552 # define USE_HIDDEN_LINKONCE 1
9553 # else
9554 # define USE_HIDDEN_LINKONCE 0
9555 # endif
9556 #endif
9558 static int pic_labels_used;
9560 /* Fills in the label name that should be used for a pc thunk for
9561 the given register. */
9563 static void
9564 get_pc_thunk_name (char name[32], unsigned int regno)
9566 gcc_assert (!TARGET_64BIT);
9568 if (USE_HIDDEN_LINKONCE)
9569 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9570 else
9571 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9575 /* This function generates code for -fpic that loads %ebx with
9576 the return address of the caller and then returns. */
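 /* For example (illustrative), the thunk emitted below for %ebx is
 essentially:

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

 i.e. it copies its own return address, which is the caller's PC, into the
 requested register.  */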
9578 static void
9579 ix86_code_end (void)
9581 rtx xops[2];
9582 int regno;
9584 for (regno = AX_REG; regno <= SP_REG; regno++)
9586 char name[32];
9587 tree decl;
9589 if (!(pic_labels_used & (1 << regno)))
9590 continue;
9592 get_pc_thunk_name (name, regno);
9594 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9595 get_identifier (name),
9596 build_function_type_list (void_type_node, NULL_TREE));
9597 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9598 NULL_TREE, void_type_node);
9599 TREE_PUBLIC (decl) = 1;
9600 TREE_STATIC (decl) = 1;
9601 DECL_IGNORED_P (decl) = 1;
9603 #if TARGET_MACHO
9604 if (TARGET_MACHO)
9606 switch_to_section (darwin_sections[text_coal_section]);
9607 fputs ("\t.weak_definition\t", asm_out_file);
9608 assemble_name (asm_out_file, name);
9609 fputs ("\n\t.private_extern\t", asm_out_file);
9610 assemble_name (asm_out_file, name);
9611 putc ('\n', asm_out_file);
9612 ASM_OUTPUT_LABEL (asm_out_file, name);
9613 DECL_WEAK (decl) = 1;
9615 else
9616 #endif
9617 if (USE_HIDDEN_LINKONCE)
9619 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9621 targetm.asm_out.unique_section (decl, 0);
9622 switch_to_section (get_named_section (decl, NULL, 0));
9624 targetm.asm_out.globalize_label (asm_out_file, name);
9625 fputs ("\t.hidden\t", asm_out_file);
9626 assemble_name (asm_out_file, name);
9627 putc ('\n', asm_out_file);
9628 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9630 else
9632 switch_to_section (text_section);
9633 ASM_OUTPUT_LABEL (asm_out_file, name);
9636 DECL_INITIAL (decl) = make_node (BLOCK);
9637 current_function_decl = decl;
9638 init_function_start (decl);
9639 first_function_block_is_cold = false;
9640 /* Make sure unwind info is emitted for the thunk if needed. */
9641 final_start_function (emit_barrier (), asm_out_file, 1);
9643 /* Pad stack IP move with 4 instructions (two NOPs count
9644 as one instruction). */
9645 if (TARGET_PAD_SHORT_FUNCTION)
9647 int i = 8;
9649 while (i--)
9650 fputs ("\tnop\n", asm_out_file);
9653 xops[0] = gen_rtx_REG (Pmode, regno);
9654 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9655 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9656 output_asm_insn ("%!ret", NULL);
9657 final_end_function ();
9658 init_insn_lengths ();
9659 free_after_compilation (cfun);
9660 set_cfun (NULL);
9661 current_function_decl = NULL;
9664 if (flag_split_stack)
9665 file_end_indicate_split_stack ();
9668 /* Emit code for the SET_GOT patterns. */
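 /* Typically (32-bit, -fpic, non-Darwin; illustrative only) the sequence
 emitted for a destination of %ebx is:

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

 after which %ebx holds the GOT base for subsequent @GOT/@GOTOFF
 references.  */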
9670 const char *
9671 output_set_got (rtx dest, rtx label)
9673 rtx xops[3];
9675 xops[0] = dest;
9677 if (TARGET_VXWORKS_RTP && flag_pic)
9679 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9680 xops[2] = gen_rtx_MEM (Pmode,
9681 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9682 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9684 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9685 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9686 an unadorned address. */
9687 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9688 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9689 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9690 return "";
9693 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9695 if (!flag_pic)
9697 if (TARGET_MACHO)
9698 /* We don't need a pic base, we're not producing pic. */
9699 gcc_unreachable ();
9701 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9702 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9703 targetm.asm_out.internal_label (asm_out_file, "L",
9704 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9706 else
9708 char name[32];
9709 get_pc_thunk_name (name, REGNO (dest));
9710 pic_labels_used |= 1 << REGNO (dest);
9712 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9713 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9714 output_asm_insn ("%!call\t%X2", xops);
9716 #if TARGET_MACHO
9717 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9718 This is what will be referenced by the Mach-O PIC subsystem. */
9719 if (machopic_should_output_picbase_label () || !label)
9720 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9722 /* When we are restoring the pic base at the site of a nonlocal label,
9723 and we decided to emit the pic base above, we will still output a
9724 local label used for calculating the correction offset (even though
9725 the offset will be 0 in that case). */
9726 if (label)
9727 targetm.asm_out.internal_label (asm_out_file, "L",
9728 CODE_LABEL_NUMBER (label));
9729 #endif
9732 if (!TARGET_MACHO)
9733 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9735 return "";
9738 /* Generate a "push" pattern for input ARG. */
9740 static rtx
9741 gen_push (rtx arg)
9743 struct machine_function *m = cfun->machine;
9745 if (m->fs.cfa_reg == stack_pointer_rtx)
9746 m->fs.cfa_offset += UNITS_PER_WORD;
9747 m->fs.sp_offset += UNITS_PER_WORD;
9749 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9750 arg = gen_rtx_REG (word_mode, REGNO (arg));
9752 return gen_rtx_SET (VOIDmode,
9753 gen_rtx_MEM (word_mode,
9754 gen_rtx_PRE_DEC (Pmode,
9755 stack_pointer_rtx)),
9756 arg);
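 /* Illustrative note: in the 64-bit case the pattern built above is simply

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))

 i.e. an ordinary pushq, with the cfa/sp bookkeeping done up front.  */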
9759 /* Generate a "pop" pattern for input ARG. */
9761 static rtx
9762 gen_pop (rtx arg)
9764 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9765 arg = gen_rtx_REG (word_mode, REGNO (arg));
9767 return gen_rtx_SET (VOIDmode,
9768 arg,
9769 gen_rtx_MEM (word_mode,
9770 gen_rtx_POST_INC (Pmode,
9771 stack_pointer_rtx)));
9774 /* Return >= 0 if there is an unused call-clobbered register available
9775 for the entire function. */
9777 static unsigned int
9778 ix86_select_alt_pic_regnum (void)
9780 if (ix86_use_pseudo_pic_reg ())
9781 return INVALID_REGNUM;
9783 if (crtl->is_leaf
9784 && !crtl->profile
9785 && !ix86_current_function_calls_tls_descriptor)
9787 int i, drap;
9788 /* Can't use the same register for both PIC and DRAP. */
9789 if (crtl->drap_reg)
9790 drap = REGNO (crtl->drap_reg);
9791 else
9792 drap = -1;
9793 for (i = 2; i >= 0; --i)
9794 if (i != drap && !df_regs_ever_live_p (i))
9795 return i;
9798 return INVALID_REGNUM;
9801 /* Return TRUE if we need to save REGNO. */
9803 static bool
9804 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9806 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9807 && pic_offset_table_rtx)
9809 if (ix86_use_pseudo_pic_reg ())
9811 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9812 _mcount in prologue. */
9813 if (!TARGET_64BIT && flag_pic && crtl->profile)
9814 return true;
9816 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9817 || crtl->profile
9818 || crtl->calls_eh_return
9819 || crtl->uses_const_pool
9820 || cfun->has_nonlocal_label)
9821 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9824 if (crtl->calls_eh_return && maybe_eh_return)
9826 unsigned i;
9827 for (i = 0; ; i++)
9829 unsigned test = EH_RETURN_DATA_REGNO (i);
9830 if (test == INVALID_REGNUM)
9831 break;
9832 if (test == regno)
9833 return true;
9837 if (crtl->drap_reg
9838 && regno == REGNO (crtl->drap_reg)
9839 && !cfun->machine->no_drap_save_restore)
9840 return true;
9842 return (df_regs_ever_live_p (regno)
9843 && !call_used_regs[regno]
9844 && !fixed_regs[regno]
9845 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9848 /* Return the number of saved general purpose registers. */
9850 static int
9851 ix86_nsaved_regs (void)
9853 int nregs = 0;
9854 int regno;
9856 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9857 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9858 nregs ++;
9859 return nregs;
9862 /* Return the number of saved SSE registers. */
9864 static int
9865 ix86_nsaved_sseregs (void)
9867 int nregs = 0;
9868 int regno;
9870 if (!TARGET_64BIT_MS_ABI)
9871 return 0;
9872 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9873 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9874 nregs ++;
9875 return nregs;
9878 /* Given FROM and TO register numbers, say whether this elimination is
9879 allowed. If stack alignment is needed, we can only replace argument
9880 pointer with hard frame pointer, or replace frame pointer with stack
9881 pointer. Otherwise, frame pointer elimination is automatically
9882 handled and all other eliminations are valid. */
9884 static bool
9885 ix86_can_eliminate (const int from, const int to)
9887 if (stack_realign_fp)
9888 return ((from == ARG_POINTER_REGNUM
9889 && to == HARD_FRAME_POINTER_REGNUM)
9890 || (from == FRAME_POINTER_REGNUM
9891 && to == STACK_POINTER_REGNUM));
9892 else
9893 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9896 /* Return the offset between two registers, one to be eliminated, and the other
9897 its replacement, at the start of a routine. */
9899 HOST_WIDE_INT
9900 ix86_initial_elimination_offset (int from, int to)
9902 struct ix86_frame frame;
9903 ix86_compute_frame_layout (&frame);
9905 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9906 return frame.hard_frame_pointer_offset;
9907 else if (from == FRAME_POINTER_REGNUM
9908 && to == HARD_FRAME_POINTER_REGNUM)
9909 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9910 else
9912 gcc_assert (to == STACK_POINTER_REGNUM);
9914 if (from == ARG_POINTER_REGNUM)
9915 return frame.stack_pointer_offset;
9917 gcc_assert (from == FRAME_POINTER_REGNUM);
9918 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9922 /* In a dynamically-aligned function, we can't know the offset from
9923 stack pointer to frame pointer, so we must ensure that setjmp
9924 eliminates fp against the hard fp (%ebp) rather than trying to
9925 index from %esp up to the top of the frame across a gap that is
9926 of unknown (at compile-time) size. */
9927 static rtx
9928 ix86_builtin_setjmp_frame_value (void)
9930 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9933 /* When using -fsplit-stack, the allocation routines set a field in
9934 the TCB to the bottom of the stack plus this much space, measured
9935 in bytes. */
9937 #define SPLIT_STACK_AVAILABLE 256
9939 /* Fill the structure ix86_frame describing the frame of the function currently being compiled. */
9941 static void
9942 ix86_compute_frame_layout (struct ix86_frame *frame)
9944 unsigned HOST_WIDE_INT stack_alignment_needed;
9945 HOST_WIDE_INT offset;
9946 unsigned HOST_WIDE_INT preferred_alignment;
9947 HOST_WIDE_INT size = get_frame_size ();
9948 HOST_WIDE_INT to_allocate;
9950 frame->nregs = ix86_nsaved_regs ();
9951 frame->nsseregs = ix86_nsaved_sseregs ();
9953 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9954 except for function prologues and leaf functions. */
9955 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9956 && (!crtl->is_leaf || cfun->calls_alloca != 0
9957 || ix86_current_function_calls_tls_descriptor))
9959 crtl->preferred_stack_boundary = 128;
9960 crtl->stack_alignment_needed = 128;
9962 /* preferred_stack_boundary is never updated for calls
9963 expanded from a TLS descriptor, so update it here. We don't update it at
9964 expand time because, according to the comments before
9965 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9966 away. */
9967 else if (ix86_current_function_calls_tls_descriptor
9968 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9970 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9971 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9972 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9975 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9976 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9978 gcc_assert (!size || stack_alignment_needed);
9979 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9980 gcc_assert (preferred_alignment <= stack_alignment_needed);
9982 /* For SEH we have to limit the amount of code movement into the prologue.
9983 At present we do this via a BLOCKAGE, at which point there's very little
9984 scheduling that can be done, which means that there's very little point
9985 in doing anything except PUSHs. */
9986 if (TARGET_SEH)
9987 cfun->machine->use_fast_prologue_epilogue = false;
9989 /* During reload iteration the number of registers saved can change.
9990 Recompute the value as needed. Do not recompute when the number of registers
9991 didn't change, as reload calls the function multiple times and does not
9992 expect the decision to change within a single iteration. */
9993 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9994 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9996 int count = frame->nregs;
9997 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9999 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10001 /* The fast prologue uses moves instead of pushes to save registers. This
10002 is significantly longer, but also executes faster, as modern hardware
10003 can execute the moves in parallel but can't do that for push/pop.
10005 Be careful about choosing which prologue to emit: when a function takes
10006 many instructions to execute, we may use the slow version, as we do when
10007 the function is known to be outside a hot spot (this is known with
10008 feedback only). Weight the size of the function by the number of registers
10009 to save, as it is cheap to use one or two push instructions but very
10010 slow to use many of them. */
10011 if (count)
10012 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10013 if (node->frequency < NODE_FREQUENCY_NORMAL
10014 || (flag_branch_probabilities
10015 && node->frequency < NODE_FREQUENCY_HOT))
10016 cfun->machine->use_fast_prologue_epilogue = false;
10017 else
10018 cfun->machine->use_fast_prologue_epilogue
10019 = !expensive_function_p (count);
10022 frame->save_regs_using_mov
10023 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10024 /* If static stack checking is enabled and done with probes,
10025 the registers need to be saved before allocating the frame. */
10026 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10028 /* Skip return address. */
10029 offset = UNITS_PER_WORD;
10031 /* Skip pushed static chain. */
10032 if (ix86_static_chain_on_stack)
10033 offset += UNITS_PER_WORD;
10035 /* Skip saved base pointer. */
10036 if (frame_pointer_needed)
10037 offset += UNITS_PER_WORD;
10038 frame->hfp_save_offset = offset;
10040 /* The traditional frame pointer location is at the top of the frame. */
10041 frame->hard_frame_pointer_offset = offset;
10043 /* Register save area */
10044 offset += frame->nregs * UNITS_PER_WORD;
10045 frame->reg_save_offset = offset;
10047 /* On SEH target, registers are pushed just before the frame pointer
10048 location. */
10049 if (TARGET_SEH)
10050 frame->hard_frame_pointer_offset = offset;
10052 /* Align and set SSE register save area. */
10053 if (frame->nsseregs)
10055 /* The only ABI that has saved SSE registers (Win64) also has a
10056 16-byte aligned default stack, and thus we don't need to be
10057 within the re-aligned local stack frame to save them. */
10058 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10059 offset = (offset + 16 - 1) & -16;
10060 offset += frame->nsseregs * 16;
10062 frame->sse_reg_save_offset = offset;
10064 /* The re-aligned stack starts here. Values before this point are not
10065 directly comparable with values below this point. In order to make
10066 sure that no value happens to be the same before and after, force
10067 the alignment computation below to add a non-zero value. */
10068 if (stack_realign_fp)
10069 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10071 /* Va-arg area */
10072 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10073 offset += frame->va_arg_size;
10075 /* Align start of frame for local function. */
10076 if (stack_realign_fp
10077 || offset != frame->sse_reg_save_offset
10078 || size != 0
10079 || !crtl->is_leaf
10080 || cfun->calls_alloca
10081 || ix86_current_function_calls_tls_descriptor)
10082 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10084 /* Frame pointer points here. */
10085 frame->frame_pointer_offset = offset;
10087 offset += size;
10089 /* Add outgoing arguments area. Can be skipped if we eliminated
10090 all the function calls as dead code.
10091 Skipping is however impossible when the function calls alloca; the alloca
10092 expander assumes that the last crtl->outgoing_args_size bytes
10093 of the stack frame are unused. */
10094 if (ACCUMULATE_OUTGOING_ARGS
10095 && (!crtl->is_leaf || cfun->calls_alloca
10096 || ix86_current_function_calls_tls_descriptor))
10098 offset += crtl->outgoing_args_size;
10099 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10101 else
10102 frame->outgoing_arguments_size = 0;
10104 /* Align stack boundary. Only needed if we're calling another function
10105 or using alloca. */
10106 if (!crtl->is_leaf || cfun->calls_alloca
10107 || ix86_current_function_calls_tls_descriptor)
10108 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10110 /* We've reached end of stack frame. */
10111 frame->stack_pointer_offset = offset;
10113 /* Size prologue needs to allocate. */
10114 to_allocate = offset - frame->sse_reg_save_offset;
10116 if ((!to_allocate && frame->nregs <= 1)
10117 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10118 frame->save_regs_using_mov = false;
10120 if (ix86_using_red_zone ()
10121 && crtl->sp_is_unchanging
10122 && crtl->is_leaf
10123 && !ix86_current_function_calls_tls_descriptor)
10125 frame->red_zone_size = to_allocate;
10126 if (frame->save_regs_using_mov)
10127 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10128 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10129 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10131 else
10132 frame->red_zone_size = 0;
10133 frame->stack_pointer_offset -= frame->red_zone_size;
10135 /* The SEH frame pointer location is near the bottom of the frame.
10136 This is enforced by the fact that the difference between the
10137 stack pointer and the frame pointer is limited to 240 bytes in
10138 the unwind data structure. */
10139 if (TARGET_SEH)
10141 HOST_WIDE_INT diff;
10143 /* If we can leave the frame pointer where it is, do so. Also, returns
10144 the establisher frame for __builtin_frame_address (0). */
10145 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10146 if (diff <= SEH_MAX_FRAME_SIZE
10147 && (diff > 240 || (diff & 15) != 0)
10148 && !crtl->accesses_prior_frames)
10150 /* Ideally we'd determine what portion of the local stack frame
10151 (within the constraint of the lowest 240) is most heavily used.
10152 But without that complication, simply bias the frame pointer
10153 by 128 bytes so as to maximize the amount of the local stack
10154 frame that is addressable with 8-bit offsets. */
10155 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
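 /* A rough picture of the frame laid out above, from the CFA downwards
 (illustrative; bracketed areas may be absent, and each named offset is a
 distance below the CFA):

	return address
	[pushed static chain]
	[saved frame pointer]			(hard_frame_pointer_offset, non-SEH)
	saved GP registers			(reg_save_offset)
	[16-byte aligned SSE register save]	(sse_reg_save_offset)
	[va_arg register save area]
	local variables				(frame_pointer_offset at their top)
	[outgoing argument area]
	end of frame				(stack_pointer_offset)  */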
10160 /* This is semi-inlined memory_address_length, but simplified
10161 since we know that we're always dealing with reg+offset, and
10162 to avoid having to create and discard all that rtl. */
10164 static inline int
10165 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10167 int len = 4;
10169 if (offset == 0)
10171 /* EBP and R13 cannot be encoded without an offset. */
10172 len = (regno == BP_REG || regno == R13_REG);
10174 else if (IN_RANGE (offset, -128, 127))
10175 len = 1;
10177 /* ESP and R12 must be encoded with a SIB byte. */
10178 if (regno == SP_REG || regno == R12_REG)
10179 len++;
10181 return len;
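 /* Illustrative examples of the length computed above (bytes of displacement
 plus SIB only, not the whole instruction; my reading of the encoding rules):

	(%rax)		-> 0	no displacement, no SIB
	(%rbp), (%r13)	-> 1	a zero disp8 must still be encoded
	8(%rax)		-> 1	disp8
	(%rsp), (%r12)	-> 1	SIB byte
	1024(%rax)	-> 4	disp32  */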
10184 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10185 The valid base registers are taken from CFUN->MACHINE->FS. */
10187 static rtx
10188 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10190 const struct machine_function *m = cfun->machine;
10191 rtx base_reg = NULL;
10192 HOST_WIDE_INT base_offset = 0;
10194 if (m->use_fast_prologue_epilogue)
10196 /* Choose the base register most likely to allow the most scheduling
10197 opportunities. Generally FP is valid throughout the function,
10198 while DRAP must be reloaded within the epilogue. But choose either
10199 over the SP due to increased encoding size. */
10201 if (m->fs.fp_valid)
10203 base_reg = hard_frame_pointer_rtx;
10204 base_offset = m->fs.fp_offset - cfa_offset;
10206 else if (m->fs.drap_valid)
10208 base_reg = crtl->drap_reg;
10209 base_offset = 0 - cfa_offset;
10211 else if (m->fs.sp_valid)
10213 base_reg = stack_pointer_rtx;
10214 base_offset = m->fs.sp_offset - cfa_offset;
10217 else
10219 HOST_WIDE_INT toffset;
10220 int len = 16, tlen;
10222 /* Choose the base register with the smallest address encoding.
10223 With a tie, choose FP > DRAP > SP. */
10224 if (m->fs.sp_valid)
10226 base_reg = stack_pointer_rtx;
10227 base_offset = m->fs.sp_offset - cfa_offset;
10228 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10230 if (m->fs.drap_valid)
10232 toffset = 0 - cfa_offset;
10233 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10234 if (tlen <= len)
10236 base_reg = crtl->drap_reg;
10237 base_offset = toffset;
10238 len = tlen;
10241 if (m->fs.fp_valid)
10243 toffset = m->fs.fp_offset - cfa_offset;
10244 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10245 if (tlen <= len)
10247 base_reg = hard_frame_pointer_rtx;
10248 base_offset = toffset;
10249 len = tlen;
10253 gcc_assert (base_reg != NULL);
10255 return plus_constant (Pmode, base_reg, base_offset);
10258 /* Emit code to save registers in the prologue. */
10260 static void
10261 ix86_emit_save_regs (void)
10263 unsigned int regno;
10264 rtx insn;
10266 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10267 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10269 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10270 RTX_FRAME_RELATED_P (insn) = 1;
10274 /* Emit a single register save at CFA - CFA_OFFSET. */
10276 static void
10277 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10278 HOST_WIDE_INT cfa_offset)
10280 struct machine_function *m = cfun->machine;
10281 rtx reg = gen_rtx_REG (mode, regno);
10282 rtx mem, addr, base, insn;
10284 addr = choose_baseaddr (cfa_offset);
10285 mem = gen_frame_mem (mode, addr);
10287 /* For SSE saves, we need to indicate the 128-bit alignment. */
10288 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10290 insn = emit_move_insn (mem, reg);
10291 RTX_FRAME_RELATED_P (insn) = 1;
10293 base = addr;
10294 if (GET_CODE (base) == PLUS)
10295 base = XEXP (base, 0);
10296 gcc_checking_assert (REG_P (base));
10298 /* When saving registers into a re-aligned local stack frame, avoid
10299 any tricky guessing by dwarf2out. */
10300 if (m->fs.realigned)
10302 gcc_checking_assert (stack_realign_drap);
10304 if (regno == REGNO (crtl->drap_reg))
10306 /* A bit of a hack. We force the DRAP register to be saved in
10307 the re-aligned stack frame, which provides us with a copy
10308 of the CFA that will last past the prologue. Install it. */
10309 gcc_checking_assert (cfun->machine->fs.fp_valid);
10310 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10311 cfun->machine->fs.fp_offset - cfa_offset);
10312 mem = gen_rtx_MEM (mode, addr);
10313 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10315 else
10317 /* The frame pointer is a stable reference within the
10318 aligned frame. Use it. */
10319 gcc_checking_assert (cfun->machine->fs.fp_valid);
10320 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10321 cfun->machine->fs.fp_offset - cfa_offset);
10322 mem = gen_rtx_MEM (mode, addr);
10323 add_reg_note (insn, REG_CFA_EXPRESSION,
10324 gen_rtx_SET (VOIDmode, mem, reg));
10328 /* The memory may not be relative to the current CFA register,
10329 which means that we may need to generate a new pattern for
10330 use by the unwind info. */
10331 else if (base != m->fs.cfa_reg)
10333 addr = plus_constant (Pmode, m->fs.cfa_reg,
10334 m->fs.cfa_offset - cfa_offset);
10335 mem = gen_rtx_MEM (mode, addr);
10336 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10340 /* Emit code to save registers using MOV insns.
10341 First register is stored at CFA - CFA_OFFSET. */
10342 static void
10343 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10345 unsigned int regno;
10347 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10348 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10350 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10351 cfa_offset -= UNITS_PER_WORD;
10355 /* Emit code to save SSE registers using MOV insns.
10356 First register is stored at CFA - CFA_OFFSET. */
10357 static void
10358 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10360 unsigned int regno;
10362 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10363 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10365 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10366 cfa_offset -= 16;
10370 static GTY(()) rtx queued_cfa_restores;
10372 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10373 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10374 Don't add the note if the previously saved value will be left untouched
10375 within the stack red zone until return, as unwinders can find the same
10376 value in the register and on the stack. */
10378 static void
10379 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10381 if (!crtl->shrink_wrapped
10382 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10383 return;
10385 if (insn)
10387 add_reg_note (insn, REG_CFA_RESTORE, reg);
10388 RTX_FRAME_RELATED_P (insn) = 1;
10390 else
10391 queued_cfa_restores
10392 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10395 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10397 static void
10398 ix86_add_queued_cfa_restore_notes (rtx insn)
10400 rtx last;
10401 if (!queued_cfa_restores)
10402 return;
10403 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10405 XEXP (last, 1) = REG_NOTES (insn);
10406 REG_NOTES (insn) = queued_cfa_restores;
10407 queued_cfa_restores = NULL_RTX;
10408 RTX_FRAME_RELATED_P (insn) = 1;
10411 /* Expand prologue or epilogue stack adjustment.
10412 The pattern exists to put a dependency on all ebp-based memory accesses.
10413 STYLE should be negative if instructions should be marked as frame related,
10414 zero if the %r11 register is live and cannot be freely used, and positive
10415 otherwise. */
10417 static void
10418 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10419 int style, bool set_cfa)
10421 struct machine_function *m = cfun->machine;
10422 rtx insn;
10423 bool add_frame_related_expr = false;
10425 if (Pmode == SImode)
10426 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10427 else if (x86_64_immediate_operand (offset, DImode))
10428 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10429 else
10431 rtx tmp;
10432 /* r11 is used by indirect sibcall return as well, set before the
10433 epilogue and used after the epilogue. */
10434 if (style)
10435 tmp = gen_rtx_REG (DImode, R11_REG);
10436 else
10438 gcc_assert (src != hard_frame_pointer_rtx
10439 && dest != hard_frame_pointer_rtx);
10440 tmp = hard_frame_pointer_rtx;
10442 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10443 if (style < 0)
10444 add_frame_related_expr = true;
10446 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10449 insn = emit_insn (insn);
10450 if (style >= 0)
10451 ix86_add_queued_cfa_restore_notes (insn);
10453 if (set_cfa)
10455 rtx r;
10457 gcc_assert (m->fs.cfa_reg == src);
10458 m->fs.cfa_offset += INTVAL (offset);
10459 m->fs.cfa_reg = dest;
10461 r = gen_rtx_PLUS (Pmode, src, offset);
10462 r = gen_rtx_SET (VOIDmode, dest, r);
10463 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10464 RTX_FRAME_RELATED_P (insn) = 1;
10466 else if (style < 0)
10468 RTX_FRAME_RELATED_P (insn) = 1;
10469 if (add_frame_related_expr)
10471 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10472 r = gen_rtx_SET (VOIDmode, dest, r);
10473 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10477 if (dest == stack_pointer_rtx)
10479 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10480 bool valid = m->fs.sp_valid;
10482 if (src == hard_frame_pointer_rtx)
10484 valid = m->fs.fp_valid;
10485 ooffset = m->fs.fp_offset;
10487 else if (src == crtl->drap_reg)
10489 valid = m->fs.drap_valid;
10490 ooffset = 0;
10492 else
10494 /* Else there are two possibilities: SP itself, which we set
10495    up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10496    taken care of by hand along the eh_return path. */
10497 gcc_checking_assert (src == stack_pointer_rtx
10498 || offset == const0_rtx);
10501 m->fs.sp_offset = ooffset - INTVAL (offset);
10502 m->fs.sp_valid = valid;
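/* Illustration only: a typical prologue use of the helper above allocates
   the local frame with a call such as

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   which emits a single subtraction from the stack pointer, marks it frame
   related (STYLE < 0) and, when SP is still the CFA register, attaches the
   matching REG_CFA_ADJUST_CFA note.  The epilogue performs the mirror
   adjustment with a positive offset and a non-negative STYLE.  */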
10506 /* Find an available register to be used as the dynamic realign argument
10507    pointer register.  Such a register will be written in the prologue and
10508    used at the beginning of the body, so it must not be
10509    1. a parameter-passing register.
10510    2. the GOT pointer.
10511    We reuse the static-chain register if it is available.  Otherwise, we
10512    use DI for i386 and R13 for x86-64.  We chose R13 since it has
10513    shorter encoding.
10515    Return: the regno of the chosen register. */
10517 static unsigned int
10518 find_drap_reg (void)
10520 tree decl = cfun->decl;
10522 if (TARGET_64BIT)
10524 /* Use R13 for a nested function or a function that needs a static
10525    chain.  Since a function with a tail call may use any caller-saved
10526    register in the epilogue, DRAP must not use a caller-saved
10527    register in that case. */
10528 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10529 return R13_REG;
10531 return R10_REG;
10533 else
10535 /* Use DI for a nested function or a function that needs a static
10536    chain.  Since a function with a tail call may use any caller-saved
10537    register in the epilogue, DRAP must not use a caller-saved
10538    register in that case. */
10539 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10540 return DI_REG;
10542 /* Reuse the static chain register if it isn't used for parameter
10543    passing. */
10544 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10546 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10547 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10548 return CX_REG;
10550 return DI_REG;
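/* For reference, the selection above boils down to the following summary
   (derived from the code, not an additional rule):

     TARGET_64BIT, static chain or tail call emitted    -> R13
     TARGET_64BIT, otherwise                            -> R10
     32-bit, static chain or tail call emitted          -> EDI
     32-bit, regparm <= 2 and not fastcall/thiscall     -> ECX
     32-bit, otherwise                                  -> EDI  */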
10554 /* Return minimum incoming stack alignment. */
10556 static unsigned int
10557 ix86_minimum_incoming_stack_boundary (bool sibcall)
10559 unsigned int incoming_stack_boundary;
10561 /* Prefer the one specified at command line. */
10562 if (ix86_user_incoming_stack_boundary)
10563 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10564 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10565    if -mstackrealign is used, this isn't a sibcall check, and the
10566    estimated stack alignment is 128 bits. */
10567 else if (!sibcall
10568 && !TARGET_64BIT
10569 && ix86_force_align_arg_pointer
10570 && crtl->stack_alignment_estimated == 128)
10571 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10572 else
10573 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10575 /* Incoming stack alignment can be changed on individual functions
10576 via force_align_arg_pointer attribute. We use the smallest
10577 incoming stack boundary. */
10578 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10579 && lookup_attribute (ix86_force_align_arg_pointer_string,
10580 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10581 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10583 /* The incoming stack frame has to be aligned at least at
10584 parm_stack_boundary. */
10585 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10586 incoming_stack_boundary = crtl->parm_stack_boundary;
10588 /* The stack at the entry of main is aligned by the runtime.  We use the
10589    smallest incoming stack boundary. */
10590 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10591 && DECL_NAME (current_function_decl)
10592 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10593 && DECL_FILE_SCOPE_P (current_function_decl))
10594 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10596 return incoming_stack_boundary;
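/* Worked example of the clamping order above (numbers are illustrative):
   if the user-specified incoming boundary is 32 bits while
   crtl->parm_stack_boundary is 64, the parameter clamp raises the result
   to 64; the MAIN_STACK_BOUNDARY adjustment only ever lowers the value,
   and only for a file-scope main.  */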
10599 /* Update incoming stack boundary and estimated stack alignment. */
10601 static void
10602 ix86_update_stack_boundary (void)
10604 ix86_incoming_stack_boundary
10605 = ix86_minimum_incoming_stack_boundary (false);
10607 /* x86_64 varargs need 16-byte stack alignment for the register save
10608    area. */
10609 if (TARGET_64BIT
10610 && cfun->stdarg
10611 && crtl->stack_alignment_estimated < 128)
10612 crtl->stack_alignment_estimated = 128;
10615 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10616 needed or an rtx for DRAP otherwise. */
10618 static rtx
10619 ix86_get_drap_rtx (void)
10621 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10622 crtl->need_drap = true;
10624 if (stack_realign_drap)
10626 /* Assign DRAP to vDRAP and return vDRAP. */
10627 unsigned int regno = find_drap_reg ();
10628 rtx drap_vreg;
10629 rtx arg_ptr;
10630 rtx_insn *seq, *insn;
10632 arg_ptr = gen_rtx_REG (Pmode, regno);
10633 crtl->drap_reg = arg_ptr;
10635 start_sequence ();
10636 drap_vreg = copy_to_reg (arg_ptr);
10637 seq = get_insns ();
10638 end_sequence ();
10640 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10641 if (!optimize)
10643 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10644 RTX_FRAME_RELATED_P (insn) = 1;
10646 return drap_vreg;
10648 else
10649 return NULL;
10652 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10654 static rtx
10655 ix86_internal_arg_pointer (void)
10657 return virtual_incoming_args_rtx;
10660 struct scratch_reg {
10661 rtx reg;
10662 bool saved;
10665 /* Return a short-lived scratch register for use on function entry.
10666 In 32-bit mode, it is valid only after the registers are saved
10667 in the prologue. This register must be released by means of
10668 release_scratch_register_on_entry once it is dead. */
10670 static void
10671 get_scratch_register_on_entry (struct scratch_reg *sr)
10673 int regno;
10675 sr->saved = false;
10677 if (TARGET_64BIT)
10679 /* We always use R11 in 64-bit mode. */
10680 regno = R11_REG;
10682 else
10684 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10685 bool fastcall_p
10686 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10687 bool thiscall_p
10688 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10689 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10690 int regparm = ix86_function_regparm (fntype, decl);
10691 int drap_regno
10692 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10694 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10695 for the static chain register. */
10696 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10697 && drap_regno != AX_REG)
10698 regno = AX_REG;
10699 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10700 for the static chain register. */
10701 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10702 regno = AX_REG;
10703 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10704 regno = DX_REG;
10705 /* ecx is the static chain register. */
10706 else if (regparm < 3 && !fastcall_p && !thiscall_p
10707 && !static_chain_p
10708 && drap_regno != CX_REG)
10709 regno = CX_REG;
10710 else if (ix86_save_reg (BX_REG, true))
10711 regno = BX_REG;
10712 /* esi is the static chain register. */
10713 else if (!(regparm == 3 && static_chain_p)
10714 && ix86_save_reg (SI_REG, true))
10715 regno = SI_REG;
10716 else if (ix86_save_reg (DI_REG, true))
10717 regno = DI_REG;
10718 else
10720 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10721 sr->saved = true;
10725 sr->reg = gen_rtx_REG (Pmode, regno);
10726 if (sr->saved)
10728 rtx insn = emit_insn (gen_push (sr->reg));
10729 RTX_FRAME_RELATED_P (insn) = 1;
10733 /* Release a scratch register obtained from the preceding function. */
10735 static void
10736 release_scratch_register_on_entry (struct scratch_reg *sr)
10738 if (sr->saved)
10740 struct machine_function *m = cfun->machine;
10741 rtx x, insn = emit_insn (gen_pop (sr->reg));
10743 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10744 RTX_FRAME_RELATED_P (insn) = 1;
10745 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10746 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10747 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10748 m->fs.sp_offset -= UNITS_PER_WORD;
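/* Sketch of how the pair above is meant to be used (this mirrors the
   probing routines below; it is not a new interface):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns using sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);

   When no suitable call-clobbered register is free, the pair brackets the
   uses with a push/pop of the chosen register and emits the corresponding
   REG_FRAME_RELATED_EXPR bookkeeping.  */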
10752 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10754 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10756 static void
10757 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10759 /* We skip the probe for the first interval + a small dope of 4 words and
10760    probe that many bytes past the specified size to maintain a protection
10761    area at the bottom of the stack. */
10762 const int dope = 4 * UNITS_PER_WORD;
10763 rtx size_rtx = GEN_INT (size), last;
10765 /* See if we have a constant small number of probes to generate. If so,
10766 that's the easy case. The run-time loop is made up of 11 insns in the
10767 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10768 for n # of intervals. */
10769 if (size <= 5 * PROBE_INTERVAL)
10771 HOST_WIDE_INT i, adjust;
10772 bool first_probe = true;
10774 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10775 values of N from 1 until it exceeds SIZE. If only one probe is
10776 needed, this will not generate any code. Then adjust and probe
10777 to PROBE_INTERVAL + SIZE. */
10778 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10780 if (first_probe)
10782 adjust = 2 * PROBE_INTERVAL + dope;
10783 first_probe = false;
10785 else
10786 adjust = PROBE_INTERVAL;
10788 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10789 plus_constant (Pmode, stack_pointer_rtx,
10790 -adjust)));
10791 emit_stack_probe (stack_pointer_rtx);
10794 if (first_probe)
10795 adjust = size + PROBE_INTERVAL + dope;
10796 else
10797 adjust = size + PROBE_INTERVAL - i;
10799 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10800 plus_constant (Pmode, stack_pointer_rtx,
10801 -adjust)));
10802 emit_stack_probe (stack_pointer_rtx);
10804 /* Adjust back to account for the additional first interval. */
10805 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10806 plus_constant (Pmode, stack_pointer_rtx,
10807 PROBE_INTERVAL + dope)));
10810 /* Otherwise, do the same as above, but in a loop. Note that we must be
10811 extra careful with variables wrapping around because we might be at
10812 the very top (or the very bottom) of the address space and we have
10813 to be able to handle this case properly; in particular, we use an
10814 equality test for the loop condition. */
10815 else
10817 HOST_WIDE_INT rounded_size;
10818 struct scratch_reg sr;
10820 get_scratch_register_on_entry (&sr);
10823 /* Step 1: round SIZE to the previous multiple of the interval. */
10825 rounded_size = size & -PROBE_INTERVAL;
10828 /* Step 2: compute initial and final value of the loop counter. */
10830 /* SP = SP_0 + PROBE_INTERVAL. */
10831 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10832 plus_constant (Pmode, stack_pointer_rtx,
10833 - (PROBE_INTERVAL + dope))));
10835 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10836 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10837 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10838 gen_rtx_PLUS (Pmode, sr.reg,
10839 stack_pointer_rtx)));
10842 /* Step 3: the loop
10844 while (SP != LAST_ADDR)
10846 SP = SP + PROBE_INTERVAL
10847 probe at SP
10850 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10851 values of N from 1 until it is equal to ROUNDED_SIZE. */
10853 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10856 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10857 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10859 if (size != rounded_size)
10861 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10862 plus_constant (Pmode, stack_pointer_rtx,
10863 rounded_size - size)));
10864 emit_stack_probe (stack_pointer_rtx);
10867 /* Adjust back to account for the additional first interval. */
10868 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10869 plus_constant (Pmode, stack_pointer_rtx,
10870 PROBE_INTERVAL + dope)));
10872 release_scratch_register_on_entry (&sr);
10875 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10877 /* Even if the stack pointer isn't the CFA register, we need to correctly
10878 describe the adjustments made to it, in particular differentiate the
10879 frame-related ones from the frame-unrelated ones. */
10880 if (size > 0)
10882 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10883 XVECEXP (expr, 0, 0)
10884 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10885 plus_constant (Pmode, stack_pointer_rtx, -size));
10886 XVECEXP (expr, 0, 1)
10887 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10888 plus_constant (Pmode, stack_pointer_rtx,
10889 PROBE_INTERVAL + dope + size));
10890 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10891 RTX_FRAME_RELATED_P (last) = 1;
10893 cfun->machine->fs.sp_offset += size;
10896 /* Make sure nothing is scheduled before we are done. */
10897 emit_insn (gen_blockage ());
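/* Worked example of the unrolled path above, assuming the default
   PROBE_INTERVAL of 4096 and 64-bit UNITS_PER_WORD (so dope == 32):
   for SIZE == 3 * 4096 the code emits three adjust+probe pairs of
   2*4096+32, 4096 and 4096 bytes, then moves SP back up by 4096+32,
   for a net decrement of exactly SIZE.  Sizes above 5 * PROBE_INTERVAL
   take the run-time loop instead.  */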
10900 /* Adjust the stack pointer up to REG while probing it. */
10902 const char *
10903 output_adjust_stack_and_probe (rtx reg)
10905 static int labelno = 0;
10906 char loop_lab[32], end_lab[32];
10907 rtx xops[2];
10909 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10910 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10912 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10914 /* Jump to END_LAB if SP == LAST_ADDR. */
10915 xops[0] = stack_pointer_rtx;
10916 xops[1] = reg;
10917 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10918 fputs ("\tje\t", asm_out_file);
10919 assemble_name_raw (asm_out_file, end_lab);
10920 fputc ('\n', asm_out_file);
10922 /* SP = SP + PROBE_INTERVAL. */
10923 xops[1] = GEN_INT (PROBE_INTERVAL);
10924 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10926 /* Probe at SP. */
10927 xops[1] = const0_rtx;
10928 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10930 fprintf (asm_out_file, "\tjmp\t");
10931 assemble_name_raw (asm_out_file, loop_lab);
10932 fputc ('\n', asm_out_file);
10934 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10936 return "";
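/* For reference, the loop printed above looks roughly like this in AT&T
   syntax, assuming a 64-bit target, %r11 as the scratch register and the
   default 4096-byte probe interval (labels are illustrative):

	.LPSRL0:
		cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
	.LPSRE0:
*/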
10939 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10940 inclusive. These are offsets from the current stack pointer. */
10942 static void
10943 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10945 /* See if we have a constant small number of probes to generate. If so,
10946 that's the easy case. The run-time loop is made up of 7 insns in the
10947 generic case while the compile-time loop is made up of n insns for n #
10948 of intervals. */
10949 if (size <= 7 * PROBE_INTERVAL)
10951 HOST_WIDE_INT i;
10953 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10954 it exceeds SIZE. If only one probe is needed, this will not
10955 generate any code. Then probe at FIRST + SIZE. */
10956 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10957 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10958 -(first + i)));
10960 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10961 -(first + size)));
10964 /* Otherwise, do the same as above, but in a loop. Note that we must be
10965 extra careful with variables wrapping around because we might be at
10966 the very top (or the very bottom) of the address space and we have
10967 to be able to handle this case properly; in particular, we use an
10968 equality test for the loop condition. */
10969 else
10971 HOST_WIDE_INT rounded_size, last;
10972 struct scratch_reg sr;
10974 get_scratch_register_on_entry (&sr);
10977 /* Step 1: round SIZE to the previous multiple of the interval. */
10979 rounded_size = size & -PROBE_INTERVAL;
10982 /* Step 2: compute initial and final value of the loop counter. */
10984 /* TEST_OFFSET = FIRST. */
10985 emit_move_insn (sr.reg, GEN_INT (-first));
10987 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10988 last = first + rounded_size;
10991 /* Step 3: the loop
10993 while (TEST_ADDR != LAST_ADDR)
10995 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10996 probe at TEST_ADDR
10999 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11000 until it is equal to ROUNDED_SIZE. */
11002 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11005 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11006 that SIZE is equal to ROUNDED_SIZE. */
11008 if (size != rounded_size)
11009 emit_stack_probe (plus_constant (Pmode,
11010 gen_rtx_PLUS (Pmode,
11011 stack_pointer_rtx,
11012 sr.reg),
11013 rounded_size - size));
11015 release_scratch_register_on_entry (&sr);
11018 /* Make sure nothing is scheduled before we are done. */
11019 emit_insn (gen_blockage ());
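/* Example of the Step 1 rounding above, assuming PROBE_INTERVAL == 4096:
   SIZE == 10000 gives ROUNDED_SIZE == 10000 & -4096 == 8192, so the loop
   probes at FIRST + 4096 and FIRST + 8192, and Step 4 adds a final probe
   at FIRST + 10000.  */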
11022 /* Probe a range of stack addresses from REG to END, inclusive. These are
11023 offsets from the current stack pointer. */
11025 const char *
11026 output_probe_stack_range (rtx reg, rtx end)
11028 static int labelno = 0;
11029 char loop_lab[32], end_lab[32];
11030 rtx xops[3];
11032 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11033 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11035 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11037 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11038 xops[0] = reg;
11039 xops[1] = end;
11040 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11041 fputs ("\tje\t", asm_out_file);
11042 assemble_name_raw (asm_out_file, end_lab);
11043 fputc ('\n', asm_out_file);
11045 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11046 xops[1] = GEN_INT (PROBE_INTERVAL);
11047 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11049 /* Probe at TEST_ADDR. */
11050 xops[0] = stack_pointer_rtx;
11051 xops[1] = reg;
11052 xops[2] = const0_rtx;
11053 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11055 fprintf (asm_out_file, "\tjmp\t");
11056 assemble_name_raw (asm_out_file, loop_lab);
11057 fputc ('\n', asm_out_file);
11059 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11061 return "";
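/* Likewise, a rough AT&T rendering of the loop printed above, assuming a
   64-bit target, %r11 holding the (negative) running offset and the
   default 4096-byte interval (the -LAST operand value is illustrative):

	.LPSRL1:
		cmpq	$-12288, %r11
		je	.LPSRE1
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		jmp	.LPSRL1
	.LPSRE1:
*/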
11064 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11065 to be generated in correct form. */
11066 static void
11067 ix86_finalize_stack_realign_flags (void)
11069 /* Check whether stack realignment is really needed after reload, and
11070    store the result in cfun. */
11071 unsigned int incoming_stack_boundary
11072 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11073 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11074 unsigned int stack_realign = (incoming_stack_boundary
11075 < (crtl->is_leaf
11076 ? crtl->max_used_stack_slot_alignment
11077 : crtl->stack_alignment_needed));
11079 if (crtl->stack_realign_finalized)
11081 /* After stack_realign_needed is finalized, we can no longer
11082    change it. */
11083 gcc_assert (crtl->stack_realign_needed == stack_realign);
11084 return;
11087 /* If the only reason for frame_pointer_needed is that we conservatively
11088 assumed stack realignment might be needed, but in the end nothing that
11089 needed the stack alignment had been spilled, clear frame_pointer_needed
11090 and say we don't need stack realignment. */
11091 if (stack_realign
11092 && frame_pointer_needed
11093 && crtl->is_leaf
11094 && flag_omit_frame_pointer
11095 && crtl->sp_is_unchanging
11096 && !ix86_current_function_calls_tls_descriptor
11097 && !crtl->accesses_prior_frames
11098 && !cfun->calls_alloca
11099 && !crtl->calls_eh_return
11100 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11101 && !ix86_frame_pointer_required ()
11102 && get_frame_size () == 0
11103 && ix86_nsaved_sseregs () == 0
11104 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11106 HARD_REG_SET set_up_by_prologue, prologue_used;
11107 basic_block bb;
11109 CLEAR_HARD_REG_SET (prologue_used);
11110 CLEAR_HARD_REG_SET (set_up_by_prologue);
11111 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11112 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11113 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11114 HARD_FRAME_POINTER_REGNUM);
11115 FOR_EACH_BB_FN (bb, cfun)
11117 rtx_insn *insn;
11118 FOR_BB_INSNS (bb, insn)
11119 if (NONDEBUG_INSN_P (insn)
11120 && requires_stack_frame_p (insn, prologue_used,
11121 set_up_by_prologue))
11123 crtl->stack_realign_needed = stack_realign;
11124 crtl->stack_realign_finalized = true;
11125 return;
11129 /* If drap has been set, but it actually isn't live at the start
11130 of the function, there is no reason to set it up. */
11131 if (crtl->drap_reg)
11133 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11134 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11136 crtl->drap_reg = NULL_RTX;
11137 crtl->need_drap = false;
11140 else
11141 cfun->machine->no_drap_save_restore = true;
11143 frame_pointer_needed = false;
11144 stack_realign = false;
11145 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11146 crtl->stack_alignment_needed = incoming_stack_boundary;
11147 crtl->stack_alignment_estimated = incoming_stack_boundary;
11148 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11149 crtl->preferred_stack_boundary = incoming_stack_boundary;
11150 df_finish_pass (true);
11151 df_scan_alloc (NULL);
11152 df_scan_blocks ();
11153 df_compute_regs_ever_live (true);
11154 df_analyze ();
11157 crtl->stack_realign_needed = stack_realign;
11158 crtl->stack_realign_finalized = true;
11161 /* Delete SET_GOT right after the entry block if it is allocated to REG. */
11163 static void
11164 ix86_elim_entry_set_got (rtx reg)
11166 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11167 rtx_insn *c_insn = BB_HEAD (bb);
11168 if (!NONDEBUG_INSN_P (c_insn))
11169 c_insn = next_nonnote_nondebug_insn (c_insn);
11170 if (c_insn && NONJUMP_INSN_P (c_insn))
11172 rtx pat = PATTERN (c_insn);
11173 if (GET_CODE (pat) == PARALLEL)
11175 rtx vec = XVECEXP (pat, 0, 0);
11176 if (GET_CODE (vec) == SET
11177 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11178 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11179 delete_insn (c_insn);
11184 /* Expand the prologue into a bunch of separate insns. */
11186 void
11187 ix86_expand_prologue (void)
11189 struct machine_function *m = cfun->machine;
11190 rtx insn, t;
11191 struct ix86_frame frame;
11192 HOST_WIDE_INT allocate;
11193 bool int_registers_saved;
11194 bool sse_registers_saved;
11196 ix86_finalize_stack_realign_flags ();
11198 /* DRAP should not coexist with stack_realign_fp */
11199 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11201 memset (&m->fs, 0, sizeof (m->fs));
11203 /* Initialize CFA state for before the prologue. */
11204 m->fs.cfa_reg = stack_pointer_rtx;
11205 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11207 /* Track the SP offset to the CFA.  We continue tracking this after we've
11208    swapped the CFA register away from SP.  In the case of re-alignment
11209    this is fudged; we're interested in offsets within the local frame. */
11210 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11211 m->fs.sp_valid = true;
11213 ix86_compute_frame_layout (&frame);
11215 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11217 /* We should have already generated an error for any use of
11218 ms_hook on a nested function. */
11219 gcc_checking_assert (!ix86_static_chain_on_stack);
11221 /* Check whether profiling is active and we should use the
11222    profiling-before-prologue variant.  If so, sorry. */
11223 if (crtl->profile && flag_fentry != 0)
11224 sorry ("ms_hook_prologue attribute isn%'t compatible "
11225 "with -mfentry for 32-bit");
11227 /* In ix86_asm_output_function_label we emitted:
11228 8b ff movl.s %edi,%edi
11229 55 push %ebp
11230 8b ec movl.s %esp,%ebp
11232 This matches the hookable function prologue in Win32 API
11233 functions in Microsoft Windows XP Service Pack 2 and newer.
11234 Wine uses this to enable Windows apps to hook the Win32 API
11235 functions provided by Wine.
11237 What that means is that we've already set up the frame pointer. */
11239 if (frame_pointer_needed
11240 && !(crtl->drap_reg && crtl->stack_realign_needed))
11242 rtx push, mov;
11244 /* We've decided to use the frame pointer already set up.
11245 Describe this to the unwinder by pretending that both
11246 push and mov insns happen right here.
11248 Putting the unwind info here at the end of the ms_hook
11249 is done so that we can make absolutely certain we get
11250 the required byte sequence at the start of the function,
11251 rather than relying on an assembler that can produce
11252 the exact encoding required.
11254 However it does mean (in the unpatched case) that we have
11255 a 1 insn window where the asynchronous unwind info is
11256 incorrect. However, if we placed the unwind info at
11257 its correct location we would have incorrect unwind info
11258 in the patched case. Which is probably all moot since
11259 I don't expect Wine generates dwarf2 unwind info for the
11260 system libraries that use this feature. */
11262 insn = emit_insn (gen_blockage ());
11264 push = gen_push (hard_frame_pointer_rtx);
11265 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11266 stack_pointer_rtx);
11267 RTX_FRAME_RELATED_P (push) = 1;
11268 RTX_FRAME_RELATED_P (mov) = 1;
11270 RTX_FRAME_RELATED_P (insn) = 1;
11271 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11272 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11274 /* Note that gen_push incremented m->fs.cfa_offset, even
11275 though we didn't emit the push insn here. */
11276 m->fs.cfa_reg = hard_frame_pointer_rtx;
11277 m->fs.fp_offset = m->fs.cfa_offset;
11278 m->fs.fp_valid = true;
11280 else
11282 /* The frame pointer is not needed so pop %ebp again.
11283 This leaves us with a pristine state. */
11284 emit_insn (gen_pop (hard_frame_pointer_rtx));
11288 /* The first insn of a function that accepts its static chain on the
11289 stack is to push the register that would be filled in by a direct
11290 call. This insn will be skipped by the trampoline. */
11291 else if (ix86_static_chain_on_stack)
11293 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11294 emit_insn (gen_blockage ());
11296 /* We don't want to interpret this push insn as a register save,
11297 only as a stack adjustment. The real copy of the register as
11298 a save will be done later, if needed. */
11299 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11300 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11301 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11302 RTX_FRAME_RELATED_P (insn) = 1;
11305 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11306    DRAP is needed and stack realignment is really needed after reload. */
11307 if (stack_realign_drap)
11309 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11311 /* Only need to push the parameter pointer reg if it is call-saved. */
11312 if (!call_used_regs[REGNO (crtl->drap_reg)])
11314 /* Push arg pointer reg */
11315 insn = emit_insn (gen_push (crtl->drap_reg));
11316 RTX_FRAME_RELATED_P (insn) = 1;
11319 /* Grab the argument pointer. */
11320 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11321 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11322 RTX_FRAME_RELATED_P (insn) = 1;
11323 m->fs.cfa_reg = crtl->drap_reg;
11324 m->fs.cfa_offset = 0;
11326 /* Align the stack. */
11327 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11328 stack_pointer_rtx,
11329 GEN_INT (-align_bytes)));
11330 RTX_FRAME_RELATED_P (insn) = 1;
11332 /* Replicate the return address on the stack so that the return
11333    address can be reached via the (argp - 1) slot.  This is needed
11334    to implement the macro RETURN_ADDR_RTX and the intrinsic function
11335    expand_builtin_return_addr, etc. */
11336 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11337 t = gen_frame_mem (word_mode, t);
11338 insn = emit_insn (gen_push (t));
11339 RTX_FRAME_RELATED_P (insn) = 1;
11341 /* For the purposes of frame and register save area addressing,
11342 we've started over with a new frame. */
11343 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11344 m->fs.realigned = true;
11347 int_registers_saved = (frame.nregs == 0);
11348 sse_registers_saved = (frame.nsseregs == 0);
11350 if (frame_pointer_needed && !m->fs.fp_valid)
11352 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11353 slower on all targets. Also sdb doesn't like it. */
11354 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11355 RTX_FRAME_RELATED_P (insn) = 1;
11357 /* Push registers now, before setting the frame pointer
11358 on SEH target. */
11359 if (!int_registers_saved
11360 && TARGET_SEH
11361 && !frame.save_regs_using_mov)
11363 ix86_emit_save_regs ();
11364 int_registers_saved = true;
11365 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11368 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11370 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11371 RTX_FRAME_RELATED_P (insn) = 1;
11373 if (m->fs.cfa_reg == stack_pointer_rtx)
11374 m->fs.cfa_reg = hard_frame_pointer_rtx;
11375 m->fs.fp_offset = m->fs.sp_offset;
11376 m->fs.fp_valid = true;
11380 if (!int_registers_saved)
11382 /* If saving registers via PUSH, do so now. */
11383 if (!frame.save_regs_using_mov)
11385 ix86_emit_save_regs ();
11386 int_registers_saved = true;
11387 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11390 /* When using the red zone we may start saving registers before allocating
11391    the stack frame, saving one cycle of the prologue.  However, avoid
11392    doing this if we have to probe the stack; at least on x86_64 the
11393    stack probe can turn into a call that clobbers a red zone location. */
11394 else if (ix86_using_red_zone ()
11395 && (! TARGET_STACK_PROBE
11396 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11398 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11399 int_registers_saved = true;
11403 if (stack_realign_fp)
11405 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11406 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11408 /* The computation of the size of the re-aligned stack frame means
11409 that we must allocate the size of the register save area before
11410 performing the actual alignment. Otherwise we cannot guarantee
11411 that there's enough storage above the realignment point. */
11412 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11413 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11414 GEN_INT (m->fs.sp_offset
11415 - frame.sse_reg_save_offset),
11416 -1, false);
11418 /* Align the stack. */
11419 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11420 stack_pointer_rtx,
11421 GEN_INT (-align_bytes)));
11423 /* For the purposes of register save area addressing, the stack
11424 pointer is no longer valid. As for the value of sp_offset,
11425 see ix86_compute_frame_layout, which we need to match in order
11426 to pass verification of stack_pointer_offset at the end. */
11427 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11428 m->fs.sp_valid = false;
11431 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11433 if (flag_stack_usage_info)
11435 /* We start to count from ARG_POINTER. */
11436 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11438 /* If it was realigned, take into account the fake frame. */
11439 if (stack_realign_drap)
11441 if (ix86_static_chain_on_stack)
11442 stack_size += UNITS_PER_WORD;
11444 if (!call_used_regs[REGNO (crtl->drap_reg)])
11445 stack_size += UNITS_PER_WORD;
11447 /* This over-estimates by 1 minimal-stack-alignment-unit but
11448 mitigates that by counting in the new return address slot. */
11449 current_function_dynamic_stack_size
11450 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11453 current_function_static_stack_size = stack_size;
11456 /* On SEH target with very large frame size, allocate an area to save
11457 SSE registers (as the very large allocation won't be described). */
11458 if (TARGET_SEH
11459 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11460 && !sse_registers_saved)
11462 HOST_WIDE_INT sse_size =
11463 frame.sse_reg_save_offset - frame.reg_save_offset;
11465 gcc_assert (int_registers_saved);
11467 /* No need to do stack checking as the area will be immediately
11468 written. */
11469 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11470 GEN_INT (-sse_size), -1,
11471 m->fs.cfa_reg == stack_pointer_rtx);
11472 allocate -= sse_size;
11473 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11474 sse_registers_saved = true;
11477 /* The stack has already been decremented by the instruction calling us
11478 so probe if the size is non-negative to preserve the protection area. */
11479 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11481 /* We expect the registers to be saved when probes are used. */
11482 gcc_assert (int_registers_saved);
11484 if (STACK_CHECK_MOVING_SP)
11486 if (!(crtl->is_leaf && !cfun->calls_alloca
11487 && allocate <= PROBE_INTERVAL))
11489 ix86_adjust_stack_and_probe (allocate);
11490 allocate = 0;
11493 else
11495 HOST_WIDE_INT size = allocate;
11497 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11498 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11500 if (TARGET_STACK_PROBE)
11502 if (crtl->is_leaf && !cfun->calls_alloca)
11504 if (size > PROBE_INTERVAL)
11505 ix86_emit_probe_stack_range (0, size);
11507 else
11508 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11510 else
11512 if (crtl->is_leaf && !cfun->calls_alloca)
11514 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11515 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11516 size - STACK_CHECK_PROTECT);
11518 else
11519 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11524 if (allocate == 0)
11526 else if (!ix86_target_stack_probe ()
11527 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11529 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11530 GEN_INT (-allocate), -1,
11531 m->fs.cfa_reg == stack_pointer_rtx);
11533 else
11535 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11536 rtx r10 = NULL;
11537 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11538 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11539 bool eax_live = ix86_eax_live_at_start_p ();
11540 bool r10_live = false;
11542 if (TARGET_64BIT)
11543 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11545 if (eax_live)
11547 insn = emit_insn (gen_push (eax));
11548 allocate -= UNITS_PER_WORD;
11549 /* Note that SEH directives need to continue tracking the stack
11550 pointer even after the frame pointer has been set up. */
11551 if (sp_is_cfa_reg || TARGET_SEH)
11553 if (sp_is_cfa_reg)
11554 m->fs.cfa_offset += UNITS_PER_WORD;
11555 RTX_FRAME_RELATED_P (insn) = 1;
11559 if (r10_live)
11561 r10 = gen_rtx_REG (Pmode, R10_REG);
11562 insn = emit_insn (gen_push (r10));
11563 allocate -= UNITS_PER_WORD;
11564 if (sp_is_cfa_reg || TARGET_SEH)
11566 if (sp_is_cfa_reg)
11567 m->fs.cfa_offset += UNITS_PER_WORD;
11568 RTX_FRAME_RELATED_P (insn) = 1;
11572 emit_move_insn (eax, GEN_INT (allocate));
11573 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11575 /* Use the fact that AX still contains ALLOCATE. */
11576 adjust_stack_insn = (Pmode == DImode
11577 ? gen_pro_epilogue_adjust_stack_di_sub
11578 : gen_pro_epilogue_adjust_stack_si_sub);
11580 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11581 stack_pointer_rtx, eax));
11583 if (sp_is_cfa_reg || TARGET_SEH)
11585 if (sp_is_cfa_reg)
11586 m->fs.cfa_offset += allocate;
11587 RTX_FRAME_RELATED_P (insn) = 1;
11588 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11589 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11590 plus_constant (Pmode, stack_pointer_rtx,
11591 -allocate)));
11593 m->fs.sp_offset += allocate;
11595 /* Use stack_pointer_rtx for relative addressing so that code
11596 works for realigned stack, too. */
11597 if (r10_live && eax_live)
11599 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11600 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11601 gen_frame_mem (word_mode, t));
11602 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11603 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11604 gen_frame_mem (word_mode, t));
11606 else if (eax_live || r10_live)
11608 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11609 emit_move_insn (gen_rtx_REG (word_mode,
11610 (eax_live ? AX_REG : R10_REG)),
11611 gen_frame_mem (word_mode, t));
11614 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11616 /* If we haven't already set up the frame pointer, do so now. */
11617 if (frame_pointer_needed && !m->fs.fp_valid)
11619 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11620 GEN_INT (frame.stack_pointer_offset
11621 - frame.hard_frame_pointer_offset));
11622 insn = emit_insn (insn);
11623 RTX_FRAME_RELATED_P (insn) = 1;
11624 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11626 if (m->fs.cfa_reg == stack_pointer_rtx)
11627 m->fs.cfa_reg = hard_frame_pointer_rtx;
11628 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11629 m->fs.fp_valid = true;
11632 if (!int_registers_saved)
11633 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11634 if (!sse_registers_saved)
11635 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11637 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11638    in the prologue. */
11639 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11641 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11642 insn = emit_insn (gen_set_got (pic));
11643 RTX_FRAME_RELATED_P (insn) = 1;
11644 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11645 emit_insn (gen_prologue_use (pic));
11646 /* Delete an already emitted SET_GOT if it exists and is allocated to
11647    REAL_PIC_OFFSET_TABLE_REGNUM. */
11648 ix86_elim_entry_set_got (pic);
11651 if (crtl->drap_reg && !crtl->stack_realign_needed)
11653 /* vDRAP is set up, but after reload it turns out stack realignment
11654    isn't necessary; here we emit the prologue to set up DRAP
11655    without the stack realignment adjustment. */
11656 t = choose_baseaddr (0);
11657 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11660 /* Prevent instructions from being scheduled into the register save push
11661    sequence when access to the red zone area is done through the frame
11662    pointer.  The offset between the frame pointer and the stack pointer is
11663    calculated relative to the value of the stack pointer at the end of the
11664    function prologue, and moving instructions that access the red zone area
11665    via the frame pointer inside the push sequence violates this assumption. */
11666 if (frame_pointer_needed && frame.red_zone_size)
11667 emit_insn (gen_memory_blockage ());
11669 /* Emit cld instruction if stringops are used in the function. */
11670 if (TARGET_CLD && ix86_current_function_needs_cld)
11671 emit_insn (gen_cld ());
11673 /* SEH requires that the prologue end within 256 bytes of the start of
11674 the function. Prevent instruction schedules that would extend that.
11675 Further, prevent alloca modifications to the stack pointer from being
11676 combined with prologue modifications. */
11677 if (TARGET_SEH)
11678 emit_insn (gen_prologue_use (stack_pointer_rtx));
11681 /* Emit code to restore REG using a POP insn. */
11683 static void
11684 ix86_emit_restore_reg_using_pop (rtx reg)
11686 struct machine_function *m = cfun->machine;
11687 rtx insn = emit_insn (gen_pop (reg));
11689 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11690 m->fs.sp_offset -= UNITS_PER_WORD;
11692 if (m->fs.cfa_reg == crtl->drap_reg
11693 && REGNO (reg) == REGNO (crtl->drap_reg))
11695 /* Previously we'd represented the CFA as an expression
11696 like *(%ebp - 8). We've just popped that value from
11697 the stack, which means we need to reset the CFA to
11698 the drap register. This will remain until we restore
11699 the stack pointer. */
11700 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11701 RTX_FRAME_RELATED_P (insn) = 1;
11703 /* This means that the DRAP register is valid for addressing too. */
11704 m->fs.drap_valid = true;
11705 return;
11708 if (m->fs.cfa_reg == stack_pointer_rtx)
11710 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11711 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11712 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11713 RTX_FRAME_RELATED_P (insn) = 1;
11715 m->fs.cfa_offset -= UNITS_PER_WORD;
11718 /* When the frame pointer is the CFA, and we pop it, we are
11719 swapping back to the stack pointer as the CFA. This happens
11720 for stack frames that don't allocate other data, so we assume
11721 the stack pointer is now pointing at the return address, i.e.
11722 the function entry state, which makes the offset be 1 word. */
11723 if (reg == hard_frame_pointer_rtx)
11725 m->fs.fp_valid = false;
11726 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11728 m->fs.cfa_reg = stack_pointer_rtx;
11729 m->fs.cfa_offset -= UNITS_PER_WORD;
11731 add_reg_note (insn, REG_CFA_DEF_CFA,
11732 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11733 GEN_INT (m->fs.cfa_offset)));
11734 RTX_FRAME_RELATED_P (insn) = 1;
11739 /* Emit code to restore saved registers using POP insns. */
11741 static void
11742 ix86_emit_restore_regs_using_pop (void)
11744 unsigned int regno;
11746 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11747 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11748 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11751 /* Emit code and notes for the LEAVE instruction. */
11753 static void
11754 ix86_emit_leave (void)
11756 struct machine_function *m = cfun->machine;
11757 rtx insn = emit_insn (ix86_gen_leave ());
11759 ix86_add_queued_cfa_restore_notes (insn);
11761 gcc_assert (m->fs.fp_valid);
11762 m->fs.sp_valid = true;
11763 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11764 m->fs.fp_valid = false;
11766 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11768 m->fs.cfa_reg = stack_pointer_rtx;
11769 m->fs.cfa_offset = m->fs.sp_offset;
11771 add_reg_note (insn, REG_CFA_DEF_CFA,
11772 plus_constant (Pmode, stack_pointer_rtx,
11773 m->fs.sp_offset));
11774 RTX_FRAME_RELATED_P (insn) = 1;
11776 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11777 m->fs.fp_offset);
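/* "leave" is equivalent to "movl %ebp, %esp; popl %ebp" (the movq/popq
   forms in 64-bit mode), which is why the state tracking above re-validates
   SP at FP_OFFSET - UNITS_PER_WORD and invalidates the frame pointer.  */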
11780 /* Emit code to restore saved registers using MOV insns.
11781    The first register is restored from CFA - CFA_OFFSET. */
11782 static void
11783 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11784 bool maybe_eh_return)
11786 struct machine_function *m = cfun->machine;
11787 unsigned int regno;
11789 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11790 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11792 rtx reg = gen_rtx_REG (word_mode, regno);
11793 rtx insn, mem;
11795 mem = choose_baseaddr (cfa_offset);
11796 mem = gen_frame_mem (word_mode, mem);
11797 insn = emit_move_insn (reg, mem);
11799 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11801 /* Previously we'd represented the CFA as an expression
11802 like *(%ebp - 8). We've just popped that value from
11803 the stack, which means we need to reset the CFA to
11804 the drap register. This will remain until we restore
11805 the stack pointer. */
11806 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11807 RTX_FRAME_RELATED_P (insn) = 1;
11809 /* This means that the DRAP register is valid for addressing. */
11810 m->fs.drap_valid = true;
11812 else
11813 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11815 cfa_offset -= UNITS_PER_WORD;
11819 /* Emit code to restore saved SSE registers using MOV insns.
11820    The first register is restored from CFA - CFA_OFFSET. */
11821 static void
11822 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11823 bool maybe_eh_return)
11825 unsigned int regno;
11827 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11828 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11830 rtx reg = gen_rtx_REG (V4SFmode, regno);
11831 rtx mem;
11833 mem = choose_baseaddr (cfa_offset);
11834 mem = gen_rtx_MEM (V4SFmode, mem);
11835 set_mem_align (mem, 128);
11836 emit_move_insn (reg, mem);
11838 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11840 cfa_offset -= 16;
11844 /* Restore function stack, frame, and registers. */
11846 void
11847 ix86_expand_epilogue (int style)
11849 struct machine_function *m = cfun->machine;
11850 struct machine_frame_state frame_state_save = m->fs;
11851 struct ix86_frame frame;
11852 bool restore_regs_via_mov;
11853 bool using_drap;
11855 ix86_finalize_stack_realign_flags ();
11856 ix86_compute_frame_layout (&frame);
11858 m->fs.sp_valid = (!frame_pointer_needed
11859 || (crtl->sp_is_unchanging
11860 && !stack_realign_fp));
11861 gcc_assert (!m->fs.sp_valid
11862 || m->fs.sp_offset == frame.stack_pointer_offset);
11864 /* The FP must be valid if the frame pointer is present. */
11865 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11866 gcc_assert (!m->fs.fp_valid
11867 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11869 /* We must have *some* valid pointer to the stack frame. */
11870 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11872 /* The DRAP is never valid at this point. */
11873 gcc_assert (!m->fs.drap_valid);
11875 /* See the comment about red zone and frame
11876 pointer usage in ix86_expand_prologue. */
11877 if (frame_pointer_needed && frame.red_zone_size)
11878 emit_insn (gen_memory_blockage ());
11880 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11881 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11883 /* Determine the CFA offset of the end of the red-zone. */
11884 m->fs.red_zone_offset = 0;
11885 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11887 /* The red-zone begins below the return address. */
11888 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11890 /* When the register save area is in the aligned portion of
11891 the stack, determine the maximum runtime displacement that
11892 matches up with the aligned frame. */
11893 if (stack_realign_drap)
11894 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11895 + UNITS_PER_WORD);
11898 /* Special care must be taken for the normal return case of a function
11899 using eh_return: the eax and edx registers are marked as saved, but
11900 not restored along this path. Adjust the save location to match. */
11901 if (crtl->calls_eh_return && style != 2)
11902 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11904 /* EH_RETURN requires the use of moves to function properly. */
11905 if (crtl->calls_eh_return)
11906 restore_regs_via_mov = true;
11907 /* SEH requires the use of pops to identify the epilogue. */
11908 else if (TARGET_SEH)
11909 restore_regs_via_mov = false;
11910 /* If we're only restoring one register and sp is not valid then
11911    use a move instruction to restore the register, since it's
11912    less work than reloading sp and popping the register. */
11913 else if (!m->fs.sp_valid && frame.nregs <= 1)
11914 restore_regs_via_mov = true;
11915 else if (TARGET_EPILOGUE_USING_MOVE
11916 && cfun->machine->use_fast_prologue_epilogue
11917 && (frame.nregs > 1
11918 || m->fs.sp_offset != frame.reg_save_offset))
11919 restore_regs_via_mov = true;
11920 else if (frame_pointer_needed
11921 && !frame.nregs
11922 && m->fs.sp_offset != frame.reg_save_offset)
11923 restore_regs_via_mov = true;
11924 else if (frame_pointer_needed
11925 && TARGET_USE_LEAVE
11926 && cfun->machine->use_fast_prologue_epilogue
11927 && frame.nregs == 1)
11928 restore_regs_via_mov = true;
11929 else
11930 restore_regs_via_mov = false;
11932 if (restore_regs_via_mov || frame.nsseregs)
11934 /* Ensure that the entire register save area is addressable via
11935 the stack pointer, if we will restore via sp. */
11936 if (TARGET_64BIT
11937 && m->fs.sp_offset > 0x7fffffff
11938 && !(m->fs.fp_valid || m->fs.drap_valid)
11939 && (frame.nsseregs + frame.nregs) != 0)
11941 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11942 GEN_INT (m->fs.sp_offset
11943 - frame.sse_reg_save_offset),
11944 style,
11945 m->fs.cfa_reg == stack_pointer_rtx);
11949 /* If there are any SSE registers to restore, then we have to do it
11950 via moves, since there's obviously no pop for SSE regs. */
11951 if (frame.nsseregs)
11952 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11953 style == 2);
11955 if (restore_regs_via_mov)
11957 rtx t;
11959 if (frame.nregs)
11960 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11962 /* eh_return epilogues need %ecx added to the stack pointer. */
11963 if (style == 2)
11965 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11967 /* Stack align doesn't work with eh_return. */
11968 gcc_assert (!stack_realign_drap);
11969 /* Neither do regparm nested functions. */
11970 gcc_assert (!ix86_static_chain_on_stack);
11972 if (frame_pointer_needed)
11974 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11975 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11976 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11978 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11979 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11981 /* Note that we use SA as a temporary CFA, as the return
11982 address is at the proper place relative to it. We
11983 pretend this happens at the FP restore insn because
11984 prior to this insn the FP would be stored at the wrong
11985 offset relative to SA, and after this insn we have no
11986 other reasonable register to use for the CFA. We don't
11987 bother resetting the CFA to the SP for the duration of
11988 the return insn. */
11989 add_reg_note (insn, REG_CFA_DEF_CFA,
11990 plus_constant (Pmode, sa, UNITS_PER_WORD));
11991 ix86_add_queued_cfa_restore_notes (insn);
11992 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11993 RTX_FRAME_RELATED_P (insn) = 1;
11995 m->fs.cfa_reg = sa;
11996 m->fs.cfa_offset = UNITS_PER_WORD;
11997 m->fs.fp_valid = false;
11999 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12000 const0_rtx, style, false);
12002 else
12004 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12005 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12006 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12007 ix86_add_queued_cfa_restore_notes (insn);
12009 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12010 if (m->fs.cfa_offset != UNITS_PER_WORD)
12012 m->fs.cfa_offset = UNITS_PER_WORD;
12013 add_reg_note (insn, REG_CFA_DEF_CFA,
12014 plus_constant (Pmode, stack_pointer_rtx,
12015 UNITS_PER_WORD));
12016 RTX_FRAME_RELATED_P (insn) = 1;
12019 m->fs.sp_offset = UNITS_PER_WORD;
12020 m->fs.sp_valid = true;
12023 else
12025 /* SEH requires that the function end with (1) a stack adjustment
12026 if necessary, (2) a sequence of pops, and (3) a return or
12027 jump instruction. Prevent insns from the function body from
12028 being scheduled into this sequence. */
12029 if (TARGET_SEH)
12031 /* Prevent a catch region from being adjacent to the standard
12032    epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
12033    several other flags that would be interesting to test are
12034    set up yet. */
12035 if (flag_non_call_exceptions)
12036 emit_insn (gen_nops (const1_rtx));
12037 else
12038 emit_insn (gen_blockage ());
12041 /* The first step is to deallocate the stack frame so that we can
12042    pop the registers.  Also do this on SEH targets for very large
12043    frames, as the emitted instructions aren't allowed by the ABI
12044    in epilogues. */
12045 if (!m->fs.sp_valid
12046 || (TARGET_SEH
12047 && (m->fs.sp_offset - frame.reg_save_offset
12048 >= SEH_MAX_FRAME_SIZE)))
12050 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12051 GEN_INT (m->fs.fp_offset
12052 - frame.reg_save_offset),
12053 style, false);
12055 else if (m->fs.sp_offset != frame.reg_save_offset)
12057 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12058 GEN_INT (m->fs.sp_offset
12059 - frame.reg_save_offset),
12060 style,
12061 m->fs.cfa_reg == stack_pointer_rtx);
12064 ix86_emit_restore_regs_using_pop ();
12067 /* If we used a frame pointer and haven't already got rid of it,
12068    then do so now. */
12069 if (m->fs.fp_valid)
12071 /* If the stack pointer is valid and pointing at the frame
12072 pointer store address, then we only need a pop. */
12073 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12074 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12075 /* Leave results in shorter dependency chains on CPUs that are
12076 able to grok it fast. */
12077 else if (TARGET_USE_LEAVE
12078 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12079 || !cfun->machine->use_fast_prologue_epilogue)
12080 ix86_emit_leave ();
12081 else
12083 pro_epilogue_adjust_stack (stack_pointer_rtx,
12084 hard_frame_pointer_rtx,
12085 const0_rtx, style, !using_drap);
12086 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12090 if (using_drap)
12092 int param_ptr_offset = UNITS_PER_WORD;
12093 rtx insn;
12095 gcc_assert (stack_realign_drap);
12097 if (ix86_static_chain_on_stack)
12098 param_ptr_offset += UNITS_PER_WORD;
12099 if (!call_used_regs[REGNO (crtl->drap_reg)])
12100 param_ptr_offset += UNITS_PER_WORD;
12102 insn = emit_insn (gen_rtx_SET
12103 (VOIDmode, stack_pointer_rtx,
12104 gen_rtx_PLUS (Pmode,
12105 crtl->drap_reg,
12106 GEN_INT (-param_ptr_offset))));
12107 m->fs.cfa_reg = stack_pointer_rtx;
12108 m->fs.cfa_offset = param_ptr_offset;
12109 m->fs.sp_offset = param_ptr_offset;
12110 m->fs.realigned = false;
12112 add_reg_note (insn, REG_CFA_DEF_CFA,
12113 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12114 GEN_INT (param_ptr_offset)));
12115 RTX_FRAME_RELATED_P (insn) = 1;
12117 if (!call_used_regs[REGNO (crtl->drap_reg)])
12118 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12121 /* At this point the stack pointer must be valid, and we must have
12122 restored all of the registers. We may not have deallocated the
12123 entire stack frame. We've delayed this until now because it may
12124 be possible to merge the local stack deallocation with the
12125 deallocation forced by ix86_static_chain_on_stack. */
12126 gcc_assert (m->fs.sp_valid);
12127 gcc_assert (!m->fs.fp_valid);
12128 gcc_assert (!m->fs.realigned);
12129 if (m->fs.sp_offset != UNITS_PER_WORD)
12131 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12132 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12133 style, true);
12135 else
12136 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12138 /* Sibcall epilogues don't want a return instruction. */
12139 if (style == 0)
12141 m->fs = frame_state_save;
12142 return;
12145 if (crtl->args.pops_args && crtl->args.size)
12147 rtx popc = GEN_INT (crtl->args.pops_args);
12149 /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
12150    address, do an explicit add, and jump indirectly to the caller. */
12152 if (crtl->args.pops_args >= 65536)
12154 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12155 rtx insn;
12157 /* There is no "pascal" calling convention in any 64bit ABI. */
12158 gcc_assert (!TARGET_64BIT);
12160 insn = emit_insn (gen_pop (ecx));
12161 m->fs.cfa_offset -= UNITS_PER_WORD;
12162 m->fs.sp_offset -= UNITS_PER_WORD;
12164 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12165 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12166 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12167 add_reg_note (insn, REG_CFA_REGISTER,
12168 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12169 RTX_FRAME_RELATED_P (insn) = 1;
12171 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12172 popc, -1, true);
12173 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12175 else
12176 emit_jump_insn (gen_simple_return_pop_internal (popc));
12178 else
12179 emit_jump_insn (gen_simple_return_internal ());
12181 /* Restore the state back to the state from the prologue,
12182 so that it's correct for the next epilogue. */
12183 m->fs = frame_state_save;
12186 /* Reset from the function's potential modifications. */
12188 static void
12189 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12191 if (pic_offset_table_rtx
12192 && !ix86_use_pseudo_pic_reg ())
12193 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12194 #if TARGET_MACHO
12195 /* Mach-O doesn't support labels at the end of objects, so if
12196 it looks like we might want one, insert a NOP. */
12198 rtx_insn *insn = get_last_insn ();
12199 rtx_insn *deleted_debug_label = NULL;
12200 while (insn
12201 && NOTE_P (insn)
12202 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12204 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12205    notes only; instead set their CODE_LABEL_NUMBER to -1,
12206    otherwise there would be code generation differences
12207    between -g and -g0. */
12208 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12209 deleted_debug_label = insn;
12210 insn = PREV_INSN (insn);
12212 if (insn
12213 && (LABEL_P (insn)
12214 || (NOTE_P (insn)
12215 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12216 fputs ("\tnop\n", file);
12217 else if (deleted_debug_label)
12218 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12219 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12220 CODE_LABEL_NUMBER (insn) = -1;
12222 #endif
12226 /* Return a scratch register to use in the split stack prologue. The
12227 split stack prologue is used for -fsplit-stack. It consists of the
12228 first instructions in the function, even before the regular prologue.
12229 The scratch register can be any caller-saved register which is not
12230 used for parameters or for the static chain. */
12232 static unsigned int
12233 split_stack_prologue_scratch_regno (void)
12235 if (TARGET_64BIT)
12236 return R11_REG;
12237 else
12239 bool is_fastcall, is_thiscall;
12240 int regparm;
12242 is_fastcall = (lookup_attribute ("fastcall",
12243 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12244 != NULL);
12245 is_thiscall = (lookup_attribute ("thiscall",
12246 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12247 != NULL);
12248 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12250 if (is_fastcall)
12252 if (DECL_STATIC_CHAIN (cfun->decl))
12254 sorry ("-fsplit-stack does not support fastcall with "
12255 "nested function");
12256 return INVALID_REGNUM;
12258 return AX_REG;
12260 else if (is_thiscall)
12262 if (!DECL_STATIC_CHAIN (cfun->decl))
12263 return DX_REG;
12264 return AX_REG;
12266 else if (regparm < 3)
12268 if (!DECL_STATIC_CHAIN (cfun->decl))
12269 return CX_REG;
12270 else
12272 if (regparm >= 2)
12274 sorry ("-fsplit-stack does not support 2 register "
12275 "parameters for a nested function");
12276 return INVALID_REGNUM;
12278 return DX_REG;
12281 else
12283 /* FIXME: We could make this work by pushing a register
12284 around the addition and comparison. */
12285 sorry ("-fsplit-stack does not support 3 register parameters");
12286 return INVALID_REGNUM;
12291 /* A SYMBOL_REF for the function which allocates new stack space for
12292 -fsplit-stack. */
12294 static GTY(()) rtx split_stack_fn;
12296 /* A SYMBOL_REF for the function which allocates more stack space when
12297 using the large code model. */
12299 static GTY(()) rtx split_stack_fn_large;
12301 /* Handle -fsplit-stack. These are the first instructions in the
12302 function, even before the regular prologue. */
12304 void
12305 ix86_expand_split_stack_prologue (void)
12307 struct ix86_frame frame;
12308 HOST_WIDE_INT allocate;
12309 unsigned HOST_WIDE_INT args_size;
12310 rtx_code_label *label;
12311 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12312 rtx scratch_reg = NULL_RTX;
12313 rtx_code_label *varargs_label = NULL;
12314 rtx fn;
12316 gcc_assert (flag_split_stack && reload_completed);
12318 ix86_finalize_stack_realign_flags ();
12319 ix86_compute_frame_layout (&frame);
12320 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12322 /* This is the label we will branch to if we have enough stack
12323 space. We expect the basic block reordering pass to reverse this
12324 branch if optimizing, so that we branch in the unlikely case. */
12325 label = gen_label_rtx ();
12327 /* We need to compare the stack pointer minus the frame size with
12328 the stack boundary in the TCB. The stack boundary always gives
12329 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12330 can compare directly. Otherwise we need to do an addition. */
12332 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12333 UNSPEC_STACK_CHECK);
12334 limit = gen_rtx_CONST (Pmode, limit);
12335 limit = gen_rtx_MEM (Pmode, limit);
12336 if (allocate < SPLIT_STACK_AVAILABLE)
12337 current = stack_pointer_rtx;
12338 else
12340 unsigned int scratch_regno;
12341 rtx offset;
12343 /* We need a scratch register to hold the stack pointer minus
12344 the required frame size. Since this is the very start of the
12345 function, the scratch register can be any caller-saved
12346 register which is not used for parameters. */
12347 offset = GEN_INT (- allocate);
12348 scratch_regno = split_stack_prologue_scratch_regno ();
12349 if (scratch_regno == INVALID_REGNUM)
12350 return;
12351 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12352 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12354 /* We don't use ix86_gen_add3 in this case because it will
12355 want to split to lea, but when not optimizing the insn
12356 will not be split after this point. */
12357 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12358 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12359 offset)));
12361 else
12363 emit_move_insn (scratch_reg, offset);
12364 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12365 stack_pointer_rtx));
12367 current = scratch_reg;
12370 ix86_expand_branch (GEU, current, limit, label);
12371 jump_insn = get_last_insn ();
12372 JUMP_LABEL (jump_insn) = label;
12374 /* Mark the jump as very likely to be taken. */
12375 add_int_reg_note (jump_insn, REG_BR_PROB,
12376 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12378 if (split_stack_fn == NULL_RTX)
12380 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12381 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12383 fn = split_stack_fn;
12385 /* Get more stack space. We pass in the desired stack space and the
12386 size of the arguments to copy to the new stack. In 32-bit mode
12387 we push the parameters; __morestack will return on a new stack
12388 anyhow. In 64-bit mode we pass the parameters in r10 and
12389 r11. */
12390 allocate_rtx = GEN_INT (allocate);
12391 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12392 call_fusage = NULL_RTX;
12393 if (TARGET_64BIT)
12395 rtx reg10, reg11;
12397 reg10 = gen_rtx_REG (Pmode, R10_REG);
12398 reg11 = gen_rtx_REG (Pmode, R11_REG);
12400 /* If this function uses a static chain, it will be in %r10.
12401 Preserve it across the call to __morestack. */
12402 if (DECL_STATIC_CHAIN (cfun->decl))
12404 rtx rax;
12406 rax = gen_rtx_REG (word_mode, AX_REG);
12407 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12408 use_reg (&call_fusage, rax);
12411 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12412 && !TARGET_PECOFF)
12414 HOST_WIDE_INT argval;
12416 gcc_assert (Pmode == DImode);
12417 /* When using the large model we need to load the address
12418 into a register, and we've run out of registers. So we
12419 switch to a different calling convention, and we call a
12420 different function: __morestack_large_model. We pass the
12421 argument size in the upper 32 bits of r10 and pass the
12422 frame size in the lower 32 bits. */
12423 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12424 gcc_assert ((args_size & 0xffffffff) == args_size);
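	  /* Illustrative sketch (not part of the original source): the value
	     built below packs the argument size into bits 63:32 of %r10 and
	     the frame size into bits 31:0.  For example, with
	     args_size == 0x20 and allocate == 0x1000:

	       argval = ((args_size << 16) << 16) + allocate;
	       /=> 0x0000002000001000

	     The double shift by 16 is equivalent to a single shift by 32.  */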
12426 if (split_stack_fn_large == NULL_RTX)
12428 split_stack_fn_large =
12429 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12430 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12432 if (ix86_cmodel == CM_LARGE_PIC)
12434 rtx_code_label *label;
12435 rtx x;
12437 label = gen_label_rtx ();
12438 emit_label (label);
12439 LABEL_PRESERVE_P (label) = 1;
12440 emit_insn (gen_set_rip_rex64 (reg10, label));
12441 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12442 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12443 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12444 UNSPEC_GOT);
12445 x = gen_rtx_CONST (Pmode, x);
12446 emit_move_insn (reg11, x);
12447 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12448 x = gen_const_mem (Pmode, x);
12449 emit_move_insn (reg11, x);
12451 else
12452 emit_move_insn (reg11, split_stack_fn_large);
12454 fn = reg11;
12456 argval = ((args_size << 16) << 16) + allocate;
12457 emit_move_insn (reg10, GEN_INT (argval));
12459 else
12461 emit_move_insn (reg10, allocate_rtx);
12462 emit_move_insn (reg11, GEN_INT (args_size));
12463 use_reg (&call_fusage, reg11);
12466 use_reg (&call_fusage, reg10);
12468 else
12470 emit_insn (gen_push (GEN_INT (args_size)));
12471 emit_insn (gen_push (allocate_rtx));
12473 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12474 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12475 NULL_RTX, false);
12476 add_function_usage_to (call_insn, call_fusage);
12478 /* In order to make call/return prediction work right, we now need
12479 to execute a return instruction. See
12480 libgcc/config/i386/morestack.S for the details on how this works.
12482 For flow purposes gcc must not see this as a return
12483 instruction--we need control flow to continue at the subsequent
12484 label. Therefore, we use an unspec. */
12485 gcc_assert (crtl->args.pops_args < 65536);
12486 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12488 /* If we are in 64-bit mode and this function uses a static chain,
12489 we saved %r10 in %rax before calling __morestack. */
12490 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12491 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12492 gen_rtx_REG (word_mode, AX_REG));
12494 /* If this function calls va_start, we need to store a pointer to
12495 the arguments on the old stack, because they may not have been
12496 all copied to the new stack. At this point the old stack can be
12497 found at the frame pointer value used by __morestack, because
12498 __morestack has set that up before calling back to us. Here we
12499 store that pointer in a scratch register, and in
12500 ix86_expand_prologue we store the scratch register in a stack
12501 slot. */
12502 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12504 unsigned int scratch_regno;
12505 rtx frame_reg;
12506 int words;
12508 scratch_regno = split_stack_prologue_scratch_regno ();
12509 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12510 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12512 /* 64-bit:
12513 fp -> old fp value
12514 return address within this function
12515 return address of caller of this function
12516 stack arguments
12517 So we add three words to get to the stack arguments.
12519 32-bit:
12520 fp -> old fp value
12521 return address within this function
12522 first argument to __morestack
12523 second argument to __morestack
12524 return address of caller of this function
12525 stack arguments
12526 So we add five words to get to the stack arguments.
12528 words = TARGET_64BIT ? 3 : 5;
12529 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12530 gen_rtx_PLUS (Pmode, frame_reg,
12531 GEN_INT (words * UNITS_PER_WORD))));
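      /* Illustrative note (not part of the original source): with the layouts
	 described above, the scratch register now holds the address of the
	 first stack argument:

	   64-bit:  scratch = fp + 3 * 8 == fp + 24
	   32-bit:  scratch = fp + 5 * 4 == fp + 20

	 assuming UNITS_PER_WORD of 8 and 4 respectively.  */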
12533 varargs_label = gen_label_rtx ();
12534 emit_jump_insn (gen_jump (varargs_label));
12535 JUMP_LABEL (get_last_insn ()) = varargs_label;
12537 emit_barrier ();
12540 emit_label (label);
12541 LABEL_NUSES (label) = 1;
12543 /* If this function calls va_start, we now have to set the scratch
12544 register for the case where we do not call __morestack. In this
12545 case we need to set it based on the stack pointer. */
12546 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12548 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12549 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12550 GEN_INT (UNITS_PER_WORD))));
12552 emit_label (varargs_label);
12553 LABEL_NUSES (varargs_label) = 1;
12557 /* We may have to tell the dataflow pass that the split stack prologue
12558 is initializing a scratch register. */
12560 static void
12561 ix86_live_on_entry (bitmap regs)
12563 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12565 gcc_assert (flag_split_stack);
12566 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12570 /* Extract the parts of an RTL expression that is a valid memory address
12571 for an instruction. Return 0 if the structure of the address is
12572 grossly off. Return -1 if the address contains ASHIFT, so it is not
12573 strictly valid, but is still used for computing the length of the lea instruction. */
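/* Illustrative example (a sketch, not in the original source): for a
   canonical address such as

     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
		       (reg:SI si))
	      (const_int 12))

   OUT is filled in with base == (reg:SI si), index == (reg:SI bx),
   scale == 4 and disp == (const_int 12), and 1 is returned.  If the whole
   address is an ASHIFT (as some lea operands are), the shift count of 0..3
   is converted to a scale of 1, 2, 4 or 8 and -1 is returned.  */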
12575 int
12576 ix86_decompose_address (rtx addr, struct ix86_address *out)
12578 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12579 rtx base_reg, index_reg;
12580 HOST_WIDE_INT scale = 1;
12581 rtx scale_rtx = NULL_RTX;
12582 rtx tmp;
12583 int retval = 1;
12584 enum ix86_address_seg seg = SEG_DEFAULT;
12586 /* Allow zero-extended SImode addresses;
12587 they will be emitted with the addr32 prefix. */
12588 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12590 if (GET_CODE (addr) == ZERO_EXTEND
12591 && GET_MODE (XEXP (addr, 0)) == SImode)
12593 addr = XEXP (addr, 0);
12594 if (CONST_INT_P (addr))
12595 return 0;
12597 else if (GET_CODE (addr) == AND
12598 && const_32bit_mask (XEXP (addr, 1), DImode))
12600 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12601 if (addr == NULL_RTX)
12602 return 0;
12604 if (CONST_INT_P (addr))
12605 return 0;
12609 /* Allow SImode subregs of DImode addresses;
12610 they will be emitted with the addr32 prefix. */
12611 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12613 if (GET_CODE (addr) == SUBREG
12614 && GET_MODE (SUBREG_REG (addr)) == DImode)
12616 addr = SUBREG_REG (addr);
12617 if (CONST_INT_P (addr))
12618 return 0;
12622 if (REG_P (addr))
12623 base = addr;
12624 else if (GET_CODE (addr) == SUBREG)
12626 if (REG_P (SUBREG_REG (addr)))
12627 base = addr;
12628 else
12629 return 0;
12631 else if (GET_CODE (addr) == PLUS)
12633 rtx addends[4], op;
12634 int n = 0, i;
12636 op = addr;
12639 if (n >= 4)
12640 return 0;
12641 addends[n++] = XEXP (op, 1);
12642 op = XEXP (op, 0);
12644 while (GET_CODE (op) == PLUS);
12645 if (n >= 4)
12646 return 0;
12647 addends[n] = op;
12649 for (i = n; i >= 0; --i)
12651 op = addends[i];
12652 switch (GET_CODE (op))
12654 case MULT:
12655 if (index)
12656 return 0;
12657 index = XEXP (op, 0);
12658 scale_rtx = XEXP (op, 1);
12659 break;
12661 case ASHIFT:
12662 if (index)
12663 return 0;
12664 index = XEXP (op, 0);
12665 tmp = XEXP (op, 1);
12666 if (!CONST_INT_P (tmp))
12667 return 0;
12668 scale = INTVAL (tmp);
12669 if ((unsigned HOST_WIDE_INT) scale > 3)
12670 return 0;
12671 scale = 1 << scale;
12672 break;
12674 case ZERO_EXTEND:
12675 op = XEXP (op, 0);
12676 if (GET_CODE (op) != UNSPEC)
12677 return 0;
12678 /* FALLTHRU */
12680 case UNSPEC:
12681 if (XINT (op, 1) == UNSPEC_TP
12682 && TARGET_TLS_DIRECT_SEG_REFS
12683 && seg == SEG_DEFAULT)
12684 seg = DEFAULT_TLS_SEG_REG;
12685 else
12686 return 0;
12687 break;
12689 case SUBREG:
12690 if (!REG_P (SUBREG_REG (op)))
12691 return 0;
12692 /* FALLTHRU */
12694 case REG:
12695 if (!base)
12696 base = op;
12697 else if (!index)
12698 index = op;
12699 else
12700 return 0;
12701 break;
12703 case CONST:
12704 case CONST_INT:
12705 case SYMBOL_REF:
12706 case LABEL_REF:
12707 if (disp)
12708 return 0;
12709 disp = op;
12710 break;
12712 default:
12713 return 0;
12717 else if (GET_CODE (addr) == MULT)
12719 index = XEXP (addr, 0); /* index*scale */
12720 scale_rtx = XEXP (addr, 1);
12722 else if (GET_CODE (addr) == ASHIFT)
12724 /* We're called for lea too, which implements ashift on occasion. */
12725 index = XEXP (addr, 0);
12726 tmp = XEXP (addr, 1);
12727 if (!CONST_INT_P (tmp))
12728 return 0;
12729 scale = INTVAL (tmp);
12730 if ((unsigned HOST_WIDE_INT) scale > 3)
12731 return 0;
12732 scale = 1 << scale;
12733 retval = -1;
12735 else
12736 disp = addr; /* displacement */
12738 if (index)
12740 if (REG_P (index))
12742 else if (GET_CODE (index) == SUBREG
12743 && REG_P (SUBREG_REG (index)))
12745 else
12746 return 0;
12749 /* Extract the integral value of scale. */
12750 if (scale_rtx)
12752 if (!CONST_INT_P (scale_rtx))
12753 return 0;
12754 scale = INTVAL (scale_rtx);
12757 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12758 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12760 /* Avoid useless 0 displacement. */
12761 if (disp == const0_rtx && (base || index))
12762 disp = NULL_RTX;
12764 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12765 if (base_reg && index_reg && scale == 1
12766 && (index_reg == arg_pointer_rtx
12767 || index_reg == frame_pointer_rtx
12768 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12770 std::swap (base, index);
12771 std::swap (base_reg, index_reg);
12774 /* Special case: %ebp cannot be encoded as a base without a displacement.
12775 Similarly %r13. */
12776 if (!disp
12777 && base_reg
12778 && (base_reg == hard_frame_pointer_rtx
12779 || base_reg == frame_pointer_rtx
12780 || base_reg == arg_pointer_rtx
12781 || (REG_P (base_reg)
12782 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12783 || REGNO (base_reg) == R13_REG))))
12784 disp = const0_rtx;
12786 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12787 Avoid this by transforming to [%esi+0].
12788 Reload calls address legitimization without cfun defined, so we need
12789 to test cfun for being non-NULL. */
12790 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12791 && base_reg && !index_reg && !disp
12792 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12793 disp = const0_rtx;
12795 /* Special case: encode reg+reg instead of reg*2. */
12796 if (!base && index && scale == 2)
12797 base = index, base_reg = index_reg, scale = 1;
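  /* Illustrative note (not part of the original source): e.g. an operand of
     the form (,%eax,2) becomes (%eax,%eax), which avoids the mandatory
     32-bit zero displacement that an index-only address would need.  */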
12799 /* Special case: scaling cannot be encoded without base or displacement. */
12800 if (!base && !disp && index && scale != 1)
12801 disp = const0_rtx;
12803 out->base = base;
12804 out->index = index;
12805 out->disp = disp;
12806 out->scale = scale;
12807 out->seg = seg;
12809 return retval;
12812 /* Return cost of the memory address x.
12813 For i386, it is better to use a complex address than let gcc copy
12814 the address into a reg and make a new pseudo. But not if the address
12815 requires two regs - that would mean more pseudos with longer
12816 lifetimes. */
12817 static int
12818 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12820 struct ix86_address parts;
12821 int cost = 1;
12822 int ok = ix86_decompose_address (x, &parts);
12824 gcc_assert (ok);
12826 if (parts.base && GET_CODE (parts.base) == SUBREG)
12827 parts.base = SUBREG_REG (parts.base);
12828 if (parts.index && GET_CODE (parts.index) == SUBREG)
12829 parts.index = SUBREG_REG (parts.index);
12831 /* Attempt to minimize number of registers in the address. */
12832 if ((parts.base
12833 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12834 || (parts.index
12835 && (!REG_P (parts.index)
12836 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12837 cost++;
12839 /* When the address base or index is "pic_offset_table_rtx", we don't
12840 increase the address cost. When a memop with "pic_offset_table_rtx" is
12841 not invariant itself, it most likely means that the base or index is not
12842 invariant either. Therefore only "pic_offset_table_rtx" could be hoisted
12843 out, which is not profitable for x86. */
12844 if (parts.base
12845 && (current_pass->type == GIMPLE_PASS
12846 || (!pic_offset_table_rtx
12847 || REGNO (pic_offset_table_rtx) != REGNO(parts.base)))
12848 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12849 && parts.index
12850 && (current_pass->type == GIMPLE_PASS
12851 || (!pic_offset_table_rtx
12852 || REGNO (pic_offset_table_rtx) != REGNO(parts.index)))
12853 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12854 && parts.base != parts.index)
12855 cost++;
12857 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12858 since its predecode logic can't detect the length of instructions
12859 and it degenerates to vector decoding. Increase the cost of such
12860 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12861 to split such addresses or even refuse such addresses at all.
12863 The following addressing modes are affected:
12864 [base+scale*index]
12865 [scale*index+disp]
12866 [base+index]
12868 The first and last case may be avoided by explicitly coding the zero in
12869 the memory address, but I don't have an AMD-K6 machine handy to check this
12870 theory. */
12872 if (TARGET_K6
12873 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12874 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12875 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12876 cost += 10;
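      /* Illustrative note (not part of the original source): in AT&T syntax
	 the penalized forms correspond to operands such as (%eax,%ebx,2),
	 4(,%ebx,2) and (%eax,%ebx); writing an explicit zero displacement,
	 e.g. 0(%eax,%ebx,2), avoids the first and last form.  */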
12878 return cost;
12881 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12882 this is used to form addresses to local data when -fPIC is in
12883 use. */
12885 static bool
12886 darwin_local_data_pic (rtx disp)
12888 return (GET_CODE (disp) == UNSPEC
12889 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12892 /* Determine if a given RTX is a valid constant. We already know this
12893 satisfies CONSTANT_P. */
12895 static bool
12896 ix86_legitimate_constant_p (machine_mode, rtx x)
12898 /* Pointer bounds constants are not valid. */
12899 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12900 return false;
12902 switch (GET_CODE (x))
12904 case CONST:
12905 x = XEXP (x, 0);
12907 if (GET_CODE (x) == PLUS)
12909 if (!CONST_INT_P (XEXP (x, 1)))
12910 return false;
12911 x = XEXP (x, 0);
12914 if (TARGET_MACHO && darwin_local_data_pic (x))
12915 return true;
12917 /* Only some unspecs are valid as "constants". */
12918 if (GET_CODE (x) == UNSPEC)
12919 switch (XINT (x, 1))
12921 case UNSPEC_GOT:
12922 case UNSPEC_GOTOFF:
12923 case UNSPEC_PLTOFF:
12924 return TARGET_64BIT;
12925 case UNSPEC_TPOFF:
12926 case UNSPEC_NTPOFF:
12927 x = XVECEXP (x, 0, 0);
12928 return (GET_CODE (x) == SYMBOL_REF
12929 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12930 case UNSPEC_DTPOFF:
12931 x = XVECEXP (x, 0, 0);
12932 return (GET_CODE (x) == SYMBOL_REF
12933 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12934 default:
12935 return false;
12938 /* We must have drilled down to a symbol. */
12939 if (GET_CODE (x) == LABEL_REF)
12940 return true;
12941 if (GET_CODE (x) != SYMBOL_REF)
12942 return false;
12943 /* FALLTHRU */
12945 case SYMBOL_REF:
12946 /* TLS symbols are never valid. */
12947 if (SYMBOL_REF_TLS_MODEL (x))
12948 return false;
12950 /* DLLIMPORT symbols are never valid. */
12951 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12952 && SYMBOL_REF_DLLIMPORT_P (x))
12953 return false;
12955 #if TARGET_MACHO
12956 /* mdynamic-no-pic */
12957 if (MACHO_DYNAMIC_NO_PIC_P)
12958 return machopic_symbol_defined_p (x);
12959 #endif
12960 break;
12962 case CONST_DOUBLE:
12963 if (GET_MODE (x) == TImode
12964 && x != CONST0_RTX (TImode)
12965 && !TARGET_64BIT)
12966 return false;
12967 break;
12969 case CONST_VECTOR:
12970 if (!standard_sse_constant_p (x))
12971 return false;
12973 default:
12974 break;
12977 /* Otherwise we handle everything else in the move patterns. */
12978 return true;
12981 /* Determine if it's legal to put X into the constant pool. This
12982 is not possible for the address of thread-local symbols, which
12983 is checked above. */
12985 static bool
12986 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
12988 /* We can always put integral constants and vectors in memory. */
12989 switch (GET_CODE (x))
12991 case CONST_INT:
12992 case CONST_DOUBLE:
12993 case CONST_VECTOR:
12994 return false;
12996 default:
12997 break;
12999 return !ix86_legitimate_constant_p (mode, x);
13002 /* Return true if the symbol is marked as dllimport, or as a stub-variable;
13003 otherwise return false. */
13005 static bool
13006 is_imported_p (rtx x)
13008 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13009 || GET_CODE (x) != SYMBOL_REF)
13010 return false;
13012 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13016 /* Nonzero if the constant value X is a legitimate general operand
13017 when generating PIC code. It is given that flag_pic is on and
13018 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13020 bool
13021 legitimate_pic_operand_p (rtx x)
13023 rtx inner;
13025 switch (GET_CODE (x))
13027 case CONST:
13028 inner = XEXP (x, 0);
13029 if (GET_CODE (inner) == PLUS
13030 && CONST_INT_P (XEXP (inner, 1)))
13031 inner = XEXP (inner, 0);
13033 /* Only some unspecs are valid as "constants". */
13034 if (GET_CODE (inner) == UNSPEC)
13035 switch (XINT (inner, 1))
13037 case UNSPEC_GOT:
13038 case UNSPEC_GOTOFF:
13039 case UNSPEC_PLTOFF:
13040 return TARGET_64BIT;
13041 case UNSPEC_TPOFF:
13042 x = XVECEXP (inner, 0, 0);
13043 return (GET_CODE (x) == SYMBOL_REF
13044 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13045 case UNSPEC_MACHOPIC_OFFSET:
13046 return legitimate_pic_address_disp_p (x);
13047 default:
13048 return false;
13050 /* FALLTHRU */
13052 case SYMBOL_REF:
13053 case LABEL_REF:
13054 return legitimate_pic_address_disp_p (x);
13056 default:
13057 return true;
13061 /* Determine if a given CONST RTX is a valid memory displacement
13062 in PIC mode. */
13064 bool
13065 legitimate_pic_address_disp_p (rtx disp)
13067 bool saw_plus;
13069 /* In 64bit mode we can allow direct addresses of symbols and labels
13070 when they are not dynamic symbols. */
13071 if (TARGET_64BIT)
13073 rtx op0 = disp, op1;
13075 switch (GET_CODE (disp))
13077 case LABEL_REF:
13078 return true;
13080 case CONST:
13081 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13082 break;
13083 op0 = XEXP (XEXP (disp, 0), 0);
13084 op1 = XEXP (XEXP (disp, 0), 1);
13085 if (!CONST_INT_P (op1)
13086 || INTVAL (op1) >= 16*1024*1024
13087 || INTVAL (op1) < -16*1024*1024)
13088 break;
13089 if (GET_CODE (op0) == LABEL_REF)
13090 return true;
13091 if (GET_CODE (op0) == CONST
13092 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13093 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13094 return true;
13095 if (GET_CODE (op0) == UNSPEC
13096 && XINT (op0, 1) == UNSPEC_PCREL)
13097 return true;
13098 if (GET_CODE (op0) != SYMBOL_REF)
13099 break;
13100 /* FALLTHRU */
13102 case SYMBOL_REF:
13103 /* TLS references should always be enclosed in UNSPEC.
13104 A dllimported symbol always needs to be resolved. */
13105 if (SYMBOL_REF_TLS_MODEL (op0)
13106 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13107 return false;
13109 if (TARGET_PECOFF)
13111 if (is_imported_p (op0))
13112 return true;
13114 if (SYMBOL_REF_FAR_ADDR_P (op0)
13115 || !SYMBOL_REF_LOCAL_P (op0))
13116 break;
13118 /* Function symbols need to be resolved only for
13119 the large model.
13120 For the small model we don't need to resolve anything
13121 here. */
13122 if ((ix86_cmodel != CM_LARGE_PIC
13123 && SYMBOL_REF_FUNCTION_P (op0))
13124 || ix86_cmodel == CM_SMALL_PIC)
13125 return true;
13126 /* Non-external symbols don't need to be resolved for
13127 the large and medium models. */
13128 if ((ix86_cmodel == CM_LARGE_PIC
13129 || ix86_cmodel == CM_MEDIUM_PIC)
13130 && !SYMBOL_REF_EXTERNAL_P (op0))
13131 return true;
13133 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13134 && (SYMBOL_REF_LOCAL_P (op0)
13135 || (HAVE_LD_PIE_COPYRELOC
13136 && flag_pie
13137 && !SYMBOL_REF_WEAK (op0)
13138 && !SYMBOL_REF_FUNCTION_P (op0)))
13139 && ix86_cmodel != CM_LARGE_PIC)
13140 return true;
13141 break;
13143 default:
13144 break;
13147 if (GET_CODE (disp) != CONST)
13148 return false;
13149 disp = XEXP (disp, 0);
13151 if (TARGET_64BIT)
13153 /* It is not safe to allow PLUS expressions; they could exceed the limited
13154 distance allowed for GOT references. We should not need these anyway. */
13155 if (GET_CODE (disp) != UNSPEC
13156 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13157 && XINT (disp, 1) != UNSPEC_GOTOFF
13158 && XINT (disp, 1) != UNSPEC_PCREL
13159 && XINT (disp, 1) != UNSPEC_PLTOFF))
13160 return false;
13162 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13163 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13164 return false;
13165 return true;
13168 saw_plus = false;
13169 if (GET_CODE (disp) == PLUS)
13171 if (!CONST_INT_P (XEXP (disp, 1)))
13172 return false;
13173 disp = XEXP (disp, 0);
13174 saw_plus = true;
13177 if (TARGET_MACHO && darwin_local_data_pic (disp))
13178 return true;
13180 if (GET_CODE (disp) != UNSPEC)
13181 return false;
13183 switch (XINT (disp, 1))
13185 case UNSPEC_GOT:
13186 if (saw_plus)
13187 return false;
13188 /* We need to check for both symbols and labels because VxWorks loads
13189 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13190 details. */
13191 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13192 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13193 case UNSPEC_GOTOFF:
13194 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13195 While the ABI also specifies a 32bit relocation, we don't produce it in
13196 the small PIC model at all. */
13197 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13198 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13199 && !TARGET_64BIT)
13200 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13201 return false;
13202 case UNSPEC_GOTTPOFF:
13203 case UNSPEC_GOTNTPOFF:
13204 case UNSPEC_INDNTPOFF:
13205 if (saw_plus)
13206 return false;
13207 disp = XVECEXP (disp, 0, 0);
13208 return (GET_CODE (disp) == SYMBOL_REF
13209 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13210 case UNSPEC_NTPOFF:
13211 disp = XVECEXP (disp, 0, 0);
13212 return (GET_CODE (disp) == SYMBOL_REF
13213 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13214 case UNSPEC_DTPOFF:
13215 disp = XVECEXP (disp, 0, 0);
13216 return (GET_CODE (disp) == SYMBOL_REF
13217 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13220 return false;
13223 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13224 replace the input X, or the original X if no replacement is called for.
13225 The output parameter *WIN is 1 if the calling macro should goto WIN,
13226 0 if it should not. */
13228 bool
13229 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13230 int)
13232 /* Reload can generate:
13234 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13235 (reg:DI 97))
13236 (reg:DI 2 cx))
13238 This RTX is rejected by ix86_legitimate_address_p due to
13239 the non-strictness of base register 97. Following this rejection,
13240 reload pushes all three components into separate registers,
13241 creating an invalid memory address RTX.
13243 The following code reloads only the invalid part of the
13244 memory address RTX. */
13246 if (GET_CODE (x) == PLUS
13247 && REG_P (XEXP (x, 1))
13248 && GET_CODE (XEXP (x, 0)) == PLUS
13249 && REG_P (XEXP (XEXP (x, 0), 1)))
13251 rtx base, index;
13252 bool something_reloaded = false;
13254 base = XEXP (XEXP (x, 0), 1);
13255 if (!REG_OK_FOR_BASE_STRICT_P (base))
13257 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13258 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13259 opnum, (enum reload_type) type);
13260 something_reloaded = true;
13263 index = XEXP (x, 1);
13264 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13266 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13267 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13268 opnum, (enum reload_type) type);
13269 something_reloaded = true;
13272 gcc_assert (something_reloaded);
13273 return true;
13276 return false;
13279 /* Determine if OP is a suitable RTX for an address register.
13280 Return the naked register if a register or a register subreg is
13281 found, otherwise return NULL_RTX. */
13283 static rtx
13284 ix86_validate_address_register (rtx op)
13286 machine_mode mode = GET_MODE (op);
13288 /* Only SImode or DImode registers can form the address. */
13289 if (mode != SImode && mode != DImode)
13290 return NULL_RTX;
13292 if (REG_P (op))
13293 return op;
13294 else if (GET_CODE (op) == SUBREG)
13296 rtx reg = SUBREG_REG (op);
13298 if (!REG_P (reg))
13299 return NULL_RTX;
13301 mode = GET_MODE (reg);
13303 /* Don't allow SUBREGs that span more than a word. It can
13304 lead to spill failures when the register is one word out
13305 of a two word structure. */
13306 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13307 return NULL_RTX;
13309 /* Allow only SUBREGs of non-eliminable hard registers. */
13310 if (register_no_elim_operand (reg, mode))
13311 return reg;
13314 /* Op is not a register. */
13315 return NULL_RTX;
13318 /* Recognizes RTL expressions that are valid memory addresses for an
13319 instruction. The MODE argument is the machine mode for the MEM
13320 expression that wants to use this address.
13322 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13323 convert common non-canonical forms to canonical form so that they will
13324 be recognized. */
13326 static bool
13327 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13329 struct ix86_address parts;
13330 rtx base, index, disp;
13331 HOST_WIDE_INT scale;
13332 enum ix86_address_seg seg;
13334 if (ix86_decompose_address (addr, &parts) <= 0)
13335 /* Decomposition failed. */
13336 return false;
13338 base = parts.base;
13339 index = parts.index;
13340 disp = parts.disp;
13341 scale = parts.scale;
13342 seg = parts.seg;
13344 /* Validate base register. */
13345 if (base)
13347 rtx reg = ix86_validate_address_register (base);
13349 if (reg == NULL_RTX)
13350 return false;
13352 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13353 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13354 /* Base is not valid. */
13355 return false;
13358 /* Validate index register. */
13359 if (index)
13361 rtx reg = ix86_validate_address_register (index);
13363 if (reg == NULL_RTX)
13364 return false;
13366 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13367 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13368 /* Index is not valid. */
13369 return false;
13372 /* Index and base should have the same mode. */
13373 if (base && index
13374 && GET_MODE (base) != GET_MODE (index))
13375 return false;
13377 /* Address override works only on the (%reg) part of %fs:(%reg). */
13378 if (seg != SEG_DEFAULT
13379 && ((base && GET_MODE (base) != word_mode)
13380 || (index && GET_MODE (index) != word_mode)))
13381 return false;
13383 /* Validate scale factor. */
13384 if (scale != 1)
13386 if (!index)
13387 /* Scale without index. */
13388 return false;
13390 if (scale != 2 && scale != 4 && scale != 8)
13391 /* Scale is not a valid multiplier. */
13392 return false;
13395 /* Validate displacement. */
13396 if (disp)
13398 if (GET_CODE (disp) == CONST
13399 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13400 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13401 switch (XINT (XEXP (disp, 0), 1))
13403 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13404 used. While the ABI also specifies 32bit relocations, we don't produce
13405 them at all and use IP-relative addressing instead. */
13406 case UNSPEC_GOT:
13407 case UNSPEC_GOTOFF:
13408 gcc_assert (flag_pic);
13409 if (!TARGET_64BIT)
13410 goto is_legitimate_pic;
13412 /* 64bit address unspec. */
13413 return false;
13415 case UNSPEC_GOTPCREL:
13416 case UNSPEC_PCREL:
13417 gcc_assert (flag_pic);
13418 goto is_legitimate_pic;
13420 case UNSPEC_GOTTPOFF:
13421 case UNSPEC_GOTNTPOFF:
13422 case UNSPEC_INDNTPOFF:
13423 case UNSPEC_NTPOFF:
13424 case UNSPEC_DTPOFF:
13425 break;
13427 case UNSPEC_STACK_CHECK:
13428 gcc_assert (flag_split_stack);
13429 break;
13431 default:
13432 /* Invalid address unspec. */
13433 return false;
13436 else if (SYMBOLIC_CONST (disp)
13437 && (flag_pic
13438 || (TARGET_MACHO
13439 #if TARGET_MACHO
13440 && MACHOPIC_INDIRECT
13441 && !machopic_operand_p (disp)
13442 #endif
13446 is_legitimate_pic:
13447 if (TARGET_64BIT && (index || base))
13449 /* foo@dtpoff(%rX) is ok. */
13450 if (GET_CODE (disp) != CONST
13451 || GET_CODE (XEXP (disp, 0)) != PLUS
13452 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13453 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13454 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13455 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13456 /* Non-constant pic memory reference. */
13457 return false;
13459 else if ((!TARGET_MACHO || flag_pic)
13460 && ! legitimate_pic_address_disp_p (disp))
13461 /* Displacement is an invalid pic construct. */
13462 return false;
13463 #if TARGET_MACHO
13464 else if (MACHO_DYNAMIC_NO_PIC_P
13465 && !ix86_legitimate_constant_p (Pmode, disp))
13466 /* displacement must be referenced via non_lazy_pointer */
13467 return false;
13468 #endif
13470 /* This code used to verify that a symbolic pic displacement
13471 includes the pic_offset_table_rtx register.
13473 While this is a good idea, unfortunately these constructs may
13474 be created by the "adds using lea" optimization for incorrect
13475 code like:
13477 int a;
13478 int foo(int i)
13480 return *(&a+i);
13483 This code is nonsensical, but results in addressing the
13484 GOT table with a pic_offset_table_rtx base. We can't
13485 just refuse it easily, since it gets matched by the
13486 "addsi3" pattern, which later gets split to lea when the
13487 output register differs from the input. While this
13488 could be handled by a separate addsi pattern for this case
13489 that never results in lea, simply disabling this test seems
13490 to be the easier and correct fix for the crash. */
13492 else if (GET_CODE (disp) != LABEL_REF
13493 && !CONST_INT_P (disp)
13494 && (GET_CODE (disp) != CONST
13495 || !ix86_legitimate_constant_p (Pmode, disp))
13496 && (GET_CODE (disp) != SYMBOL_REF
13497 || !ix86_legitimate_constant_p (Pmode, disp)))
13498 /* Displacement is not constant. */
13499 return false;
13500 else if (TARGET_64BIT
13501 && !x86_64_immediate_operand (disp, VOIDmode))
13502 /* Displacement is out of range. */
13503 return false;
13504 /* In x32 mode, constant addresses are sign extended to 64bit, so
13505 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13506 else if (TARGET_X32 && !(index || base)
13507 && CONST_INT_P (disp)
13508 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13509 return false;
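      /* Illustrative example (not part of the original source): on x32 a
	 bare constant address such as (const_int 0x80000000) has its sign
	 bit set; in a 64-bit addressing mode it would be sign-extended to
	 0xffffffff80000000 and fall outside the 32-bit address space, so it
	 is rejected above.  */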
13512 /* Everything looks valid. */
13513 return true;
13516 /* Determine if a given RTX is a valid constant address. */
13518 bool
13519 constant_address_p (rtx x)
13521 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13524 /* Return a unique alias set for the GOT. */
13526 static alias_set_type
13527 ix86_GOT_alias_set (void)
13529 static alias_set_type set = -1;
13530 if (set == -1)
13531 set = new_alias_set ();
13532 return set;
13535 /* Set regs_ever_live for PIC base address register
13536 to true if required. */
13537 static void
13538 set_pic_reg_ever_live ()
13540 if (reload_in_progress)
13541 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13544 /* Return a legitimate reference for ORIG (an address) using the
13545 register REG. If REG is 0, a new pseudo is generated.
13547 There are two types of references that must be handled:
13549 1. Global data references must load the address from the GOT, via
13550 the PIC reg. An insn is emitted to do this load, and the reg is
13551 returned.
13553 2. Static data references, constant pool addresses, and code labels
13554 compute the address as an offset from the GOT, whose base is in
13555 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13556 differentiate them from global data objects. The returned
13557 address is the PIC reg + an unspec constant.
13559 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13560 reg also appears in the address. */
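/* Illustrative sketch (an assumed example, not in the original source):
   for 32-bit PIC, a local symbol "sym" is typically legitimized as

     (plus (reg pic_offset_table)
	   (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))

   while a global symbol is loaded through the GOT:

     (mem (plus (reg pic_offset_table)
		(const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))

   corresponding to cases 2 and 1 described above.  */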
13562 static rtx
13563 legitimize_pic_address (rtx orig, rtx reg)
13565 rtx addr = orig;
13566 rtx new_rtx = orig;
13568 #if TARGET_MACHO
13569 if (TARGET_MACHO && !TARGET_64BIT)
13571 if (reg == 0)
13572 reg = gen_reg_rtx (Pmode);
13573 /* Use the generic Mach-O PIC machinery. */
13574 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13576 #endif
13578 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13580 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13581 if (tmp)
13582 return tmp;
13585 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13586 new_rtx = addr;
13587 else if (TARGET_64BIT && !TARGET_PECOFF
13588 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13590 rtx tmpreg;
13591 /* This symbol may be referenced via a displacement from the PIC
13592 base address (@GOTOFF). */
13594 set_pic_reg_ever_live ();
13595 if (GET_CODE (addr) == CONST)
13596 addr = XEXP (addr, 0);
13597 if (GET_CODE (addr) == PLUS)
13599 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13600 UNSPEC_GOTOFF);
13601 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13603 else
13604 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13605 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13606 if (!reg)
13607 tmpreg = gen_reg_rtx (Pmode);
13608 else
13609 tmpreg = reg;
13610 emit_move_insn (tmpreg, new_rtx);
13612 if (reg != 0)
13614 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13615 tmpreg, 1, OPTAB_DIRECT);
13616 new_rtx = reg;
13618 else
13619 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13621 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13623 /* This symbol may be referenced via a displacement from the PIC
13624 base address (@GOTOFF). */
13626 set_pic_reg_ever_live ();
13627 if (GET_CODE (addr) == CONST)
13628 addr = XEXP (addr, 0);
13629 if (GET_CODE (addr) == PLUS)
13631 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13632 UNSPEC_GOTOFF);
13633 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13635 else
13636 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13637 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13638 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13640 if (reg != 0)
13642 emit_move_insn (reg, new_rtx);
13643 new_rtx = reg;
13646 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13647 /* We can't use @GOTOFF for text labels on VxWorks;
13648 see gotoff_operand. */
13649 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13651 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13652 if (tmp)
13653 return tmp;
13655 /* For x64 PE-COFF there is no GOT table, so we use the address
13656 directly. */
13657 if (TARGET_64BIT && TARGET_PECOFF)
13659 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13660 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13662 if (reg == 0)
13663 reg = gen_reg_rtx (Pmode);
13664 emit_move_insn (reg, new_rtx);
13665 new_rtx = reg;
13667 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13669 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13670 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13671 new_rtx = gen_const_mem (Pmode, new_rtx);
13672 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13674 if (reg == 0)
13675 reg = gen_reg_rtx (Pmode);
13676 /* Use gen_movsi directly, otherwise the address is loaded
13677 into a register for CSE. We don't want to CSE these addresses;
13678 instead we CSE addresses from the GOT table, so skip this. */
13679 emit_insn (gen_movsi (reg, new_rtx));
13680 new_rtx = reg;
13682 else
13684 /* This symbol must be referenced via a load from the
13685 Global Offset Table (@GOT). */
13687 set_pic_reg_ever_live ();
13688 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13689 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13690 if (TARGET_64BIT)
13691 new_rtx = force_reg (Pmode, new_rtx);
13692 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13693 new_rtx = gen_const_mem (Pmode, new_rtx);
13694 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13696 if (reg == 0)
13697 reg = gen_reg_rtx (Pmode);
13698 emit_move_insn (reg, new_rtx);
13699 new_rtx = reg;
13702 else
13704 if (CONST_INT_P (addr)
13705 && !x86_64_immediate_operand (addr, VOIDmode))
13707 if (reg)
13709 emit_move_insn (reg, addr);
13710 new_rtx = reg;
13712 else
13713 new_rtx = force_reg (Pmode, addr);
13715 else if (GET_CODE (addr) == CONST)
13717 addr = XEXP (addr, 0);
13719 /* We must match stuff we generate before. Assume the only
13720 unspecs that can get here are ours. Not that we could do
13721 anything with them anyway.... */
13722 if (GET_CODE (addr) == UNSPEC
13723 || (GET_CODE (addr) == PLUS
13724 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13725 return orig;
13726 gcc_assert (GET_CODE (addr) == PLUS);
13728 if (GET_CODE (addr) == PLUS)
13730 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13732 /* Check first to see if this is a constant offset from a @GOTOFF
13733 symbol reference. */
13734 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13735 && CONST_INT_P (op1))
13737 if (!TARGET_64BIT)
13739 set_pic_reg_ever_live ();
13740 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13741 UNSPEC_GOTOFF);
13742 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13743 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13744 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13746 if (reg != 0)
13748 emit_move_insn (reg, new_rtx);
13749 new_rtx = reg;
13752 else
13754 if (INTVAL (op1) < -16*1024*1024
13755 || INTVAL (op1) >= 16*1024*1024)
13757 if (!x86_64_immediate_operand (op1, Pmode))
13758 op1 = force_reg (Pmode, op1);
13759 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13763 else
13765 rtx base = legitimize_pic_address (op0, reg);
13766 machine_mode mode = GET_MODE (base);
13767 new_rtx
13768 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13770 if (CONST_INT_P (new_rtx))
13772 if (INTVAL (new_rtx) < -16*1024*1024
13773 || INTVAL (new_rtx) >= 16*1024*1024)
13775 if (!x86_64_immediate_operand (new_rtx, mode))
13776 new_rtx = force_reg (mode, new_rtx);
13777 new_rtx
13778 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13780 else
13781 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13783 else
13785 if (GET_CODE (new_rtx) == PLUS
13786 && CONSTANT_P (XEXP (new_rtx, 1)))
13788 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13789 new_rtx = XEXP (new_rtx, 1);
13791 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13796 return new_rtx;
13799 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13801 static rtx
13802 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13804 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13806 if (GET_MODE (tp) != tp_mode)
13808 gcc_assert (GET_MODE (tp) == SImode);
13809 gcc_assert (tp_mode == DImode);
13811 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13814 if (to_reg)
13815 tp = copy_to_mode_reg (tp_mode, tp);
13817 return tp;
13820 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13822 static GTY(()) rtx ix86_tls_symbol;
13824 static rtx
13825 ix86_tls_get_addr (void)
13827 if (!ix86_tls_symbol)
13829 const char *sym
13830 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13831 ? "___tls_get_addr" : "__tls_get_addr");
13833 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13836 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13838 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13839 UNSPEC_PLTOFF);
13840 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13841 gen_rtx_CONST (Pmode, unspec));
13844 return ix86_tls_symbol;
13847 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13849 static GTY(()) rtx ix86_tls_module_base_symbol;
13851 static rtx
13852 ix86_tls_module_base (void)
13854 if (!ix86_tls_module_base_symbol)
13856 ix86_tls_module_base_symbol
13857 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13859 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13860 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13863 return ix86_tls_module_base_symbol;
13866 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13867 false if we expect this to be used for a memory address and true if
13868 we expect to load the address into a register. */
13870 static rtx
13871 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13873 rtx dest, base, off;
13874 rtx pic = NULL_RTX, tp = NULL_RTX;
13875 machine_mode tp_mode = Pmode;
13876 int type;
13878 /* Fall back to the global dynamic model if the toolchain cannot support
13879 local dynamic. */
13880 if (TARGET_SUN_TLS && !TARGET_64BIT
13881 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13882 && model == TLS_MODEL_LOCAL_DYNAMIC)
13883 model = TLS_MODEL_GLOBAL_DYNAMIC;
13885 switch (model)
13887 case TLS_MODEL_GLOBAL_DYNAMIC:
13888 dest = gen_reg_rtx (Pmode);
13890 if (!TARGET_64BIT)
13892 if (flag_pic && !TARGET_PECOFF)
13893 pic = pic_offset_table_rtx;
13894 else
13896 pic = gen_reg_rtx (Pmode);
13897 emit_insn (gen_set_got (pic));
13901 if (TARGET_GNU2_TLS)
13903 if (TARGET_64BIT)
13904 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13905 else
13906 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13908 tp = get_thread_pointer (Pmode, true);
13909 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13911 if (GET_MODE (x) != Pmode)
13912 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13914 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13916 else
13918 rtx caddr = ix86_tls_get_addr ();
13920 if (TARGET_64BIT)
13922 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13923 rtx_insn *insns;
13925 start_sequence ();
13926 emit_call_insn
13927 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13928 insns = get_insns ();
13929 end_sequence ();
13931 if (GET_MODE (x) != Pmode)
13932 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13934 RTL_CONST_CALL_P (insns) = 1;
13935 emit_libcall_block (insns, dest, rax, x);
13937 else
13938 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13940 break;
13942 case TLS_MODEL_LOCAL_DYNAMIC:
13943 base = gen_reg_rtx (Pmode);
13945 if (!TARGET_64BIT)
13947 if (flag_pic)
13948 pic = pic_offset_table_rtx;
13949 else
13951 pic = gen_reg_rtx (Pmode);
13952 emit_insn (gen_set_got (pic));
13956 if (TARGET_GNU2_TLS)
13958 rtx tmp = ix86_tls_module_base ();
13960 if (TARGET_64BIT)
13961 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13962 else
13963 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13965 tp = get_thread_pointer (Pmode, true);
13966 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13967 gen_rtx_MINUS (Pmode, tmp, tp));
13969 else
13971 rtx caddr = ix86_tls_get_addr ();
13973 if (TARGET_64BIT)
13975 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13976 rtx_insn *insns;
13977 rtx eqv;
13979 start_sequence ();
13980 emit_call_insn
13981 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13982 insns = get_insns ();
13983 end_sequence ();
13985 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13986 share the LD_BASE result with other LD model accesses. */
13987 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13988 UNSPEC_TLS_LD_BASE);
13990 RTL_CONST_CALL_P (insns) = 1;
13991 emit_libcall_block (insns, base, rax, eqv);
13993 else
13994 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13997 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13998 off = gen_rtx_CONST (Pmode, off);
14000 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14002 if (TARGET_GNU2_TLS)
14004 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14006 if (GET_MODE (x) != Pmode)
14007 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14009 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14011 break;
14013 case TLS_MODEL_INITIAL_EXEC:
14014 if (TARGET_64BIT)
14016 if (TARGET_SUN_TLS && !TARGET_X32)
14018 /* The Sun linker took the AMD64 TLS spec literally
14019 and can only handle %rax as destination of the
14020 initial executable code sequence. */
14022 dest = gen_reg_rtx (DImode);
14023 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14024 return dest;
14027 /* Generate DImode references to avoid %fs:(%reg32)
14028 problems and the linker IE->LE relaxation bug. */
14029 tp_mode = DImode;
14030 pic = NULL;
14031 type = UNSPEC_GOTNTPOFF;
14033 else if (flag_pic)
14035 set_pic_reg_ever_live ();
14036 pic = pic_offset_table_rtx;
14037 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14039 else if (!TARGET_ANY_GNU_TLS)
14041 pic = gen_reg_rtx (Pmode);
14042 emit_insn (gen_set_got (pic));
14043 type = UNSPEC_GOTTPOFF;
14045 else
14047 pic = NULL;
14048 type = UNSPEC_INDNTPOFF;
14051 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14052 off = gen_rtx_CONST (tp_mode, off);
14053 if (pic)
14054 off = gen_rtx_PLUS (tp_mode, pic, off);
14055 off = gen_const_mem (tp_mode, off);
14056 set_mem_alias_set (off, ix86_GOT_alias_set ());
14058 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14060 base = get_thread_pointer (tp_mode,
14061 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14062 off = force_reg (tp_mode, off);
14063 return gen_rtx_PLUS (tp_mode, base, off);
14065 else
14067 base = get_thread_pointer (Pmode, true);
14068 dest = gen_reg_rtx (Pmode);
14069 emit_insn (ix86_gen_sub3 (dest, base, off));
14071 break;
14073 case TLS_MODEL_LOCAL_EXEC:
14074 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14075 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14076 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14077 off = gen_rtx_CONST (Pmode, off);
14079 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14081 base = get_thread_pointer (Pmode,
14082 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14083 return gen_rtx_PLUS (Pmode, base, off);
14085 else
14087 base = get_thread_pointer (Pmode, true);
14088 dest = gen_reg_rtx (Pmode);
14089 emit_insn (ix86_gen_sub3 (dest, base, off));
14091 break;
14093 default:
14094 gcc_unreachable ();
14097 return dest;
14100 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14101 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14102 unique refptr-DECL symbol corresponding to symbol DECL. */
14104 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14106 static inline hashval_t hash (tree_map *m) { return m->hash; }
14107 static inline bool
14108 equal (tree_map *a, tree_map *b)
14110 return a->base.from == b->base.from;
14113 static void
14114 handle_cache_entry (tree_map *&m)
14116 extern void gt_ggc_mx (tree_map *&);
14117 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14118 return;
14119 else if (ggc_marked_p (m->base.from))
14120 gt_ggc_mx (m);
14121 else
14122 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14126 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14128 static tree
14129 get_dllimport_decl (tree decl, bool beimport)
14131 struct tree_map *h, in;
14132 const char *name;
14133 const char *prefix;
14134 size_t namelen, prefixlen;
14135 char *imp_name;
14136 tree to;
14137 rtx rtl;
14139 if (!dllimport_map)
14140 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14142 in.hash = htab_hash_pointer (decl);
14143 in.base.from = decl;
14144 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14145 h = *loc;
14146 if (h)
14147 return h->to;
14149 *loc = h = ggc_alloc<tree_map> ();
14150 h->hash = in.hash;
14151 h->base.from = decl;
14152 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14153 VAR_DECL, NULL, ptr_type_node);
14154 DECL_ARTIFICIAL (to) = 1;
14155 DECL_IGNORED_P (to) = 1;
14156 DECL_EXTERNAL (to) = 1;
14157 TREE_READONLY (to) = 1;
14159 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14160 name = targetm.strip_name_encoding (name);
14161 if (beimport)
14162 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14163 ? "*__imp_" : "*__imp__";
14164 else
14165 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
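  /* Illustrative example (not part of the original source): for a decl whose
     stripped assembler name is "foo", the dllimport case builds
     "*__imp_foo" when user_label_prefix is empty (e.g. 64-bit PE) and
     "*__imp__foo" when it is "_" (e.g. 32-bit PE); the refptr case builds
     "*.refptr.foo" or "*refptr.foo" respectively.  */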
14166 namelen = strlen (name);
14167 prefixlen = strlen (prefix);
14168 imp_name = (char *) alloca (namelen + prefixlen + 1);
14169 memcpy (imp_name, prefix, prefixlen);
14170 memcpy (imp_name + prefixlen, name, namelen + 1);
14172 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14173 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14174 SET_SYMBOL_REF_DECL (rtl, to);
14175 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14176 if (!beimport)
14178 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14179 #ifdef SUB_TARGET_RECORD_STUB
14180 SUB_TARGET_RECORD_STUB (name);
14181 #endif
14184 rtl = gen_const_mem (Pmode, rtl);
14185 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14187 SET_DECL_RTL (to, rtl);
14188 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14190 return to;
14193 /* Expand SYMBOL into its corresponding far-address symbol.
14194 WANT_REG is true if we require the result be a register. */
14196 static rtx
14197 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14199 tree imp_decl;
14200 rtx x;
14202 gcc_assert (SYMBOL_REF_DECL (symbol));
14203 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14205 x = DECL_RTL (imp_decl);
14206 if (want_reg)
14207 x = force_reg (Pmode, x);
14208 return x;
14211 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14212 true if we require the result be a register. */
14214 static rtx
14215 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14217 tree imp_decl;
14218 rtx x;
14220 gcc_assert (SYMBOL_REF_DECL (symbol));
14221 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14223 x = DECL_RTL (imp_decl);
14224 if (want_reg)
14225 x = force_reg (Pmode, x);
14226 return x;
14229 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14230 is true if we require the result be a register. */
14232 static rtx
14233 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14235 if (!TARGET_PECOFF)
14236 return NULL_RTX;
14238 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14240 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14241 return legitimize_dllimport_symbol (addr, inreg);
14242 if (GET_CODE (addr) == CONST
14243 && GET_CODE (XEXP (addr, 0)) == PLUS
14244 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14245 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14247 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14248 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14252 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14253 return NULL_RTX;
14254 if (GET_CODE (addr) == SYMBOL_REF
14255 && !is_imported_p (addr)
14256 && SYMBOL_REF_EXTERNAL_P (addr)
14257 && SYMBOL_REF_DECL (addr))
14258 return legitimize_pe_coff_extern_decl (addr, inreg);
14260 if (GET_CODE (addr) == CONST
14261 && GET_CODE (XEXP (addr, 0)) == PLUS
14262 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14263 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14264 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14265 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14267 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14268 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14270 return NULL_RTX;
14273 /* Try machine-dependent ways of modifying an illegitimate address
14274 to be legitimate. If we find one, return the new, valid address.
14275 This macro is used in only one place: `memory_address' in explow.c.
14277 OLDX is the address as it was before break_out_memory_refs was called.
14278 In some cases it is useful to look at this to decide what needs to be done.
14280 It is always safe for this macro to do nothing. It exists to recognize
14281 opportunities to optimize the output.
14283 For the 80386, we handle X+REG by loading X into a register R and
14284 using R+REG. R will go in a general reg and indexing will be used.
14285 However, if REG is a broken-out memory address or multiplication,
14286 nothing needs to be done because REG can certainly go in a general reg.
14288 When -fpic is used, special handling is needed for symbolic references.
14289 See comments by legitimize_pic_address in i386.c for details. */
14291 static rtx
14292 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14294 int changed = 0;
14295 unsigned log;
14297 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14298 if (log)
14299 return legitimize_tls_address (x, (enum tls_model) log, false);
14300 if (GET_CODE (x) == CONST
14301 && GET_CODE (XEXP (x, 0)) == PLUS
14302 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14303 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14305 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14306 (enum tls_model) log, false);
14307 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14310 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14312 rtx tmp = legitimize_pe_coff_symbol (x, true);
14313 if (tmp)
14314 return tmp;
14317 if (flag_pic && SYMBOLIC_CONST (x))
14318 return legitimize_pic_address (x, 0);
14320 #if TARGET_MACHO
14321 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14322 return machopic_indirect_data_reference (x, 0);
14323 #endif
14325 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14326 if (GET_CODE (x) == ASHIFT
14327 && CONST_INT_P (XEXP (x, 1))
14328 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14330 changed = 1;
14331 log = INTVAL (XEXP (x, 1));
14332 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14333 GEN_INT (1 << log));
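/* E.g. an address term (ashift (reg) (const_int 3)) is rewritten here as
   (mult (reg) (const_int 8)), the canonical scaled-index form the address
   matching code expects.  */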
14336 if (GET_CODE (x) == PLUS)
14338 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14340 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14341 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14342 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14344 changed = 1;
14345 log = INTVAL (XEXP (XEXP (x, 0), 1));
14346 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14347 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14348 GEN_INT (1 << log));
14351 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14352 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14353 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14355 changed = 1;
14356 log = INTVAL (XEXP (XEXP (x, 1), 1));
14357 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14358 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14359 GEN_INT (1 << log));
14362 /* Put multiply first if it isn't already. */
14363 if (GET_CODE (XEXP (x, 1)) == MULT)
14365 rtx tmp = XEXP (x, 0);
14366 XEXP (x, 0) = XEXP (x, 1);
14367 XEXP (x, 1) = tmp;
14368 changed = 1;
14371 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14372 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14373 created by virtual register instantiation, register elimination, and
14374 similar optimizations. */
14375 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14377 changed = 1;
14378 x = gen_rtx_PLUS (Pmode,
14379 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14380 XEXP (XEXP (x, 1), 0)),
14381 XEXP (XEXP (x, 1), 1));
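/* For example, (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   becomes (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   i.e. the base + index*scale + displacement shape that
   ix86_legitimate_address_p can accept directly.  */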
14384 /* Canonicalize
14385 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14386 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14387 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14388 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14389 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14390 && CONSTANT_P (XEXP (x, 1)))
14392 rtx constant;
14393 rtx other = NULL_RTX;
14395 if (CONST_INT_P (XEXP (x, 1)))
14397 constant = XEXP (x, 1);
14398 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14400 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14402 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14403 other = XEXP (x, 1);
14405 else
14406 constant = 0;
14408 if (constant)
14410 changed = 1;
14411 x = gen_rtx_PLUS (Pmode,
14412 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14413 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14414 plus_constant (Pmode, other,
14415 INTVAL (constant)));
14419 if (changed && ix86_legitimate_address_p (mode, x, false))
14420 return x;
14422 if (GET_CODE (XEXP (x, 0)) == MULT)
14424 changed = 1;
14425 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14428 if (GET_CODE (XEXP (x, 1)) == MULT)
14430 changed = 1;
14431 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14434 if (changed
14435 && REG_P (XEXP (x, 1))
14436 && REG_P (XEXP (x, 0)))
14437 return x;
14439 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14441 changed = 1;
14442 x = legitimize_pic_address (x, 0);
14445 if (changed && ix86_legitimate_address_p (mode, x, false))
14446 return x;
14448 if (REG_P (XEXP (x, 0)))
14450 rtx temp = gen_reg_rtx (Pmode);
14451 rtx val = force_operand (XEXP (x, 1), temp);
14452 if (val != temp)
14454 val = convert_to_mode (Pmode, val, 1);
14455 emit_move_insn (temp, val);
14458 XEXP (x, 1) = temp;
14459 return x;
14462 else if (REG_P (XEXP (x, 1)))
14464 rtx temp = gen_reg_rtx (Pmode);
14465 rtx val = force_operand (XEXP (x, 0), temp);
14466 if (val != temp)
14468 val = convert_to_mode (Pmode, val, 1);
14469 emit_move_insn (temp, val);
14472 XEXP (x, 0) = temp;
14473 return x;
14477 return x;
14480 /* Print an integer constant expression in assembler syntax. Addition
14481 and subtraction are the only arithmetic that may appear in these
14482 expressions. FILE is the stdio stream to write to, X is the rtx, and
14483 CODE is the operand print code from the output string. */
14485 static void
14486 output_pic_addr_const (FILE *file, rtx x, int code)
14488 char buf[256];
14490 switch (GET_CODE (x))
14492 case PC:
14493 gcc_assert (flag_pic);
14494 putc ('.', file);
14495 break;
14497 case SYMBOL_REF:
14498 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14499 output_addr_const (file, x);
14500 else
14502 const char *name = XSTR (x, 0);
14504 /* Mark the decl as referenced so that cgraph will
14505 output the function. */
14506 if (SYMBOL_REF_DECL (x))
14507 mark_decl_referenced (SYMBOL_REF_DECL (x));
14509 #if TARGET_MACHO
14510 if (MACHOPIC_INDIRECT
14511 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14512 name = machopic_indirection_name (x, /*stub_p=*/true);
14513 #endif
14514 assemble_name (file, name);
14516 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14517 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14518 fputs ("@PLT", file);
14519 break;
14521 case LABEL_REF:
14522 x = XEXP (x, 0);
14523 /* FALLTHRU */
14524 case CODE_LABEL:
14525 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14526 assemble_name (asm_out_file, buf);
14527 break;
14529 case CONST_INT:
14530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14531 break;
14533 case CONST:
14534 /* This used to output parentheses around the expression,
14535 but that does not work on the 386 (either ATT or BSD assembler). */
14536 output_pic_addr_const (file, XEXP (x, 0), code);
14537 break;
14539 case CONST_DOUBLE:
14540 if (GET_MODE (x) == VOIDmode)
14542 /* We can use %d if the number is <32 bits and positive. */
14543 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14544 fprintf (file, "0x%lx%08lx",
14545 (unsigned long) CONST_DOUBLE_HIGH (x),
14546 (unsigned long) CONST_DOUBLE_LOW (x));
14547 else
14548 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14550 else
14551 /* We can't handle floating point constants;
14552 TARGET_PRINT_OPERAND must handle them. */
14553 output_operand_lossage ("floating constant misused");
14554 break;
14556 case PLUS:
14557 /* Some assemblers need integer constants to appear first. */
14558 if (CONST_INT_P (XEXP (x, 0)))
14560 output_pic_addr_const (file, XEXP (x, 0), code);
14561 putc ('+', file);
14562 output_pic_addr_const (file, XEXP (x, 1), code);
14564 else
14566 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14567 output_pic_addr_const (file, XEXP (x, 1), code);
14568 putc ('+', file);
14569 output_pic_addr_const (file, XEXP (x, 0), code);
14571 break;
14573 case MINUS:
14574 if (!TARGET_MACHO)
14575 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14576 output_pic_addr_const (file, XEXP (x, 0), code);
14577 putc ('-', file);
14578 output_pic_addr_const (file, XEXP (x, 1), code);
14579 if (!TARGET_MACHO)
14580 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14581 break;
14583 case UNSPEC:
14584 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14586 bool f = i386_asm_output_addr_const_extra (file, x);
14587 gcc_assert (f);
14588 break;
14591 gcc_assert (XVECLEN (x, 0) == 1);
14592 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14593 switch (XINT (x, 1))
14595 case UNSPEC_GOT:
14596 fputs ("@GOT", file);
14597 break;
14598 case UNSPEC_GOTOFF:
14599 fputs ("@GOTOFF", file);
14600 break;
14601 case UNSPEC_PLTOFF:
14602 fputs ("@PLTOFF", file);
14603 break;
14604 case UNSPEC_PCREL:
14605 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14606 "(%rip)" : "[rip]", file);
14607 break;
14608 case UNSPEC_GOTPCREL:
14609 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14610 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14611 break;
14612 case UNSPEC_GOTTPOFF:
14613 /* FIXME: This might be @TPOFF in Sun ld too. */
14614 fputs ("@gottpoff", file);
14615 break;
14616 case UNSPEC_TPOFF:
14617 fputs ("@tpoff", file);
14618 break;
14619 case UNSPEC_NTPOFF:
14620 if (TARGET_64BIT)
14621 fputs ("@tpoff", file);
14622 else
14623 fputs ("@ntpoff", file);
14624 break;
14625 case UNSPEC_DTPOFF:
14626 fputs ("@dtpoff", file);
14627 break;
14628 case UNSPEC_GOTNTPOFF:
14629 if (TARGET_64BIT)
14630 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14631 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14632 else
14633 fputs ("@gotntpoff", file);
14634 break;
14635 case UNSPEC_INDNTPOFF:
14636 fputs ("@indntpoff", file);
14637 break;
14638 #if TARGET_MACHO
14639 case UNSPEC_MACHOPIC_OFFSET:
14640 putc ('-', file);
14641 machopic_output_function_base_name (file);
14642 break;
14643 #endif
14644 default:
14645 output_operand_lossage ("invalid UNSPEC as operand");
14646 break;
14648 break;
14650 default:
14651 output_operand_lossage ("invalid expression as operand");
14655 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14656 We need to emit DTP-relative relocations. */
14658 static void ATTRIBUTE_UNUSED
14659 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14661 fputs (ASM_LONG, file);
14662 output_addr_const (file, x);
14663 fputs ("@dtpoff", file);
14664 switch (size)
14666 case 4:
14667 break;
14668 case 8:
14669 fputs (", 0", file);
14670 break;
14671 default:
14672 gcc_unreachable ();
14676 /* Return true if X is a representation of the PIC register. This copes
14677 with calls from ix86_find_base_term, where the register might have
14678 been replaced by a cselib value. */
14680 static bool
14681 ix86_pic_register_p (rtx x)
14683 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14684 return (pic_offset_table_rtx
14685 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14686 else if (!REG_P (x))
14687 return false;
14688 else if (pic_offset_table_rtx)
14690 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14691 return true;
14692 if (HARD_REGISTER_P (x)
14693 && !HARD_REGISTER_P (pic_offset_table_rtx)
14694 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14695 return true;
14696 return false;
14698 else
14699 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14702 /* Helper function for ix86_delegitimize_address.
14703 Attempt to delegitimize TLS local-exec accesses. */
14705 static rtx
14706 ix86_delegitimize_tls_address (rtx orig_x)
14708 rtx x = orig_x, unspec;
14709 struct ix86_address addr;
14711 if (!TARGET_TLS_DIRECT_SEG_REFS)
14712 return orig_x;
14713 if (MEM_P (x))
14714 x = XEXP (x, 0);
14715 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14716 return orig_x;
14717 if (ix86_decompose_address (x, &addr) == 0
14718 || addr.seg != DEFAULT_TLS_SEG_REG
14719 || addr.disp == NULL_RTX
14720 || GET_CODE (addr.disp) != CONST)
14721 return orig_x;
14722 unspec = XEXP (addr.disp, 0);
14723 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14724 unspec = XEXP (unspec, 0);
14725 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14726 return orig_x;
14727 x = XVECEXP (unspec, 0, 0);
14728 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14729 if (unspec != XEXP (addr.disp, 0))
14730 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14731 if (addr.index)
14733 rtx idx = addr.index;
14734 if (addr.scale != 1)
14735 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14736 x = gen_rtx_PLUS (Pmode, idx, x);
14738 if (addr.base)
14739 x = gen_rtx_PLUS (Pmode, addr.base, x);
14740 if (MEM_P (orig_x))
14741 x = replace_equiv_address_nv (orig_x, x);
14742 return x;
14745 /* In the name of slightly smaller debug output, and to cater to
14746 general assembler lossage, recognize PIC+GOTOFF and turn it back
14747 into a direct symbol reference.
14749 On Darwin, this is necessary to avoid a crash, because Darwin
14750 has a different PIC label for each routine but the DWARF debugging
14751 information is not associated with any particular routine, so it's
14752 necessary to remove references to the PIC label from RTL stored by
14753 the DWARF output code. */
14755 static rtx
14756 ix86_delegitimize_address (rtx x)
14758 rtx orig_x = delegitimize_mem_from_attrs (x);
14759 /* addend is NULL or some rtx if x is something+GOTOFF where
14760 something doesn't include the PIC register. */
14761 rtx addend = NULL_RTX;
14762 /* reg_addend is NULL or a multiple of some register. */
14763 rtx reg_addend = NULL_RTX;
14764 /* const_addend is NULL or a const_int. */
14765 rtx const_addend = NULL_RTX;
14766 /* This is the result, or NULL. */
14767 rtx result = NULL_RTX;
14769 x = orig_x;
14771 if (MEM_P (x))
14772 x = XEXP (x, 0);
14774 if (TARGET_64BIT)
14776 if (GET_CODE (x) == CONST
14777 && GET_CODE (XEXP (x, 0)) == PLUS
14778 && GET_MODE (XEXP (x, 0)) == Pmode
14779 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14780 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14781 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14783 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14784 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14785 if (MEM_P (orig_x))
14786 x = replace_equiv_address_nv (orig_x, x);
14787 return x;
14790 if (GET_CODE (x) == CONST
14791 && GET_CODE (XEXP (x, 0)) == UNSPEC
14792 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14793 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14794 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14796 x = XVECEXP (XEXP (x, 0), 0, 0);
14797 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14799 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14800 GET_MODE (x), 0);
14801 if (x == NULL_RTX)
14802 return orig_x;
14804 return x;
14807 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14808 return ix86_delegitimize_tls_address (orig_x);
14810 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14811 and -mcmodel=medium -fpic. */
14814 if (GET_CODE (x) != PLUS
14815 || GET_CODE (XEXP (x, 1)) != CONST)
14816 return ix86_delegitimize_tls_address (orig_x);
14818 if (ix86_pic_register_p (XEXP (x, 0)))
14819 /* %ebx + GOT/GOTOFF */
14821 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14823 /* %ebx + %reg * scale + GOT/GOTOFF */
14824 reg_addend = XEXP (x, 0);
14825 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14826 reg_addend = XEXP (reg_addend, 1);
14827 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14828 reg_addend = XEXP (reg_addend, 0);
14829 else
14831 reg_addend = NULL_RTX;
14832 addend = XEXP (x, 0);
14835 else
14836 addend = XEXP (x, 0);
14838 x = XEXP (XEXP (x, 1), 0);
14839 if (GET_CODE (x) == PLUS
14840 && CONST_INT_P (XEXP (x, 1)))
14842 const_addend = XEXP (x, 1);
14843 x = XEXP (x, 0);
14846 if (GET_CODE (x) == UNSPEC
14847 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14848 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14849 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14850 && !MEM_P (orig_x) && !addend)))
14851 result = XVECEXP (x, 0, 0);
14853 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14854 && !MEM_P (orig_x))
14855 result = XVECEXP (x, 0, 0);
14857 if (! result)
14858 return ix86_delegitimize_tls_address (orig_x);
14860 if (const_addend)
14861 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14862 if (reg_addend)
14863 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14864 if (addend)
14866 /* If the rest of original X doesn't involve the PIC register, add
14867 addend and subtract pic_offset_table_rtx. This can happen e.g.
14868 for code like:
14869 leal (%ebx, %ecx, 4), %ecx
14871 movl foo@GOTOFF(%ecx), %edx
14872 in which case we return (%ecx - %ebx) + foo
14873 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14874 and reload has completed. */
14875 if (pic_offset_table_rtx
14876 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14877 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14878 pic_offset_table_rtx),
14879 result);
14880 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14882 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14883 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14884 result = gen_rtx_PLUS (Pmode, tmp, result);
14886 else
14887 return orig_x;
14889 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14891 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14892 if (result == NULL_RTX)
14893 return orig_x;
14895 return result;
14898 /* If X is a machine specific address (i.e. a symbol or label being
14899 referenced as a displacement from the GOT implemented using an
14900 UNSPEC), then return the base term. Otherwise return X. */
14903 ix86_find_base_term (rtx x)
14905 rtx term;
14907 if (TARGET_64BIT)
14909 if (GET_CODE (x) != CONST)
14910 return x;
14911 term = XEXP (x, 0);
14912 if (GET_CODE (term) == PLUS
14913 && (CONST_INT_P (XEXP (term, 1))
14914 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14915 term = XEXP (term, 0);
14916 if (GET_CODE (term) != UNSPEC
14917 || (XINT (term, 1) != UNSPEC_GOTPCREL
14918 && XINT (term, 1) != UNSPEC_PCREL))
14919 return x;
14921 return XVECEXP (term, 0, 0);
14924 return ix86_delegitimize_address (x);
14927 static void
14928 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14929 bool fp, FILE *file)
14931 const char *suffix;
14933 if (mode == CCFPmode || mode == CCFPUmode)
14935 code = ix86_fp_compare_code_to_integer (code);
14936 mode = CCmode;
14938 if (reverse)
14939 code = reverse_condition (code);
14941 switch (code)
14943 case EQ:
14944 switch (mode)
14946 case CCAmode:
14947 suffix = "a";
14948 break;
14950 case CCCmode:
14951 suffix = "c";
14952 break;
14954 case CCOmode:
14955 suffix = "o";
14956 break;
14958 case CCSmode:
14959 suffix = "s";
14960 break;
14962 default:
14963 suffix = "e";
14965 break;
14966 case NE:
14967 switch (mode)
14969 case CCAmode:
14970 suffix = "na";
14971 break;
14973 case CCCmode:
14974 suffix = "nc";
14975 break;
14977 case CCOmode:
14978 suffix = "no";
14979 break;
14981 case CCSmode:
14982 suffix = "ns";
14983 break;
14985 default:
14986 suffix = "ne";
14988 break;
14989 case GT:
14990 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14991 suffix = "g";
14992 break;
14993 case GTU:
14994 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14995 Those same assemblers have the same but opposite lossage on cmov. */
14996 if (mode == CCmode)
14997 suffix = fp ? "nbe" : "a";
14998 else
14999 gcc_unreachable ();
15000 break;
15001 case LT:
15002 switch (mode)
15004 case CCNOmode:
15005 case CCGOCmode:
15006 suffix = "s";
15007 break;
15009 case CCmode:
15010 case CCGCmode:
15011 suffix = "l";
15012 break;
15014 default:
15015 gcc_unreachable ();
15017 break;
15018 case LTU:
15019 if (mode == CCmode)
15020 suffix = "b";
15021 else if (mode == CCCmode)
15022 suffix = fp ? "b" : "c";
15023 else
15024 gcc_unreachable ();
15025 break;
15026 case GE:
15027 switch (mode)
15029 case CCNOmode:
15030 case CCGOCmode:
15031 suffix = "ns";
15032 break;
15034 case CCmode:
15035 case CCGCmode:
15036 suffix = "ge";
15037 break;
15039 default:
15040 gcc_unreachable ();
15042 break;
15043 case GEU:
15044 if (mode == CCmode)
15045 suffix = "nb";
15046 else if (mode == CCCmode)
15047 suffix = fp ? "nb" : "nc";
15048 else
15049 gcc_unreachable ();
15050 break;
15051 case LE:
15052 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15053 suffix = "le";
15054 break;
15055 case LEU:
15056 if (mode == CCmode)
15057 suffix = "be";
15058 else
15059 gcc_unreachable ();
15060 break;
15061 case UNORDERED:
15062 suffix = fp ? "u" : "p";
15063 break;
15064 case ORDERED:
15065 suffix = fp ? "nu" : "np";
15066 break;
15067 default:
15068 gcc_unreachable ();
15070 fputs (suffix, file);
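/* For illustration: EQ in plain CCmode prints "e" (as in sete/je), GTU in
   CCmode prints "a" ("nbe" when FP is set, for fcmov), and with REVERSE set
   EQ is first turned into NE and so prints "ne".  */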
15073 /* Print the name of register X to FILE based on its machine mode and number.
15074 If CODE is 'w', pretend the mode is HImode.
15075 If CODE is 'b', pretend the mode is QImode.
15076 If CODE is 'k', pretend the mode is SImode.
15077 If CODE is 'q', pretend the mode is DImode.
15078 If CODE is 'x', pretend the mode is V4SFmode.
15079 If CODE is 't', pretend the mode is V8SFmode.
15080 If CODE is 'g', pretend the mode is V16SFmode.
15081 If CODE is 'h', pretend the reg is the 'high' byte register.
15082 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15083 If CODE is 'd', duplicate the operand for AVX instruction.
15086 void
15087 print_reg (rtx x, int code, FILE *file)
15089 const char *reg;
15090 unsigned int regno;
15091 bool duplicated = code == 'd' && TARGET_AVX;
15093 if (ASSEMBLER_DIALECT == ASM_ATT)
15094 putc ('%', file);
15096 if (x == pc_rtx)
15098 gcc_assert (TARGET_64BIT);
15099 fputs ("rip", file);
15100 return;
15103 regno = true_regnum (x);
15104 gcc_assert (regno != ARG_POINTER_REGNUM
15105 && regno != FRAME_POINTER_REGNUM
15106 && regno != FLAGS_REG
15107 && regno != FPSR_REG
15108 && regno != FPCR_REG);
15110 if (code == 'w' || MMX_REG_P (x))
15111 code = 2;
15112 else if (code == 'b')
15113 code = 1;
15114 else if (code == 'k')
15115 code = 4;
15116 else if (code == 'q')
15117 code = 8;
15118 else if (code == 'y')
15119 code = 3;
15120 else if (code == 'h')
15121 code = 0;
15122 else if (code == 'x')
15123 code = 16;
15124 else if (code == 't')
15125 code = 32;
15126 else if (code == 'g')
15127 code = 64;
15128 else
15129 code = GET_MODE_SIZE (GET_MODE (x));
15131 /* Irritatingly, AMD extended registers use a different naming convention
15132 from the normal registers: "r%d[bwd]" */
15133 if (REX_INT_REGNO_P (regno))
15135 gcc_assert (TARGET_64BIT);
15136 putc ('r', file);
15137 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15138 switch (code)
15140 case 0:
15141 error ("extended registers have no high halves");
15142 break;
15143 case 1:
15144 putc ('b', file);
15145 break;
15146 case 2:
15147 putc ('w', file);
15148 break;
15149 case 4:
15150 putc ('d', file);
15151 break;
15152 case 8:
15153 /* no suffix */
15154 break;
15155 default:
15156 error ("unsupported operand size for extended register");
15157 break;
15159 return;
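/* For illustration: the REX registers print as "r8".."r15" followed by a
   size suffix derived from CODE above, so r8 comes out as "r8b", "r8w",
   "r8d" or plain "r8" for 1-, 2-, 4- and 8-byte operands respectively.  */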
15162 reg = NULL;
15163 switch (code)
15165 case 3:
15166 if (STACK_TOP_P (x))
15168 reg = "st(0)";
15169 break;
15171 /* FALLTHRU */
15172 case 8:
15173 case 4:
15174 case 12:
15175 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15176 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15177 /* FALLTHRU */
15178 case 16:
15179 case 2:
15180 normal:
15181 reg = hi_reg_name[regno];
15182 break;
15183 case 1:
15184 if (regno >= ARRAY_SIZE (qi_reg_name))
15185 goto normal;
15186 reg = qi_reg_name[regno];
15187 break;
15188 case 0:
15189 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15190 goto normal;
15191 reg = qi_high_reg_name[regno];
15192 break;
15193 case 32:
15194 if (SSE_REG_P (x))
15196 gcc_assert (!duplicated);
15197 putc ('y', file);
15198 fputs (hi_reg_name[regno] + 1, file);
15199 return;
15201 case 64:
15202 if (SSE_REG_P (x))
15204 gcc_assert (!duplicated);
15205 putc ('z', file);
15206 fputs (hi_reg_name[REGNO (x)] + 1, file);
15207 return;
15209 break;
15210 default:
15211 gcc_unreachable ();
15214 fputs (reg, file);
15215 if (duplicated)
15217 if (ASSEMBLER_DIALECT == ASM_ATT)
15218 fprintf (file, ", %%%s", reg);
15219 else
15220 fprintf (file, ", %s", reg);
15224 /* Meaning of CODE:
15225 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15226 C -- print opcode suffix for set/cmov insn.
15227 c -- like C, but print reversed condition
15228 F,f -- likewise, but for floating-point.
15229 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15230 otherwise nothing
15231 R -- print embedded rounding and sae.
15232 r -- print only sae.
15233 z -- print the opcode suffix for the size of the current operand.
15234 Z -- likewise, with special suffixes for x87 instructions.
15235 * -- print a star (in certain assembler syntax)
15236 A -- print an absolute memory reference.
15237 E -- print address with DImode register names if TARGET_64BIT.
15238 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15239 s -- print a shift double count, followed by the assembler's argument
15240 delimiter.
15241 b -- print the QImode name of the register for the indicated operand.
15242 %b0 would print %al if operands[0] is reg 0.
15243 w -- likewise, print the HImode name of the register.
15244 k -- likewise, print the SImode name of the register.
15245 q -- likewise, print the DImode name of the register.
15246 x -- likewise, print the V4SFmode name of the register.
15247 t -- likewise, print the V8SFmode name of the register.
15248 g -- likewise, print the V16SFmode name of the register.
15249 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15250 y -- print "st(0)" instead of "st" as a register.
15251 d -- print duplicated register operand for AVX instruction.
15252 D -- print condition for SSE cmp instruction.
15253 P -- if PIC, print an @PLT suffix.
15254 p -- print raw symbol name.
15255 X -- don't print any sort of PIC '@' suffix for a symbol.
15256 & -- print some in-use local-dynamic symbol name.
15257 H -- print a memory address offset by 8; used for sse high-parts
15258 Y -- print condition for XOP pcom* instruction.
15259 + -- print a branch hint as 'cs' or 'ds' prefix
15260 ; -- print a semicolon (after prefixes due to a bug in older gas).
15261 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15262 @ -- print a segment register of thread base pointer load
15263 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15264 ! -- print MPX prefix for jxx/call/ret instructions if required.
15267 void
15268 ix86_print_operand (FILE *file, rtx x, int code)
15270 if (code)
15272 switch (code)
15274 case 'A':
15275 switch (ASSEMBLER_DIALECT)
15277 case ASM_ATT:
15278 putc ('*', file);
15279 break;
15281 case ASM_INTEL:
15282 /* Intel syntax. For absolute addresses, registers should not
15283 be surrounded by braces. */
15284 if (!REG_P (x))
15286 putc ('[', file);
15287 ix86_print_operand (file, x, 0);
15288 putc (']', file);
15289 return;
15291 break;
15293 default:
15294 gcc_unreachable ();
15297 ix86_print_operand (file, x, 0);
15298 return;
15300 case 'E':
15301 /* Wrap address in an UNSPEC to declare special handling. */
15302 if (TARGET_64BIT)
15303 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15305 output_address (x);
15306 return;
15308 case 'L':
15309 if (ASSEMBLER_DIALECT == ASM_ATT)
15310 putc ('l', file);
15311 return;
15313 case 'W':
15314 if (ASSEMBLER_DIALECT == ASM_ATT)
15315 putc ('w', file);
15316 return;
15318 case 'B':
15319 if (ASSEMBLER_DIALECT == ASM_ATT)
15320 putc ('b', file);
15321 return;
15323 case 'Q':
15324 if (ASSEMBLER_DIALECT == ASM_ATT)
15325 putc ('l', file);
15326 return;
15328 case 'S':
15329 if (ASSEMBLER_DIALECT == ASM_ATT)
15330 putc ('s', file);
15331 return;
15333 case 'T':
15334 if (ASSEMBLER_DIALECT == ASM_ATT)
15335 putc ('t', file);
15336 return;
15338 case 'O':
15339 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15340 if (ASSEMBLER_DIALECT != ASM_ATT)
15341 return;
15343 switch (GET_MODE_SIZE (GET_MODE (x)))
15345 case 2:
15346 putc ('w', file);
15347 break;
15349 case 4:
15350 putc ('l', file);
15351 break;
15353 case 8:
15354 putc ('q', file);
15355 break;
15357 default:
15358 output_operand_lossage
15359 ("invalid operand size for operand code 'O'");
15360 return;
15363 putc ('.', file);
15364 #endif
15365 return;
15367 case 'z':
15368 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15370 /* Opcodes don't get size suffixes when using Intel syntax. */
15371 if (ASSEMBLER_DIALECT == ASM_INTEL)
15372 return;
15374 switch (GET_MODE_SIZE (GET_MODE (x)))
15376 case 1:
15377 putc ('b', file);
15378 return;
15380 case 2:
15381 putc ('w', file);
15382 return;
15384 case 4:
15385 putc ('l', file);
15386 return;
15388 case 8:
15389 putc ('q', file);
15390 return;
15392 default:
15393 output_operand_lossage
15394 ("invalid operand size for operand code 'z'");
15395 return;
15399 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15400 warning
15401 (0, "non-integer operand used with operand code 'z'");
15402 /* FALLTHRU */
15404 case 'Z':
15405 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15406 if (ASSEMBLER_DIALECT == ASM_INTEL)
15407 return;
15409 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15411 switch (GET_MODE_SIZE (GET_MODE (x)))
15413 case 2:
15414 #ifdef HAVE_AS_IX86_FILDS
15415 putc ('s', file);
15416 #endif
15417 return;
15419 case 4:
15420 putc ('l', file);
15421 return;
15423 case 8:
15424 #ifdef HAVE_AS_IX86_FILDQ
15425 putc ('q', file);
15426 #else
15427 fputs ("ll", file);
15428 #endif
15429 return;
15431 default:
15432 break;
15435 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15437 /* 387 opcodes don't get size suffixes
15438 if the operands are registers. */
15439 if (STACK_REG_P (x))
15440 return;
15442 switch (GET_MODE_SIZE (GET_MODE (x)))
15444 case 4:
15445 putc ('s', file);
15446 return;
15448 case 8:
15449 putc ('l', file);
15450 return;
15452 case 12:
15453 case 16:
15454 putc ('t', file);
15455 return;
15457 default:
15458 break;
15461 else
15463 output_operand_lossage
15464 ("invalid operand type used with operand code 'Z'");
15465 return;
15468 output_operand_lossage
15469 ("invalid operand size for operand code 'Z'");
15470 return;
15472 case 'd':
15473 case 'b':
15474 case 'w':
15475 case 'k':
15476 case 'q':
15477 case 'h':
15478 case 't':
15479 case 'g':
15480 case 'y':
15481 case 'x':
15482 case 'X':
15483 case 'P':
15484 case 'p':
15485 break;
15487 case 's':
15488 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15490 ix86_print_operand (file, x, 0);
15491 fputs (", ", file);
15493 return;
15495 case 'Y':
15496 switch (GET_CODE (x))
15498 case NE:
15499 fputs ("neq", file);
15500 break;
15501 case EQ:
15502 fputs ("eq", file);
15503 break;
15504 case GE:
15505 case GEU:
15506 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15507 break;
15508 case GT:
15509 case GTU:
15510 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15511 break;
15512 case LE:
15513 case LEU:
15514 fputs ("le", file);
15515 break;
15516 case LT:
15517 case LTU:
15518 fputs ("lt", file);
15519 break;
15520 case UNORDERED:
15521 fputs ("unord", file);
15522 break;
15523 case ORDERED:
15524 fputs ("ord", file);
15525 break;
15526 case UNEQ:
15527 fputs ("ueq", file);
15528 break;
15529 case UNGE:
15530 fputs ("nlt", file);
15531 break;
15532 case UNGT:
15533 fputs ("nle", file);
15534 break;
15535 case UNLE:
15536 fputs ("ule", file);
15537 break;
15538 case UNLT:
15539 fputs ("ult", file);
15540 break;
15541 case LTGT:
15542 fputs ("une", file);
15543 break;
15544 default:
15545 output_operand_lossage ("operand is not a condition code, "
15546 "invalid operand code 'Y'");
15547 return;
15549 return;
15551 case 'D':
15552 /* Little bit of braindamage here. The SSE compare instructions
15553 use completely different names for the comparisons than the
15554 fp conditional moves do. */
15555 switch (GET_CODE (x))
15557 case UNEQ:
15558 if (TARGET_AVX)
15560 fputs ("eq_us", file);
15561 break;
15563 case EQ:
15564 fputs ("eq", file);
15565 break;
15566 case UNLT:
15567 if (TARGET_AVX)
15569 fputs ("nge", file);
15570 break;
15572 case LT:
15573 fputs ("lt", file);
15574 break;
15575 case UNLE:
15576 if (TARGET_AVX)
15578 fputs ("ngt", file);
15579 break;
15581 case LE:
15582 fputs ("le", file);
15583 break;
15584 case UNORDERED:
15585 fputs ("unord", file);
15586 break;
15587 case LTGT:
15588 if (TARGET_AVX)
15590 fputs ("neq_oq", file);
15591 break;
15593 case NE:
15594 fputs ("neq", file);
15595 break;
15596 case GE:
15597 if (TARGET_AVX)
15599 fputs ("ge", file);
15600 break;
15602 case UNGE:
15603 fputs ("nlt", file);
15604 break;
15605 case GT:
15606 if (TARGET_AVX)
15608 fputs ("gt", file);
15609 break;
15611 case UNGT:
15612 fputs ("nle", file);
15613 break;
15614 case ORDERED:
15615 fputs ("ord", file);
15616 break;
15617 default:
15618 output_operand_lossage ("operand is not a condition code, "
15619 "invalid operand code 'D'");
15620 return;
15622 return;
15624 case 'F':
15625 case 'f':
15626 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15627 if (ASSEMBLER_DIALECT == ASM_ATT)
15628 putc ('.', file);
15629 #endif
15631 case 'C':
15632 case 'c':
15633 if (!COMPARISON_P (x))
15635 output_operand_lossage ("operand is not a condition code, "
15636 "invalid operand code '%c'", code);
15637 return;
15639 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15640 code == 'c' || code == 'f',
15641 code == 'F' || code == 'f',
15642 file);
15643 return;
15645 case 'H':
15646 if (!offsettable_memref_p (x))
15648 output_operand_lossage ("operand is not an offsettable memory "
15649 "reference, invalid operand code 'H'");
15650 return;
15652 /* It doesn't actually matter what mode we use here, as we're
15653 only going to use this for printing. */
15654 x = adjust_address_nv (x, DImode, 8);
15655 /* Output 'qword ptr' for intel assembler dialect. */
15656 if (ASSEMBLER_DIALECT == ASM_INTEL)
15657 code = 'q';
15658 break;
15660 case 'K':
15661 gcc_assert (CONST_INT_P (x));
15663 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15664 #ifdef HAVE_AS_IX86_HLE
15665 fputs ("xacquire ", file);
15666 #else
15667 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15668 #endif
15669 else if (INTVAL (x) & IX86_HLE_RELEASE)
15670 #ifdef HAVE_AS_IX86_HLE
15671 fputs ("xrelease ", file);
15672 #else
15673 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15674 #endif
15675 /* We do not want to print the value of the operand. */
15676 return;
15678 case 'N':
15679 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15680 fputs ("{z}", file);
15681 return;
15683 case 'r':
15684 gcc_assert (CONST_INT_P (x));
15685 gcc_assert (INTVAL (x) == ROUND_SAE);
15687 if (ASSEMBLER_DIALECT == ASM_INTEL)
15688 fputs (", ", file);
15690 fputs ("{sae}", file);
15692 if (ASSEMBLER_DIALECT == ASM_ATT)
15693 fputs (", ", file);
15695 return;
15697 case 'R':
15698 gcc_assert (CONST_INT_P (x));
15700 if (ASSEMBLER_DIALECT == ASM_INTEL)
15701 fputs (", ", file);
15703 switch (INTVAL (x))
15705 case ROUND_NEAREST_INT | ROUND_SAE:
15706 fputs ("{rn-sae}", file);
15707 break;
15708 case ROUND_NEG_INF | ROUND_SAE:
15709 fputs ("{rd-sae}", file);
15710 break;
15711 case ROUND_POS_INF | ROUND_SAE:
15712 fputs ("{ru-sae}", file);
15713 break;
15714 case ROUND_ZERO | ROUND_SAE:
15715 fputs ("{rz-sae}", file);
15716 break;
15717 default:
15718 gcc_unreachable ();
15721 if (ASSEMBLER_DIALECT == ASM_ATT)
15722 fputs (", ", file);
15724 return;
15726 case '*':
15727 if (ASSEMBLER_DIALECT == ASM_ATT)
15728 putc ('*', file);
15729 return;
15731 case '&':
15733 const char *name = get_some_local_dynamic_name ();
15734 if (name == NULL)
15735 output_operand_lossage ("'%%&' used without any "
15736 "local dynamic TLS references");
15737 else
15738 assemble_name (file, name);
15739 return;
15742 case '+':
15744 rtx x;
15746 if (!optimize
15747 || optimize_function_for_size_p (cfun)
15748 || !TARGET_BRANCH_PREDICTION_HINTS)
15749 return;
15751 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15752 if (x)
15754 int pred_val = XINT (x, 0);
15756 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15757 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15759 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15760 bool cputaken
15761 = final_forward_branch_p (current_output_insn) == 0;
15763 /* Emit hints only in the case where the default branch prediction
15764 heuristics would fail. */
15765 if (taken != cputaken)
15767 /* We use 3e (DS) prefix for taken branches and
15768 2e (CS) prefix for not taken branches. */
15769 if (taken)
15770 fputs ("ds ; ", file);
15771 else
15772 fputs ("cs ; ", file);
15776 return;
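/* For illustration: if the note predicts the branch taken but a forward
   (statically not-taken) branch is being emitted, the instruction gets a
   "ds ; " prefix, e.g. "ds ; jne .L3"; the opposite mismatch gets "cs ; ".
   The label name here is only an example.  */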
15779 case ';':
15780 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15781 putc (';', file);
15782 #endif
15783 return;
15785 case '@':
15786 if (ASSEMBLER_DIALECT == ASM_ATT)
15787 putc ('%', file);
15789 /* The kernel uses a different segment register for performance
15790 reasons; a system call would not have to trash the userspace
15791 segment register, which would be expensive. */
15792 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15793 fputs ("fs", file);
15794 else
15795 fputs ("gs", file);
15796 return;
15798 case '~':
15799 putc (TARGET_AVX2 ? 'i' : 'f', file);
15800 return;
15802 case '^':
15803 if (TARGET_64BIT && Pmode != word_mode)
15804 fputs ("addr32 ", file);
15805 return;
15807 case '!':
15808 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15809 fputs ("bnd ", file);
15810 return;
15812 default:
15813 output_operand_lossage ("invalid operand code '%c'", code);
15817 if (REG_P (x))
15818 print_reg (x, code, file);
15820 else if (MEM_P (x))
15822 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15823 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15824 && GET_MODE (x) != BLKmode)
15826 const char * size;
15827 switch (GET_MODE_SIZE (GET_MODE (x)))
15829 case 1: size = "BYTE"; break;
15830 case 2: size = "WORD"; break;
15831 case 4: size = "DWORD"; break;
15832 case 8: size = "QWORD"; break;
15833 case 12: size = "TBYTE"; break;
15834 case 16:
15835 if (GET_MODE (x) == XFmode)
15836 size = "TBYTE";
15837 else
15838 size = "XMMWORD";
15839 break;
15840 case 32: size = "YMMWORD"; break;
15841 case 64: size = "ZMMWORD"; break;
15842 default:
15843 gcc_unreachable ();
15846 /* Check for explicit size override (codes 'b', 'w', 'k',
15847 'q' and 'x') */
15848 if (code == 'b')
15849 size = "BYTE";
15850 else if (code == 'w')
15851 size = "WORD";
15852 else if (code == 'k')
15853 size = "DWORD";
15854 else if (code == 'q')
15855 size = "QWORD";
15856 else if (code == 'x')
15857 size = "XMMWORD";
15859 fputs (size, file);
15860 fputs (" PTR ", file);
15863 x = XEXP (x, 0);
15864 /* Avoid (%rip) for call operands. */
15865 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15866 && !CONST_INT_P (x))
15867 output_addr_const (file, x);
15868 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15869 output_operand_lossage ("invalid constraints for operand");
15870 else
15871 output_address (x);
15874 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15876 REAL_VALUE_TYPE r;
15877 long l;
15879 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15880 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15882 if (ASSEMBLER_DIALECT == ASM_ATT)
15883 putc ('$', file);
15884 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15885 if (code == 'q')
15886 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15887 (unsigned long long) (int) l);
15888 else
15889 fprintf (file, "0x%08x", (unsigned int) l);
15892 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15894 REAL_VALUE_TYPE r;
15895 long l[2];
15897 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15898 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15900 if (ASSEMBLER_DIALECT == ASM_ATT)
15901 putc ('$', file);
15902 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15905 /* These float cases don't actually occur as immediate operands. */
15906 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15908 char dstr[30];
15910 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15911 fputs (dstr, file);
15914 else
15916 /* We have patterns that allow zero sets of memory, for instance.
15917 In 64-bit mode, we should probably support all 8-byte vectors,
15918 since we can in fact encode that into an immediate. */
15919 if (GET_CODE (x) == CONST_VECTOR)
15921 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15922 x = const0_rtx;
15925 if (code != 'P' && code != 'p')
15927 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15929 if (ASSEMBLER_DIALECT == ASM_ATT)
15930 putc ('$', file);
15932 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15933 || GET_CODE (x) == LABEL_REF)
15935 if (ASSEMBLER_DIALECT == ASM_ATT)
15936 putc ('$', file);
15937 else
15938 fputs ("OFFSET FLAT:", file);
15941 if (CONST_INT_P (x))
15942 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15943 else if (flag_pic || MACHOPIC_INDIRECT)
15944 output_pic_addr_const (file, x, code);
15945 else
15946 output_addr_const (file, x);
15950 static bool
15951 ix86_print_operand_punct_valid_p (unsigned char code)
15953 return (code == '@' || code == '*' || code == '+' || code == '&'
15954 || code == ';' || code == '~' || code == '^' || code == '!');
15957 /* Print a memory operand whose address is ADDR. */
15959 static void
15960 ix86_print_operand_address (FILE *file, rtx addr)
15962 struct ix86_address parts;
15963 rtx base, index, disp;
15964 int scale;
15965 int ok;
15966 bool vsib = false;
15967 int code = 0;
15969 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15971 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15972 gcc_assert (parts.index == NULL_RTX);
15973 parts.index = XVECEXP (addr, 0, 1);
15974 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15975 addr = XVECEXP (addr, 0, 0);
15976 vsib = true;
15978 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15980 gcc_assert (TARGET_64BIT);
15981 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15982 code = 'q';
15984 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15986 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15987 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15988 if (parts.base != NULL_RTX)
15990 parts.index = parts.base;
15991 parts.scale = 1;
15993 parts.base = XVECEXP (addr, 0, 0);
15994 addr = XVECEXP (addr, 0, 0);
15996 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15998 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15999 gcc_assert (parts.index == NULL_RTX);
16000 parts.index = XVECEXP (addr, 0, 1);
16001 addr = XVECEXP (addr, 0, 0);
16003 else
16004 ok = ix86_decompose_address (addr, &parts);
16006 gcc_assert (ok);
16008 base = parts.base;
16009 index = parts.index;
16010 disp = parts.disp;
16011 scale = parts.scale;
16013 switch (parts.seg)
16015 case SEG_DEFAULT:
16016 break;
16017 case SEG_FS:
16018 case SEG_GS:
16019 if (ASSEMBLER_DIALECT == ASM_ATT)
16020 putc ('%', file);
16021 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16022 break;
16023 default:
16024 gcc_unreachable ();
16027 /* Use one-byte-shorter RIP-relative addressing for 64bit mode. */
16028 if (TARGET_64BIT && !base && !index)
16030 rtx symbol = disp;
16032 if (GET_CODE (disp) == CONST
16033 && GET_CODE (XEXP (disp, 0)) == PLUS
16034 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16035 symbol = XEXP (XEXP (disp, 0), 0);
16037 if (GET_CODE (symbol) == LABEL_REF
16038 || (GET_CODE (symbol) == SYMBOL_REF
16039 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16040 base = pc_rtx;
16042 if (!base && !index)
16044 /* A displacement-only address requires special attention. */
16046 if (CONST_INT_P (disp))
16048 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16049 fputs ("ds:", file);
16050 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16052 else if (flag_pic)
16053 output_pic_addr_const (file, disp, 0);
16054 else
16055 output_addr_const (file, disp);
16057 else
16059 /* Print SImode register names to force addr32 prefix. */
16060 if (SImode_address_operand (addr, VOIDmode))
16062 #ifdef ENABLE_CHECKING
16063 gcc_assert (TARGET_64BIT);
16064 switch (GET_CODE (addr))
16066 case SUBREG:
16067 gcc_assert (GET_MODE (addr) == SImode);
16068 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16069 break;
16070 case ZERO_EXTEND:
16071 case AND:
16072 gcc_assert (GET_MODE (addr) == DImode);
16073 break;
16074 default:
16075 gcc_unreachable ();
16077 #endif
16078 gcc_assert (!code);
16079 code = 'k';
16081 else if (code == 0
16082 && TARGET_X32
16083 && disp
16084 && CONST_INT_P (disp)
16085 && INTVAL (disp) < -16*1024*1024)
16087 /* X32 runs in 64-bit mode, where displacement, DISP, in
16088 address DISP(%r64), is encoded as 32-bit immediate sign-
16089 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16090 address is %r64 + 0xffffffffbffffd00. When %r64 <
16091 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16092 which is invalid for x32. The correct address is %r64
16093 - 0x40000300 == 0xf7ffdd64. To properly encode
16094 -0x40000300(%r64) for x32, we zero-extend negative
16095 displacements by forcing the addr32 prefix, which truncates
16096 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16097 zero-extend all negative displacements, including -1(%rsp).
16098 However, for small negative displacements, sign-extension
16099 won't cause overflow. We only zero-extend negative
16100 displacements if they are < -16*1024*1024, which is also used
16101 to check legitimate address displacements for PIC. */
16102 code = 'k';
16105 if (ASSEMBLER_DIALECT == ASM_ATT)
16107 if (disp)
16109 if (flag_pic)
16110 output_pic_addr_const (file, disp, 0);
16111 else if (GET_CODE (disp) == LABEL_REF)
16112 output_asm_label (disp);
16113 else
16114 output_addr_const (file, disp);
16117 putc ('(', file);
16118 if (base)
16119 print_reg (base, code, file);
16120 if (index)
16122 putc (',', file);
16123 print_reg (index, vsib ? 0 : code, file);
16124 if (scale != 1 || vsib)
16125 fprintf (file, ",%d", scale);
16127 putc (')', file);
16129 else
16131 rtx offset = NULL_RTX;
16133 if (disp)
16135 /* Pull out the offset of a symbol; print any symbol itself. */
16136 if (GET_CODE (disp) == CONST
16137 && GET_CODE (XEXP (disp, 0)) == PLUS
16138 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16140 offset = XEXP (XEXP (disp, 0), 1);
16141 disp = gen_rtx_CONST (VOIDmode,
16142 XEXP (XEXP (disp, 0), 0));
16145 if (flag_pic)
16146 output_pic_addr_const (file, disp, 0);
16147 else if (GET_CODE (disp) == LABEL_REF)
16148 output_asm_label (disp);
16149 else if (CONST_INT_P (disp))
16150 offset = disp;
16151 else
16152 output_addr_const (file, disp);
16155 putc ('[', file);
16156 if (base)
16158 print_reg (base, code, file);
16159 if (offset)
16161 if (INTVAL (offset) >= 0)
16162 putc ('+', file);
16163 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16166 else if (offset)
16167 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16168 else
16169 putc ('0', file);
16171 if (index)
16173 putc ('+', file);
16174 print_reg (index, vsib ? 0 : code, file);
16175 if (scale != 1 || vsib)
16176 fprintf (file, "*%d", scale);
16178 putc (']', file);
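/* For illustration, the same address prints as "-16(%rbp,%rax,4)" in AT&T
   syntax and as "[rbp-16+rax*4]" in Intel syntax with the code above (the
   register names assume 64-bit mode).  */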
16183 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16185 static bool
16186 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16188 rtx op;
16190 if (GET_CODE (x) != UNSPEC)
16191 return false;
16193 op = XVECEXP (x, 0, 0);
16194 switch (XINT (x, 1))
16196 case UNSPEC_GOTTPOFF:
16197 output_addr_const (file, op);
16198 /* FIXME: This might be @TPOFF in Sun ld. */
16199 fputs ("@gottpoff", file);
16200 break;
16201 case UNSPEC_TPOFF:
16202 output_addr_const (file, op);
16203 fputs ("@tpoff", file);
16204 break;
16205 case UNSPEC_NTPOFF:
16206 output_addr_const (file, op);
16207 if (TARGET_64BIT)
16208 fputs ("@tpoff", file);
16209 else
16210 fputs ("@ntpoff", file);
16211 break;
16212 case UNSPEC_DTPOFF:
16213 output_addr_const (file, op);
16214 fputs ("@dtpoff", file);
16215 break;
16216 case UNSPEC_GOTNTPOFF:
16217 output_addr_const (file, op);
16218 if (TARGET_64BIT)
16219 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16220 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16221 else
16222 fputs ("@gotntpoff", file);
16223 break;
16224 case UNSPEC_INDNTPOFF:
16225 output_addr_const (file, op);
16226 fputs ("@indntpoff", file);
16227 break;
16228 #if TARGET_MACHO
16229 case UNSPEC_MACHOPIC_OFFSET:
16230 output_addr_const (file, op);
16231 putc ('-', file);
16232 machopic_output_function_base_name (file);
16233 break;
16234 #endif
16236 case UNSPEC_STACK_CHECK:
16238 int offset;
16240 gcc_assert (flag_split_stack);
16242 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16243 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16244 #else
16245 gcc_unreachable ();
16246 #endif
16248 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16250 break;
16252 default:
16253 return false;
16256 return true;
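/* For illustration: on a 64-bit target an UNSPEC_GOTNTPOFF reference to a
   symbol "foo" comes out as "foo@gottpoff(%rip)" in AT&T syntax and
   "foo@gottpoff[rip]" in Intel syntax; "foo" is just a placeholder name.  */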
16259 /* Split one or more double-mode RTL references into pairs of half-mode
16260 references. The RTL can be REG, offsettable MEM, integer constant, or
16261 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16262 split and "num" is its length. lo_half and hi_half are output arrays
16263 that parallel "operands". */
16265 void
16266 split_double_mode (machine_mode mode, rtx operands[],
16267 int num, rtx lo_half[], rtx hi_half[])
16269 machine_mode half_mode;
16270 unsigned int byte;
16272 switch (mode)
16274 case TImode:
16275 half_mode = DImode;
16276 break;
16277 case DImode:
16278 half_mode = SImode;
16279 break;
16280 default:
16281 gcc_unreachable ();
16284 byte = GET_MODE_SIZE (half_mode);
16286 while (num--)
16288 rtx op = operands[num];
16290 /* simplify_subreg refuses to split volatile memory addresses,
16291 but we still have to handle them. */
16292 if (MEM_P (op))
16294 lo_half[num] = adjust_address (op, half_mode, 0);
16295 hi_half[num] = adjust_address (op, half_mode, byte);
16297 else
16299 lo_half[num] = simplify_gen_subreg (half_mode, op,
16300 GET_MODE (op) == VOIDmode
16301 ? mode : GET_MODE (op), 0);
16302 hi_half[num] = simplify_gen_subreg (half_mode, op,
16303 GET_MODE (op) == VOIDmode
16304 ? mode : GET_MODE (op), byte);
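/* For illustration: splitting a DImode pseudo on a 32-bit target yields
   lo_half = (subreg:SI (reg:DI N) 0) and hi_half = (subreg:SI (reg:DI N) 4),
   while a DImode MEM is split with adjust_address into two SImode MEMs at
   offsets 0 and 4.  */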
16309 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16310 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16311 is the expression of the binary operation. The output may either be
16312 emitted here, or returned to the caller, like all output_* functions.
16314 There is no guarantee that the operands are the same mode, as they
16315 might be within FLOAT or FLOAT_EXTEND expressions. */
16317 #ifndef SYSV386_COMPAT
16318 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16319 wants to fix the assemblers because that causes incompatibility
16320 with gcc. No-one wants to fix gcc because that causes
16321 incompatibility with assemblers... You can use the option of
16322 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16323 #define SYSV386_COMPAT 1
16324 #endif
16326 const char *
16327 output_387_binary_op (rtx insn, rtx *operands)
16329 static char buf[40];
16330 const char *p;
16331 const char *ssep;
16332 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16334 #ifdef ENABLE_CHECKING
16335 /* Even if we do not want to check the inputs, this documents the input
16336 constraints, which helps in understanding the following code. */
16337 if (STACK_REG_P (operands[0])
16338 && ((REG_P (operands[1])
16339 && REGNO (operands[0]) == REGNO (operands[1])
16340 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16341 || (REG_P (operands[2])
16342 && REGNO (operands[0]) == REGNO (operands[2])
16343 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16344 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16345 ; /* ok */
16346 else
16347 gcc_assert (is_sse);
16348 #endif
16350 switch (GET_CODE (operands[3]))
16352 case PLUS:
16353 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16354 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16355 p = "fiadd";
16356 else
16357 p = "fadd";
16358 ssep = "vadd";
16359 break;
16361 case MINUS:
16362 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16363 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16364 p = "fisub";
16365 else
16366 p = "fsub";
16367 ssep = "vsub";
16368 break;
16370 case MULT:
16371 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16372 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16373 p = "fimul";
16374 else
16375 p = "fmul";
16376 ssep = "vmul";
16377 break;
16379 case DIV:
16380 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16381 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16382 p = "fidiv";
16383 else
16384 p = "fdiv";
16385 ssep = "vdiv";
16386 break;
16388 default:
16389 gcc_unreachable ();
16392 if (is_sse)
16394 if (TARGET_AVX)
16396 strcpy (buf, ssep);
16397 if (GET_MODE (operands[0]) == SFmode)
16398 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16399 else
16400 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16402 else
16404 strcpy (buf, ssep + 1);
16405 if (GET_MODE (operands[0]) == SFmode)
16406 strcat (buf, "ss\t{%2, %0|%0, %2}");
16407 else
16408 strcat (buf, "sd\t{%2, %0|%0, %2}");
16410 return buf;
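/* For illustration: an SFmode add with AVX enabled produces the template
   "vaddss\t{%2, %1, %0|%0, %1, %2}" from the code above, while without AVX
   the leading 'v' is dropped and the two-operand form
   "addss\t{%2, %0|%0, %2}" is used.  */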
16412 strcpy (buf, p);
16414 switch (GET_CODE (operands[3]))
16416 case MULT:
16417 case PLUS:
16418 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16420 rtx temp = operands[2];
16421 operands[2] = operands[1];
16422 operands[1] = temp;
16425 /* We know operands[0] == operands[1]. */
16427 if (MEM_P (operands[2]))
16429 p = "%Z2\t%2";
16430 break;
16433 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16435 if (STACK_TOP_P (operands[0]))
16436 /* How is it that we are storing to a dead operand[2]?
16437 Well, presumably operands[1] is dead too. We can't
16438 store the result to st(0) as st(0) gets popped on this
16439 instruction. Instead store to operands[2] (which I
16440 think has to be st(1)). st(1) will be popped later.
16441 gcc <= 2.8.1 didn't have this check and generated
16442 assembly code that the Unixware assembler rejected. */
16443 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16444 else
16445 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16446 break;
16449 if (STACK_TOP_P (operands[0]))
16450 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16451 else
16452 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16453 break;
16455 case MINUS:
16456 case DIV:
16457 if (MEM_P (operands[1]))
16459 p = "r%Z1\t%1";
16460 break;
16463 if (MEM_P (operands[2]))
16465 p = "%Z2\t%2";
16466 break;
16469 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16471 #if SYSV386_COMPAT
16472 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16473 derived assemblers, confusingly reverse the direction of
16474 the operation for fsub{r} and fdiv{r} when the
16475 destination register is not st(0). The Intel assembler
16476 doesn't have this brain damage. Read !SYSV386_COMPAT to
16477 figure out what the hardware really does. */
16478 if (STACK_TOP_P (operands[0]))
16479 p = "{p\t%0, %2|rp\t%2, %0}";
16480 else
16481 p = "{rp\t%2, %0|p\t%0, %2}";
16482 #else
16483 if (STACK_TOP_P (operands[0]))
16484 /* As above for fmul/fadd, we can't store to st(0). */
16485 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16486 else
16487 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16488 #endif
16489 break;
16492 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16494 #if SYSV386_COMPAT
16495 if (STACK_TOP_P (operands[0]))
16496 p = "{rp\t%0, %1|p\t%1, %0}";
16497 else
16498 p = "{p\t%1, %0|rp\t%0, %1}";
16499 #else
16500 if (STACK_TOP_P (operands[0]))
16501 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16502 else
16503 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16504 #endif
16505 break;
16508 if (STACK_TOP_P (operands[0]))
16510 if (STACK_TOP_P (operands[1]))
16511 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16512 else
16513 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16514 break;
16516 else if (STACK_TOP_P (operands[1]))
16518 #if SYSV386_COMPAT
16519 p = "{\t%1, %0|r\t%0, %1}";
16520 #else
16521 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16522 #endif
16524 else
16526 #if SYSV386_COMPAT
16527 p = "{r\t%2, %0|\t%0, %2}";
16528 #else
16529 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16530 #endif
16532 break;
16534 default:
16535 gcc_unreachable ();
16538 strcat (buf, p);
16539 return buf;
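/* For example (an illustrative sketch of the templates built above):
   for an AVX SFmode PLUS the buffer becomes
     "vaddss\t{%2, %1, %0|%0, %1, %2}"
   while the non-AVX SSE path drops the "v" prefix and uses the
   two-operand form
     "addss\t{%2, %0|%0, %2}".
   The x87 paths instead append one of the suffix templates chosen
   above to "fadd", "fsub", "fmul" or "fdiv".  */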
16542 /* Check if a 256bit AVX register is referenced inside of EXP. */
16544 static bool
16545 ix86_check_avx256_register (const_rtx exp)
16547 if (GET_CODE (exp) == SUBREG)
16548 exp = SUBREG_REG (exp);
16550 return (REG_P (exp)
16551 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16554 /* Return needed mode for entity in optimize_mode_switching pass. */
16556 static int
16557 ix86_avx_u128_mode_needed (rtx_insn *insn)
16559 if (CALL_P (insn))
16561 rtx link;
16563 /* Needed mode is set to AVX_U128_CLEAN if there are
16564 no 256bit modes used in function arguments. */
16565 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16566 link;
16567 link = XEXP (link, 1))
16569 if (GET_CODE (XEXP (link, 0)) == USE)
16571 rtx arg = XEXP (XEXP (link, 0), 0);
16573 if (ix86_check_avx256_register (arg))
16574 return AVX_U128_DIRTY;
16578 return AVX_U128_CLEAN;
16581 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16582 changes state only when a 256bit register is written to, but we need
16583 to prevent the compiler from moving the optimal insertion point above
16584 an eventual read from a 256bit register. */
16585 subrtx_iterator::array_type array;
16586 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16587 if (ix86_check_avx256_register (*iter))
16588 return AVX_U128_DIRTY;
16590 return AVX_U128_ANY;
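/* Roughly, the mode switching pass uses the values above as follows
   (an illustrative sketch): a call whose argument list contains no
   256bit value needs AVX_U128_CLEAN, so a vzeroupper is inserted
   before it when the upper halves are currently DIRTY; an insn that
   references a 256bit register needs AVX_U128_DIRTY, which keeps any
   vzeroupper from being hoisted above it.  */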
16593 /* Return mode that i387 must be switched into
16594 prior to the execution of insn. */
16596 static int
16597 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16599 enum attr_i387_cw mode;
16601 /* The mode UNINITIALIZED is used to store the control word after a
16602 function call or ASM pattern. The mode ANY specifies that the function
16603 has no requirements on the control word and makes no changes in the
16604 bits we are interested in. */
16606 if (CALL_P (insn)
16607 || (NONJUMP_INSN_P (insn)
16608 && (asm_noperands (PATTERN (insn)) >= 0
16609 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16610 return I387_CW_UNINITIALIZED;
16612 if (recog_memoized (insn) < 0)
16613 return I387_CW_ANY;
16615 mode = get_attr_i387_cw (insn);
16617 switch (entity)
16619 case I387_TRUNC:
16620 if (mode == I387_CW_TRUNC)
16621 return mode;
16622 break;
16624 case I387_FLOOR:
16625 if (mode == I387_CW_FLOOR)
16626 return mode;
16627 break;
16629 case I387_CEIL:
16630 if (mode == I387_CW_CEIL)
16631 return mode;
16632 break;
16634 case I387_MASK_PM:
16635 if (mode == I387_CW_MASK_PM)
16636 return mode;
16637 break;
16639 default:
16640 gcc_unreachable ();
16643 return I387_CW_ANY;
16646 /* Return mode that entity must be switched into
16647 prior to the execution of insn. */
16649 static int
16650 ix86_mode_needed (int entity, rtx_insn *insn)
16652 switch (entity)
16654 case AVX_U128:
16655 return ix86_avx_u128_mode_needed (insn);
16656 case I387_TRUNC:
16657 case I387_FLOOR:
16658 case I387_CEIL:
16659 case I387_MASK_PM:
16660 return ix86_i387_mode_needed (entity, insn);
16661 default:
16662 gcc_unreachable ();
16664 return 0;
16667 /* Check if a 256bit AVX register is referenced in stores. */
16669 static void
16670 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16672 if (ix86_check_avx256_register (dest))
16674 bool *used = (bool *) data;
16675 *used = true;
16679 /* Calculate the mode of the upper 128 bits of the AVX registers after the insn. */
16681 static int
16682 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16684 rtx pat = PATTERN (insn);
16686 if (vzeroupper_operation (pat, VOIDmode)
16687 || vzeroall_operation (pat, VOIDmode))
16688 return AVX_U128_CLEAN;
16690 /* We know that the state is clean after a CALL insn if no 256bit
16691 register is used for the function return value. */
16692 if (CALL_P (insn))
16694 bool avx_reg256_found = false;
16695 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16697 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16700 /* Otherwise, return the current mode. Remember that if the insn
16701 references AVX 256bit registers, the mode was already changed to
16702 DIRTY by the MODE_NEEDED computation. */
16703 return mode;
16706 /* Return the mode that an insn results in. */
16709 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16711 switch (entity)
16713 case AVX_U128:
16714 return ix86_avx_u128_mode_after (mode, insn);
16715 case I387_TRUNC:
16716 case I387_FLOOR:
16717 case I387_CEIL:
16718 case I387_MASK_PM:
16719 return mode;
16720 default:
16721 gcc_unreachable ();
16725 static int
16726 ix86_avx_u128_mode_entry (void)
16728 tree arg;
16730 /* Entry mode is set to AVX_U128_DIRTY if there are
16731 256bit modes used in function arguments. */
16732 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16733 arg = TREE_CHAIN (arg))
16735 rtx incoming = DECL_INCOMING_RTL (arg);
16737 if (incoming && ix86_check_avx256_register (incoming))
16738 return AVX_U128_DIRTY;
16741 return AVX_U128_CLEAN;
16744 /* Return a mode that ENTITY is assumed to be
16745 switched to at function entry. */
16747 static int
16748 ix86_mode_entry (int entity)
16750 switch (entity)
16752 case AVX_U128:
16753 return ix86_avx_u128_mode_entry ();
16754 case I387_TRUNC:
16755 case I387_FLOOR:
16756 case I387_CEIL:
16757 case I387_MASK_PM:
16758 return I387_CW_ANY;
16759 default:
16760 gcc_unreachable ();
16764 static int
16765 ix86_avx_u128_mode_exit (void)
16767 rtx reg = crtl->return_rtx;
16769 /* Exit mode is set to AVX_U128_DIRTY if there are
16770 256bit modes used in the function return register. */
16771 if (reg && ix86_check_avx256_register (reg))
16772 return AVX_U128_DIRTY;
16774 return AVX_U128_CLEAN;
16777 /* Return a mode that ENTITY is assumed to be
16778 switched to at function exit. */
16780 static int
16781 ix86_mode_exit (int entity)
16783 switch (entity)
16785 case AVX_U128:
16786 return ix86_avx_u128_mode_exit ();
16787 case I387_TRUNC:
16788 case I387_FLOOR:
16789 case I387_CEIL:
16790 case I387_MASK_PM:
16791 return I387_CW_ANY;
16792 default:
16793 gcc_unreachable ();
16797 static int
16798 ix86_mode_priority (int, int n)
16800 return n;
16803 /* Output code to initialize the control word copies used by trunc?f?i and
16804 rounding patterns. MODE selects the rounding/precision variant; the
16805 adjusted control word is stored in that mode's stack slot. */
16807 static void
16808 emit_i387_cw_initialization (int mode)
16810 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16811 rtx new_mode;
16813 enum ix86_stack_slot slot;
16815 rtx reg = gen_reg_rtx (HImode);
16817 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16818 emit_move_insn (reg, copy_rtx (stored_mode));
16820 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16821 || optimize_insn_for_size_p ())
16823 switch (mode)
16825 case I387_CW_TRUNC:
16826 /* round toward zero (truncate) */
16827 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16828 slot = SLOT_CW_TRUNC;
16829 break;
16831 case I387_CW_FLOOR:
16832 /* round down toward -oo */
16833 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16834 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16835 slot = SLOT_CW_FLOOR;
16836 break;
16838 case I387_CW_CEIL:
16839 /* round up toward +oo */
16840 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16841 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16842 slot = SLOT_CW_CEIL;
16843 break;
16845 case I387_CW_MASK_PM:
16846 /* mask precision exception for nearbyint() */
16847 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16848 slot = SLOT_CW_MASK_PM;
16849 break;
16851 default:
16852 gcc_unreachable ();
16855 else
16857 switch (mode)
16859 case I387_CW_TRUNC:
16860 /* round toward zero (truncate) */
16861 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16862 slot = SLOT_CW_TRUNC;
16863 break;
16865 case I387_CW_FLOOR:
16866 /* round down toward -oo */
16867 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16868 slot = SLOT_CW_FLOOR;
16869 break;
16871 case I387_CW_CEIL:
16872 /* round up toward +oo */
16873 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16874 slot = SLOT_CW_CEIL;
16875 break;
16877 case I387_CW_MASK_PM:
16878 /* mask precision exception for nearbyint() */
16879 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16880 slot = SLOT_CW_MASK_PM;
16881 break;
16883 default:
16884 gcc_unreachable ();
16888 gcc_assert (slot < MAX_386_STACK_LOCALS);
16890 new_mode = assign_386_stack_local (HImode, slot);
16891 emit_move_insn (new_mode, reg);
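/* Illustrative sketch of the result: for I387_CW_TRUNC this emits
   roughly
     fnstcw  <SLOT_CW_STORED>
     movzwl  <SLOT_CW_STORED>, %eax
     orw     $0xc00, %ax          ; RC field (bits 11:10) = 11b, truncate
     movw    %ax, <SLOT_CW_TRUNC>
   FLOOR/CEIL clear the RC field first and set it to 01b/10b, and
   MASK_PM just sets bit 5 (the precision-exception mask).  The fldcw
   that activates the new control word is emitted by the insn patterns
   that use these slots.  */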
16894 /* Emit vzeroupper. */
16896 void
16897 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16899 int i;
16901 /* Cancel automatic vzeroupper insertion if there are
16902 live call-saved SSE registers at the insertion point. */
16904 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16905 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16906 return;
16908 if (TARGET_64BIT)
16909 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16910 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16911 return;
16913 emit_insn (gen_avx_vzeroupper ());
16918 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE is the
16919 set of hard registers live at the point where the insn(s) are to be
16920 inserted. */
16922 static void
16923 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16924 HARD_REG_SET regs_live)
16926 switch (entity)
16928 case AVX_U128:
16929 if (mode == AVX_U128_CLEAN)
16930 ix86_avx_emit_vzeroupper (regs_live);
16931 break;
16932 case I387_TRUNC:
16933 case I387_FLOOR:
16934 case I387_CEIL:
16935 case I387_MASK_PM:
16936 if (mode != I387_CW_ANY
16937 && mode != I387_CW_UNINITIALIZED)
16938 emit_i387_cw_initialization (mode);
16939 break;
16940 default:
16941 gcc_unreachable ();
16945 /* Output code for INSN to convert a float to a signed int. OPERANDS
16946 are the insn operands. The output may be [HSD]Imode and the input
16947 operand may be [SDX]Fmode. */
16949 const char *
16950 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16952 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16953 int dimode_p = GET_MODE (operands[0]) == DImode;
16954 int round_mode = get_attr_i387_cw (insn);
16956 /* Jump through a hoop or two for DImode, since the hardware has no
16957 non-popping instruction. We used to do this a different way, but
16958 that was somewhat fragile and broke with post-reload splitters. */
16959 if ((dimode_p || fisttp) && !stack_top_dies)
16960 output_asm_insn ("fld\t%y1", operands);
16962 gcc_assert (STACK_TOP_P (operands[1]));
16963 gcc_assert (MEM_P (operands[0]));
16964 gcc_assert (GET_MODE (operands[1]) != TFmode);
16966 if (fisttp)
16967 output_asm_insn ("fisttp%Z0\t%0", operands);
16968 else
16970 if (round_mode != I387_CW_ANY)
16971 output_asm_insn ("fldcw\t%3", operands);
16972 if (stack_top_dies || dimode_p)
16973 output_asm_insn ("fistp%Z0\t%0", operands);
16974 else
16975 output_asm_insn ("fist%Z0\t%0", operands);
16976 if (round_mode != I387_CW_ANY)
16977 output_asm_insn ("fldcw\t%2", operands);
16980 return "";
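/* For example (a sketch of the emitted assembly): an SImode fix with a
   non-default rounding mode and a dying stack top becomes roughly
     fldcw   %3        ; load the new control word
     fistpl  %0        ; convert and pop
     fldcw   %2        ; restore the original control word
   while the SSE3 fisttp form needs no control-word juggling.  */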
16983 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16984 have the values zero or one, indicates the ffreep insn's operand
16985 from the OPERANDS array. */
16987 static const char *
16988 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16990 if (TARGET_USE_FFREEP)
16991 #ifdef HAVE_AS_IX86_FFREEP
16992 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16993 #else
16995 static char retval[32];
16996 int regno = REGNO (operands[opno]);
16998 gcc_assert (STACK_REGNO_P (regno));
17000 regno -= FIRST_STACK_REG;
17002 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17003 return retval;
17005 #endif
17007 return opno ? "fstp\t%y1" : "fstp\t%y0";
17011 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17012 should be used. UNORDERED_P is true when fucom should be used. */
17014 const char *
17015 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17017 int stack_top_dies;
17018 rtx cmp_op0, cmp_op1;
17019 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17021 if (eflags_p)
17023 cmp_op0 = operands[0];
17024 cmp_op1 = operands[1];
17026 else
17028 cmp_op0 = operands[1];
17029 cmp_op1 = operands[2];
17032 if (is_sse)
17034 if (GET_MODE (operands[0]) == SFmode)
17035 if (unordered_p)
17036 return "%vucomiss\t{%1, %0|%0, %1}";
17037 else
17038 return "%vcomiss\t{%1, %0|%0, %1}";
17039 else
17040 if (unordered_p)
17041 return "%vucomisd\t{%1, %0|%0, %1}";
17042 else
17043 return "%vcomisd\t{%1, %0|%0, %1}";
17046 gcc_assert (STACK_TOP_P (cmp_op0));
17048 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17050 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17052 if (stack_top_dies)
17054 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17055 return output_387_ffreep (operands, 1);
17057 else
17058 return "ftst\n\tfnstsw\t%0";
17061 if (STACK_REG_P (cmp_op1)
17062 && stack_top_dies
17063 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17064 && REGNO (cmp_op1) != FIRST_STACK_REG)
17066 /* If both the top of the 387 stack and the other operand (also a
17067 stack register) die, then this must be a `fcompp' float
17068 compare. */
17070 if (eflags_p)
17072 /* There is no double popping fcomi variant. Fortunately,
17073 eflags is immune from the fstp's cc clobbering. */
17074 if (unordered_p)
17075 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17076 else
17077 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17078 return output_387_ffreep (operands, 0);
17080 else
17082 if (unordered_p)
17083 return "fucompp\n\tfnstsw\t%0";
17084 else
17085 return "fcompp\n\tfnstsw\t%0";
17088 else
17090 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17092 static const char * const alt[16] =
17094 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17095 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17096 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17097 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17099 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17100 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17101 NULL,
17102 NULL,
17104 "fcomi\t{%y1, %0|%0, %y1}",
17105 "fcomip\t{%y1, %0|%0, %y1}",
17106 "fucomi\t{%y1, %0|%0, %y1}",
17107 "fucomip\t{%y1, %0|%0, %y1}",
17109 NULL,
17110 NULL,
17111 NULL,
17112 NULL
17115 int mask;
17116 const char *ret;
17118 mask = eflags_p << 3;
17119 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17120 mask |= unordered_p << 1;
17121 mask |= stack_top_dies;
17123 gcc_assert (mask < 16);
17124 ret = alt[mask];
17125 gcc_assert (ret);
17127 return ret;
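/* For example (sketch): with eflags_p = 1, a non-integer operand,
   unordered_p = 1 and a dying stack top, the mask is
   (1<<3) | (0<<2) | (1<<1) | 1 = 11, selecting
     "fucomip\t{%y1, %0|%0, %y1}"
   from the table above.  */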
17131 void
17132 ix86_output_addr_vec_elt (FILE *file, int value)
17134 const char *directive = ASM_LONG;
17136 #ifdef ASM_QUAD
17137 if (TARGET_LP64)
17138 directive = ASM_QUAD;
17139 #else
17140 gcc_assert (!TARGET_64BIT);
17141 #endif
17143 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17146 void
17147 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17149 const char *directive = ASM_LONG;
17151 #ifdef ASM_QUAD
17152 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17153 directive = ASM_QUAD;
17154 #else
17155 gcc_assert (!TARGET_64BIT);
17156 #endif
17157 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17158 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17159 fprintf (file, "%s%s%d-%s%d\n",
17160 directive, LPREFIX, value, LPREFIX, rel);
17161 else if (HAVE_AS_GOTOFF_IN_DATA)
17162 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17163 #if TARGET_MACHO
17164 else if (TARGET_MACHO)
17166 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17167 machopic_output_function_base_name (file);
17168 putc ('\n', file);
17170 #endif
17171 else
17172 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17173 GOT_SYMBOL_NAME, LPREFIX, value);
17176 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17177 for the target. */
17179 void
17180 ix86_expand_clear (rtx dest)
17182 rtx tmp;
17184 /* We play register width games, which are only valid after reload. */
17185 gcc_assert (reload_completed);
17187 /* Avoid HImode and its attendant prefix byte. */
17188 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17189 dest = gen_rtx_REG (SImode, REGNO (dest));
17190 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17192 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17194 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17195 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17198 emit_insn (tmp);
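/* For example (sketch): clearing %eax normally produces
     xorl  %eax, %eax          ; parallel with a FLAGS_REG clobber
   whereas with TARGET_USE_MOV0 and not optimizing for size it is
     movl  $0, %eax
   which leaves the flags untouched.  */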
17201 /* X is an unchanging MEM. If it is a constant pool reference, return
17202 the constant pool rtx, else NULL. */
17205 maybe_get_pool_constant (rtx x)
17207 x = ix86_delegitimize_address (XEXP (x, 0));
17209 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17210 return get_pool_constant (x);
17212 return NULL_RTX;
17215 void
17216 ix86_expand_move (machine_mode mode, rtx operands[])
17218 rtx op0, op1;
17219 enum tls_model model;
17221 op0 = operands[0];
17222 op1 = operands[1];
17224 if (GET_CODE (op1) == SYMBOL_REF)
17226 rtx tmp;
17228 model = SYMBOL_REF_TLS_MODEL (op1);
17229 if (model)
17231 op1 = legitimize_tls_address (op1, model, true);
17232 op1 = force_operand (op1, op0);
17233 if (op1 == op0)
17234 return;
17235 op1 = convert_to_mode (mode, op1, 1);
17237 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17238 op1 = tmp;
17240 else if (GET_CODE (op1) == CONST
17241 && GET_CODE (XEXP (op1, 0)) == PLUS
17242 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17244 rtx addend = XEXP (XEXP (op1, 0), 1);
17245 rtx symbol = XEXP (XEXP (op1, 0), 0);
17246 rtx tmp;
17248 model = SYMBOL_REF_TLS_MODEL (symbol);
17249 if (model)
17250 tmp = legitimize_tls_address (symbol, model, true);
17251 else
17252 tmp = legitimize_pe_coff_symbol (symbol, true);
17254 if (tmp)
17256 tmp = force_operand (tmp, NULL);
17257 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17258 op0, 1, OPTAB_DIRECT);
17259 if (tmp == op0)
17260 return;
17261 op1 = convert_to_mode (mode, tmp, 1);
17265 if ((flag_pic || MACHOPIC_INDIRECT)
17266 && symbolic_operand (op1, mode))
17268 if (TARGET_MACHO && !TARGET_64BIT)
17270 #if TARGET_MACHO
17271 /* dynamic-no-pic */
17272 if (MACHOPIC_INDIRECT)
17274 rtx temp = ((reload_in_progress
17275 || ((op0 && REG_P (op0))
17276 && mode == Pmode))
17277 ? op0 : gen_reg_rtx (Pmode));
17278 op1 = machopic_indirect_data_reference (op1, temp);
17279 if (MACHOPIC_PURE)
17280 op1 = machopic_legitimize_pic_address (op1, mode,
17281 temp == op1 ? 0 : temp);
17283 if (op0 != op1 && GET_CODE (op0) != MEM)
17285 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17286 emit_insn (insn);
17287 return;
17289 if (GET_CODE (op0) == MEM)
17290 op1 = force_reg (Pmode, op1);
17291 else
17293 rtx temp = op0;
17294 if (GET_CODE (temp) != REG)
17295 temp = gen_reg_rtx (Pmode);
17296 temp = legitimize_pic_address (op1, temp);
17297 if (temp == op0)
17298 return;
17299 op1 = temp;
17301 /* dynamic-no-pic */
17302 #endif
17304 else
17306 if (MEM_P (op0))
17307 op1 = force_reg (mode, op1);
17308 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17310 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17311 op1 = legitimize_pic_address (op1, reg);
17312 if (op0 == op1)
17313 return;
17314 op1 = convert_to_mode (mode, op1, 1);
17318 else
17320 if (MEM_P (op0)
17321 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17322 || !push_operand (op0, mode))
17323 && MEM_P (op1))
17324 op1 = force_reg (mode, op1);
17326 if (push_operand (op0, mode)
17327 && ! general_no_elim_operand (op1, mode))
17328 op1 = copy_to_mode_reg (mode, op1);
17330 /* Force large constants in 64bit compilation into a register
17331 to get them CSEed. */
17332 if (can_create_pseudo_p ()
17333 && (mode == DImode) && TARGET_64BIT
17334 && immediate_operand (op1, mode)
17335 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17336 && !register_operand (op0, mode)
17337 && optimize)
17338 op1 = copy_to_mode_reg (mode, op1);
17340 if (can_create_pseudo_p ()
17341 && FLOAT_MODE_P (mode)
17342 && GET_CODE (op1) == CONST_DOUBLE)
17344 /* If we are loading a floating point constant to a register,
17345 force the value to memory now, since we'll get better code
17346 out of the back end. */
17348 op1 = validize_mem (force_const_mem (mode, op1));
17349 if (!register_operand (op0, mode))
17351 rtx temp = gen_reg_rtx (mode);
17352 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17353 emit_move_insn (op0, temp);
17354 return;
17359 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17362 void
17363 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17365 rtx op0 = operands[0], op1 = operands[1];
17366 unsigned int align = GET_MODE_ALIGNMENT (mode);
17368 if (push_operand (op0, VOIDmode))
17369 op0 = emit_move_resolve_push (mode, op0);
17371 /* Force constants other than zero into memory. We do not know how
17372 the instructions used to build constants modify the upper 64 bits
17373 of the register; once we have that information, we may be able
17374 to handle some of them more efficiently. */
17375 if (can_create_pseudo_p ()
17376 && register_operand (op0, mode)
17377 && (CONSTANT_P (op1)
17378 || (GET_CODE (op1) == SUBREG
17379 && CONSTANT_P (SUBREG_REG (op1))))
17380 && !standard_sse_constant_p (op1))
17381 op1 = validize_mem (force_const_mem (mode, op1));
17383 /* We need to check memory alignment for SSE modes since attributes
17384 can make operands unaligned. */
17385 if (can_create_pseudo_p ()
17386 && SSE_REG_MODE_P (mode)
17387 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17388 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17390 rtx tmp[2];
17392 /* ix86_expand_vector_move_misalign() does not like constants ... */
17393 if (CONSTANT_P (op1)
17394 || (GET_CODE (op1) == SUBREG
17395 && CONSTANT_P (SUBREG_REG (op1))))
17396 op1 = validize_mem (force_const_mem (mode, op1));
17398 /* ... nor both arguments in memory. */
17399 if (!register_operand (op0, mode)
17400 && !register_operand (op1, mode))
17401 op1 = force_reg (mode, op1);
17403 tmp[0] = op0; tmp[1] = op1;
17404 ix86_expand_vector_move_misalign (mode, tmp);
17405 return;
17408 /* Make operand1 a register if it isn't already. */
17409 if (can_create_pseudo_p ()
17410 && !register_operand (op0, mode)
17411 && !register_operand (op1, mode))
17413 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17414 return;
17417 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17420 /* Split 32-byte AVX unaligned load and store if needed. */
17422 static void
17423 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17425 rtx m;
17426 rtx (*extract) (rtx, rtx, rtx);
17427 rtx (*load_unaligned) (rtx, rtx);
17428 rtx (*store_unaligned) (rtx, rtx);
17429 machine_mode mode;
17431 switch (GET_MODE (op0))
17433 default:
17434 gcc_unreachable ();
17435 case V32QImode:
17436 extract = gen_avx_vextractf128v32qi;
17437 load_unaligned = gen_avx_loaddquv32qi;
17438 store_unaligned = gen_avx_storedquv32qi;
17439 mode = V16QImode;
17440 break;
17441 case V8SFmode:
17442 extract = gen_avx_vextractf128v8sf;
17443 load_unaligned = gen_avx_loadups256;
17444 store_unaligned = gen_avx_storeups256;
17445 mode = V4SFmode;
17446 break;
17447 case V4DFmode:
17448 extract = gen_avx_vextractf128v4df;
17449 load_unaligned = gen_avx_loadupd256;
17450 store_unaligned = gen_avx_storeupd256;
17451 mode = V2DFmode;
17452 break;
17455 if (MEM_P (op1))
17457 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17459 rtx r = gen_reg_rtx (mode);
17460 m = adjust_address (op1, mode, 0);
17461 emit_move_insn (r, m);
17462 m = adjust_address (op1, mode, 16);
17463 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17464 emit_move_insn (op0, r);
17466 /* Normal *mov<mode>_internal pattern will handle
17467 unaligned loads just fine if misaligned_operand
17468 is true, and without the UNSPEC it can be combined
17469 with arithmetic instructions. */
17470 else if (misaligned_operand (op1, GET_MODE (op1)))
17471 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17472 else
17473 emit_insn (load_unaligned (op0, op1));
17475 else if (MEM_P (op0))
17477 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17479 m = adjust_address (op0, mode, 0);
17480 emit_insn (extract (m, op1, const0_rtx));
17481 m = adjust_address (op0, mode, 16);
17482 emit_insn (extract (m, op1, const1_rtx));
17484 else
17485 emit_insn (store_unaligned (op0, op1));
17487 else
17488 gcc_unreachable ();
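/* Illustrative sketch: with TARGET_AVX256_SPLIT_UNALIGNED_LOAD a
   misaligned V8SF load is emitted roughly as
     vmovups      mem, %xmm0
     vinsertf128  $1, mem+16, %ymm0, %ymm0
   and with TARGET_AVX256_SPLIT_UNALIGNED_STORE a store becomes two
   vextractf128 halves; otherwise a single 32-byte unaligned
   vmovups/vmovdqu is used.  */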
17491 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17492 straight to ix86_expand_vector_move. */
17493 /* Code generation for scalar reg-reg moves of single and double precision data:
17494 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17495 movaps reg, reg
17496 else
17497 movss reg, reg
17498 if (x86_sse_partial_reg_dependency == true)
17499 movapd reg, reg
17500 else
17501 movsd reg, reg
17503 Code generation for scalar loads of double precision data:
17504 if (x86_sse_split_regs == true)
17505 movlpd mem, reg (gas syntax)
17506 else
17507 movsd mem, reg
17509 Code generation for unaligned packed loads of single precision data
17510 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17511 if (x86_sse_unaligned_move_optimal)
17512 movups mem, reg
17514 if (x86_sse_partial_reg_dependency == true)
17516 xorps reg, reg
17517 movlps mem, reg
17518 movhps mem+8, reg
17520 else
17522 movlps mem, reg
17523 movhps mem+8, reg
17526 Code generation for unaligned packed loads of double precision data
17527 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17528 if (x86_sse_unaligned_move_optimal)
17529 movupd mem, reg
17531 if (x86_sse_split_regs == true)
17533 movlpd mem, reg
17534 movhpd mem+8, reg
17536 else
17538 movsd mem, reg
17539 movhpd mem+8, reg
17543 void
17544 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17546 rtx op0, op1, orig_op0 = NULL_RTX, m;
17547 rtx (*load_unaligned) (rtx, rtx);
17548 rtx (*store_unaligned) (rtx, rtx);
17550 op0 = operands[0];
17551 op1 = operands[1];
17553 if (GET_MODE_SIZE (mode) == 64)
17555 switch (GET_MODE_CLASS (mode))
17557 case MODE_VECTOR_INT:
17558 case MODE_INT:
17559 if (GET_MODE (op0) != V16SImode)
17561 if (!MEM_P (op0))
17563 orig_op0 = op0;
17564 op0 = gen_reg_rtx (V16SImode);
17566 else
17567 op0 = gen_lowpart (V16SImode, op0);
17569 op1 = gen_lowpart (V16SImode, op1);
17570 /* FALLTHRU */
17572 case MODE_VECTOR_FLOAT:
17573 switch (GET_MODE (op0))
17575 default:
17576 gcc_unreachable ();
17577 case V16SImode:
17578 load_unaligned = gen_avx512f_loaddquv16si;
17579 store_unaligned = gen_avx512f_storedquv16si;
17580 break;
17581 case V16SFmode:
17582 load_unaligned = gen_avx512f_loadups512;
17583 store_unaligned = gen_avx512f_storeups512;
17584 break;
17585 case V8DFmode:
17586 load_unaligned = gen_avx512f_loadupd512;
17587 store_unaligned = gen_avx512f_storeupd512;
17588 break;
17591 if (MEM_P (op1))
17592 emit_insn (load_unaligned (op0, op1));
17593 else if (MEM_P (op0))
17594 emit_insn (store_unaligned (op0, op1));
17595 else
17596 gcc_unreachable ();
17597 if (orig_op0)
17598 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17599 break;
17601 default:
17602 gcc_unreachable ();
17605 return;
17608 if (TARGET_AVX
17609 && GET_MODE_SIZE (mode) == 32)
17611 switch (GET_MODE_CLASS (mode))
17613 case MODE_VECTOR_INT:
17614 case MODE_INT:
17615 if (GET_MODE (op0) != V32QImode)
17617 if (!MEM_P (op0))
17619 orig_op0 = op0;
17620 op0 = gen_reg_rtx (V32QImode);
17622 else
17623 op0 = gen_lowpart (V32QImode, op0);
17625 op1 = gen_lowpart (V32QImode, op1);
17626 /* FALLTHRU */
17628 case MODE_VECTOR_FLOAT:
17629 ix86_avx256_split_vector_move_misalign (op0, op1);
17630 if (orig_op0)
17631 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17632 break;
17634 default:
17635 gcc_unreachable ();
17638 return;
17641 if (MEM_P (op1))
17643 /* Normal *mov<mode>_internal pattern will handle
17644 unaligned loads just fine if misaligned_operand
17645 is true, and without the UNSPEC it can be combined
17646 with arithmetic instructions. */
17647 if (TARGET_AVX
17648 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17649 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17650 && misaligned_operand (op1, GET_MODE (op1)))
17651 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17652 /* ??? If we have typed data, then it would appear that using
17653 movdqu is the only way to get unaligned data loaded with
17654 integer type. */
17655 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17657 if (GET_MODE (op0) != V16QImode)
17659 orig_op0 = op0;
17660 op0 = gen_reg_rtx (V16QImode);
17662 op1 = gen_lowpart (V16QImode, op1);
17663 /* We will eventually emit movups based on insn attributes. */
17664 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17665 if (orig_op0)
17666 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17668 else if (TARGET_SSE2 && mode == V2DFmode)
17670 rtx zero;
17672 if (TARGET_AVX
17673 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17674 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17675 || optimize_insn_for_size_p ())
17677 /* We will eventually emit movups based on insn attributes. */
17678 emit_insn (gen_sse2_loadupd (op0, op1));
17679 return;
17682 /* When SSE registers are split into halves, we can avoid
17683 writing to the top half twice. */
17684 if (TARGET_SSE_SPLIT_REGS)
17686 emit_clobber (op0);
17687 zero = op0;
17689 else
17691 /* ??? Not sure about the best option for the Intel chips.
17692 The following would seem to satisfy; the register is
17693 entirely cleared, breaking the dependency chain. We
17694 then store to the upper half, with a dependency depth
17695 of one. A rumor has it that Intel recommends two movsd
17696 followed by an unpacklpd, but this is unconfirmed. And
17697 given that the dependency depth of the unpacklpd would
17698 still be one, I'm not sure why this would be better. */
17699 zero = CONST0_RTX (V2DFmode);
17702 m = adjust_address (op1, DFmode, 0);
17703 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17704 m = adjust_address (op1, DFmode, 8);
17705 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17707 else
17709 rtx t;
17711 if (TARGET_AVX
17712 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17713 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17714 || optimize_insn_for_size_p ())
17716 if (GET_MODE (op0) != V4SFmode)
17718 orig_op0 = op0;
17719 op0 = gen_reg_rtx (V4SFmode);
17721 op1 = gen_lowpart (V4SFmode, op1);
17722 emit_insn (gen_sse_loadups (op0, op1));
17723 if (orig_op0)
17724 emit_move_insn (orig_op0,
17725 gen_lowpart (GET_MODE (orig_op0), op0));
17726 return;
17729 if (mode != V4SFmode)
17730 t = gen_reg_rtx (V4SFmode);
17731 else
17732 t = op0;
17734 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17735 emit_move_insn (t, CONST0_RTX (V4SFmode));
17736 else
17737 emit_clobber (t);
17739 m = adjust_address (op1, V2SFmode, 0);
17740 emit_insn (gen_sse_loadlps (t, t, m));
17741 m = adjust_address (op1, V2SFmode, 8);
17742 emit_insn (gen_sse_loadhps (t, t, m));
17743 if (mode != V4SFmode)
17744 emit_move_insn (op0, gen_lowpart (mode, t));
17747 else if (MEM_P (op0))
17749 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17751 op0 = gen_lowpart (V16QImode, op0);
17752 op1 = gen_lowpart (V16QImode, op1);
17753 /* We will eventually emit movups based on insn attributes. */
17754 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17756 else if (TARGET_SSE2 && mode == V2DFmode)
17758 if (TARGET_AVX
17759 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17760 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17761 || optimize_insn_for_size_p ())
17762 /* We will eventually emit movups based on insn attributes. */
17763 emit_insn (gen_sse2_storeupd (op0, op1));
17764 else
17766 m = adjust_address (op0, DFmode, 0);
17767 emit_insn (gen_sse2_storelpd (m, op1));
17768 m = adjust_address (op0, DFmode, 8);
17769 emit_insn (gen_sse2_storehpd (m, op1));
17772 else
17774 if (mode != V4SFmode)
17775 op1 = gen_lowpart (V4SFmode, op1);
17777 if (TARGET_AVX
17778 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17779 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17780 || optimize_insn_for_size_p ())
17782 op0 = gen_lowpart (V4SFmode, op0);
17783 emit_insn (gen_sse_storeups (op0, op1));
17785 else
17787 m = adjust_address (op0, V2SFmode, 0);
17788 emit_insn (gen_sse_storelps (m, op1));
17789 m = adjust_address (op0, V2SFmode, 8);
17790 emit_insn (gen_sse_storehps (m, op1));
17794 else
17795 gcc_unreachable ();
17798 /* Helper function of ix86_fixup_binary_operands to canonicalize
17799 operand order. Returns true if the operands should be swapped. */
17801 static bool
17802 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17803 rtx operands[])
17805 rtx dst = operands[0];
17806 rtx src1 = operands[1];
17807 rtx src2 = operands[2];
17809 /* If the operation is not commutative, we can't do anything. */
17810 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17811 return false;
17813 /* Highest priority is that src1 should match dst. */
17814 if (rtx_equal_p (dst, src1))
17815 return false;
17816 if (rtx_equal_p (dst, src2))
17817 return true;
17819 /* Next highest priority is that immediate constants come second. */
17820 if (immediate_operand (src2, mode))
17821 return false;
17822 if (immediate_operand (src1, mode))
17823 return true;
17825 /* Lowest priority is that memory references should come second. */
17826 if (MEM_P (src2))
17827 return false;
17828 if (MEM_P (src1))
17829 return true;
17831 return false;
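/* For example (sketch): when expanding d = s + d with a commutative
   PLUS, the operands are swapped so that src1 matches the destination
   and the insn can use the two-address "add" form; likewise an
   immediate or memory src1 is moved into the second slot.  */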
17835 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17836 destination to use for the operation. If different from the true
17837 destination in operands[0], a copy operation will be required. */
17840 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17841 rtx operands[])
17843 rtx dst = operands[0];
17844 rtx src1 = operands[1];
17845 rtx src2 = operands[2];
17847 /* Canonicalize operand order. */
17848 if (ix86_swap_binary_operands_p (code, mode, operands))
17850 /* It is invalid to swap operands of different modes. */
17851 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17853 std::swap (src1, src2);
17856 /* Both source operands cannot be in memory. */
17857 if (MEM_P (src1) && MEM_P (src2))
17859 /* Optimization: Only read from memory once. */
17860 if (rtx_equal_p (src1, src2))
17862 src2 = force_reg (mode, src2);
17863 src1 = src2;
17865 else if (rtx_equal_p (dst, src1))
17866 src2 = force_reg (mode, src2);
17867 else
17868 src1 = force_reg (mode, src1);
17871 /* If the destination is memory, and we do not have matching source
17872 operands, do things in registers. */
17873 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17874 dst = gen_reg_rtx (mode);
17876 /* Source 1 cannot be a constant. */
17877 if (CONSTANT_P (src1))
17878 src1 = force_reg (mode, src1);
17880 /* Source 1 cannot be a non-matching memory. */
17881 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17882 src1 = force_reg (mode, src1);
17884 /* Improve address combine. */
17885 if (code == PLUS
17886 && GET_MODE_CLASS (mode) == MODE_INT
17887 && MEM_P (src2))
17888 src2 = force_reg (mode, src2);
17890 operands[1] = src1;
17891 operands[2] = src2;
17892 return dst;
17895 /* Similarly, but assume that the destination has already been
17896 set up properly. */
17898 void
17899 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17900 machine_mode mode, rtx operands[])
17902 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17903 gcc_assert (dst == operands[0]);
17906 /* Attempt to expand a binary operator. Make the expansion closer to the
17907 actual machine, than just general_operand, which will allow 3 separate
17908 memory references (one output, two input) in a single insn. */
17910 void
17911 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17912 rtx operands[])
17914 rtx src1, src2, dst, op, clob;
17916 dst = ix86_fixup_binary_operands (code, mode, operands);
17917 src1 = operands[1];
17918 src2 = operands[2];
17920 /* Emit the instruction. */
17922 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17923 if (reload_in_progress)
17925 /* Reload doesn't know about the flags register, and doesn't know that
17926 it doesn't want to clobber it. We can only do this with PLUS. */
17927 gcc_assert (code == PLUS);
17928 emit_insn (op);
17930 else if (reload_completed
17931 && code == PLUS
17932 && !rtx_equal_p (dst, src1))
17934 /* This is going to be an LEA; avoid splitting it later. */
17935 emit_insn (op);
17937 else
17939 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17940 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17943 /* Fix up the destination if needed. */
17944 if (dst != operands[0])
17945 emit_move_insn (operands[0], dst);
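/* For example (sketch): expanding c = a + b in SImode before reload
   emits a PARALLEL of
     (set (reg c) (plus (reg a) (reg b)))
     (clobber (reg:CC FLAGS_REG))
   so the add/LEA alternatives can be chosen later; after reload a
   non-matching PLUS is emitted without the clobber so it can stay
   an LEA.  */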
17948 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17949 the given OPERANDS. */
17951 void
17952 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17953 rtx operands[])
17955 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17956 if (GET_CODE (operands[1]) == SUBREG)
17958 op1 = operands[1];
17959 op2 = operands[2];
17961 else if (GET_CODE (operands[2]) == SUBREG)
17963 op1 = operands[2];
17964 op2 = operands[1];
17966 /* Optimize (__m128i) d | (__m128i) e and similar code
17967 when d and e are float vectors into a float vector logical
17968 insn. In C/C++, without using intrinsics there is no other way
17969 to express a vector logical operation on float vectors than
17970 to cast them temporarily to integer vectors. */
17971 if (op1
17972 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17973 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17974 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17975 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17976 && SUBREG_BYTE (op1) == 0
17977 && (GET_CODE (op2) == CONST_VECTOR
17978 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17979 && SUBREG_BYTE (op2) == 0))
17980 && can_create_pseudo_p ())
17982 rtx dst;
17983 switch (GET_MODE (SUBREG_REG (op1)))
17985 case V4SFmode:
17986 case V8SFmode:
17987 case V16SFmode:
17988 case V2DFmode:
17989 case V4DFmode:
17990 case V8DFmode:
17991 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17992 if (GET_CODE (op2) == CONST_VECTOR)
17994 op2 = gen_lowpart (GET_MODE (dst), op2);
17995 op2 = force_reg (GET_MODE (dst), op2);
17997 else
17999 op1 = operands[1];
18000 op2 = SUBREG_REG (operands[2]);
18001 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18002 op2 = force_reg (GET_MODE (dst), op2);
18004 op1 = SUBREG_REG (op1);
18005 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18006 op1 = force_reg (GET_MODE (dst), op1);
18007 emit_insn (gen_rtx_SET (VOIDmode, dst,
18008 gen_rtx_fmt_ee (code, GET_MODE (dst),
18009 op1, op2)));
18010 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18011 return;
18012 default:
18013 break;
18016 if (!nonimmediate_operand (operands[1], mode))
18017 operands[1] = force_reg (mode, operands[1]);
18018 if (!nonimmediate_operand (operands[2], mode))
18019 operands[2] = force_reg (mode, operands[2]);
18020 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18021 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18022 gen_rtx_fmt_ee (code, mode, operands[1],
18023 operands[2])));
18026 /* Return TRUE or FALSE depending on whether the binary operator meets the
18027 appropriate constraints. */
18029 bool
18030 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18031 rtx operands[3])
18033 rtx dst = operands[0];
18034 rtx src1 = operands[1];
18035 rtx src2 = operands[2];
18037 /* Both source operands cannot be in memory. */
18038 if (MEM_P (src1) && MEM_P (src2))
18039 return false;
18041 /* Canonicalize operand order for commutative operators. */
18042 if (ix86_swap_binary_operands_p (code, mode, operands))
18043 std::swap (src1, src2);
18045 /* If the destination is memory, we must have a matching source operand. */
18046 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18047 return false;
18049 /* Source 1 cannot be a constant. */
18050 if (CONSTANT_P (src1))
18051 return false;
18053 /* Source 1 cannot be a non-matching memory. */
18054 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18055 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18056 return (code == AND
18057 && (mode == HImode
18058 || mode == SImode
18059 || (TARGET_64BIT && mode == DImode))
18060 && satisfies_constraint_L (src2));
18062 return true;
18065 /* Attempt to expand a unary operator. Make the expansion closer to the
18066 actual machine, than just general_operand, which will allow 2 separate
18067 memory references (one output, one input) in a single insn. */
18069 void
18070 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18071 rtx operands[])
18073 int matching_memory;
18074 rtx src, dst, op, clob;
18076 dst = operands[0];
18077 src = operands[1];
18079 /* If the destination is memory, and we do not have matching source
18080 operands, do things in registers. */
18081 matching_memory = 0;
18082 if (MEM_P (dst))
18084 if (rtx_equal_p (dst, src))
18085 matching_memory = 1;
18086 else
18087 dst = gen_reg_rtx (mode);
18090 /* When source operand is memory, destination must match. */
18091 if (MEM_P (src) && !matching_memory)
18092 src = force_reg (mode, src);
18094 /* Emit the instruction. */
18096 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18097 if (reload_in_progress || code == NOT)
18099 /* Reload doesn't know about the flags register, and doesn't know that
18100 it doesn't want to clobber it. */
18101 gcc_assert (code == NOT);
18102 emit_insn (op);
18104 else
18106 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18107 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18110 /* Fix up the destination if needed. */
18111 if (dst != operands[0])
18112 emit_move_insn (operands[0], dst);
18115 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18116 divisor are within the range [0-255]. */
18118 void
18119 ix86_split_idivmod (machine_mode mode, rtx operands[],
18120 bool signed_p)
18122 rtx_code_label *end_label, *qimode_label;
18123 rtx insn, div, mod;
18124 rtx scratch, tmp0, tmp1, tmp2;
18125 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18126 rtx (*gen_zero_extend) (rtx, rtx);
18127 rtx (*gen_test_ccno_1) (rtx, rtx);
18129 switch (mode)
18131 case SImode:
18132 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18133 gen_test_ccno_1 = gen_testsi_ccno_1;
18134 gen_zero_extend = gen_zero_extendqisi2;
18135 break;
18136 case DImode:
18137 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18138 gen_test_ccno_1 = gen_testdi_ccno_1;
18139 gen_zero_extend = gen_zero_extendqidi2;
18140 break;
18141 default:
18142 gcc_unreachable ();
18145 end_label = gen_label_rtx ();
18146 qimode_label = gen_label_rtx ();
18148 scratch = gen_reg_rtx (mode);
18150 /* Use 8bit unsigned divmod if dividend and divisor are within
18151 the range [0-255]. */
18152 emit_move_insn (scratch, operands[2]);
18153 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18154 scratch, 1, OPTAB_DIRECT);
18155 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18156 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18157 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18158 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18159 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18160 pc_rtx);
18161 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18162 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18163 JUMP_LABEL (insn) = qimode_label;
18165 /* Generate the original signed/unsigned divmod. */
18166 div = gen_divmod4_1 (operands[0], operands[1],
18167 operands[2], operands[3]);
18168 emit_insn (div);
18170 /* Branch to the end. */
18171 emit_jump_insn (gen_jump (end_label));
18172 emit_barrier ();
18174 /* Generate 8bit unsigned divide. */
18175 emit_label (qimode_label);
18176 /* Don't use operands[0] for result of 8bit divide since not all
18177 registers support QImode ZERO_EXTRACT. */
18178 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18179 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18180 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18181 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18183 if (signed_p)
18185 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18186 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18188 else
18190 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18191 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18194 /* Extract remainder from AH. */
18195 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18196 if (REG_P (operands[1]))
18197 insn = emit_move_insn (operands[1], tmp1);
18198 else
18200 /* Need a new scratch register since the old one has result
18201 of 8bit divide. */
18202 scratch = gen_reg_rtx (mode);
18203 emit_move_insn (scratch, tmp1);
18204 insn = emit_move_insn (operands[1], scratch);
18206 set_unique_reg_note (insn, REG_EQUAL, mod);
18208 /* Zero extend quotient from AL. */
18209 tmp1 = gen_lowpart (QImode, tmp0);
18210 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18211 set_unique_reg_note (insn, REG_EQUAL, div);
18213 emit_label (end_label);
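/* Illustrative sketch of the emitted sequence for a 32-bit unsigned
   divide (hypothetical register names):
     movl    %esi, %ecx
     orl     %edi, %ecx
     testl   $-256, %ecx        ; both operands in [0-255]?
     je      .Lqimode
     <full divl sequence>
     jmp     .Ldone
   .Lqimode:
     <8-bit divb on the zero-extended operands>
   .Ldone:
   with the quotient then zero-extended from AL and the remainder
   taken from AH.  */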
18216 #define LEA_MAX_STALL (3)
18217 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18219 /* Increase given DISTANCE in half-cycles according to
18220 dependencies between PREV and NEXT instructions.
18221 Add 1 half-cycle if there is no dependency and
18222 go to the next cycle if there is a dependency. */
18224 static unsigned int
18225 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18227 df_ref def, use;
18229 if (!prev || !next)
18230 return distance + (distance & 1) + 2;
18232 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18233 return distance + 1;
18235 FOR_EACH_INSN_USE (use, next)
18236 FOR_EACH_INSN_DEF (def, prev)
18237 if (!DF_REF_IS_ARTIFICIAL (def)
18238 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18239 return distance + (distance & 1) + 2;
18241 return distance + 1;
18244 /* Function checks if instruction INSN defines register number
18245 REGNO1 or REGNO2. */
18247 static bool
18248 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18249 rtx insn)
18251 df_ref def;
18253 FOR_EACH_INSN_DEF (def, insn)
18254 if (DF_REF_REG_DEF_P (def)
18255 && !DF_REF_IS_ARTIFICIAL (def)
18256 && (regno1 == DF_REF_REGNO (def)
18257 || regno2 == DF_REF_REGNO (def)))
18258 return true;
18260 return false;
18263 /* Function checks if instruction INSN uses register number
18264 REGNO as a part of address expression. */
18266 static bool
18267 insn_uses_reg_mem (unsigned int regno, rtx insn)
18269 df_ref use;
18271 FOR_EACH_INSN_USE (use, insn)
18272 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18273 return true;
18275 return false;
18278 /* Search backward for non-agu definition of register number REGNO1
18279 or register number REGNO2 in basic block starting from instruction
18280 START up to head of basic block or instruction INSN.
18282 Set *FOUND to true if a definition was found and to false
18283 otherwise.
18285 The distance in half-cycles between START and the found instruction,
18286 or the head of the BB, is added to DISTANCE and returned. */
18288 static int
18289 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18290 rtx_insn *insn, int distance,
18291 rtx_insn *start, bool *found)
18293 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18294 rtx_insn *prev = start;
18295 rtx_insn *next = NULL;
18297 *found = false;
18299 while (prev
18300 && prev != insn
18301 && distance < LEA_SEARCH_THRESHOLD)
18303 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18305 distance = increase_distance (prev, next, distance);
18306 if (insn_defines_reg (regno1, regno2, prev))
18308 if (recog_memoized (prev) < 0
18309 || get_attr_type (prev) != TYPE_LEA)
18311 *found = true;
18312 return distance;
18316 next = prev;
18318 if (prev == BB_HEAD (bb))
18319 break;
18321 prev = PREV_INSN (prev);
18324 return distance;
18327 /* Search backward for non-agu definition of register number REGNO1
18328 or register number REGNO2 in INSN's basic block until
18329 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18330 2. Reach a neighbouring BB's boundary, or
18331 3. Reach an AGU definition.
18332 Return the distance between the non-AGU definition point and INSN.
18333 If there is no definition point, return -1. */
18335 static int
18336 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18337 rtx_insn *insn)
18339 basic_block bb = BLOCK_FOR_INSN (insn);
18340 int distance = 0;
18341 bool found = false;
18343 if (insn != BB_HEAD (bb))
18344 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18345 distance, PREV_INSN (insn),
18346 &found);
18348 if (!found && distance < LEA_SEARCH_THRESHOLD)
18350 edge e;
18351 edge_iterator ei;
18352 bool simple_loop = false;
18354 FOR_EACH_EDGE (e, ei, bb->preds)
18355 if (e->src == bb)
18357 simple_loop = true;
18358 break;
18361 if (simple_loop)
18362 distance = distance_non_agu_define_in_bb (regno1, regno2,
18363 insn, distance,
18364 BB_END (bb), &found);
18365 else
18367 int shortest_dist = -1;
18368 bool found_in_bb = false;
18370 FOR_EACH_EDGE (e, ei, bb->preds)
18372 int bb_dist
18373 = distance_non_agu_define_in_bb (regno1, regno2,
18374 insn, distance,
18375 BB_END (e->src),
18376 &found_in_bb);
18377 if (found_in_bb)
18379 if (shortest_dist < 0)
18380 shortest_dist = bb_dist;
18381 else if (bb_dist > 0)
18382 shortest_dist = MIN (bb_dist, shortest_dist);
18384 found = true;
18388 distance = shortest_dist;
18392 /* get_attr_type may modify recog data. We want to make sure
18393 that recog data is valid for instruction INSN, on which
18394 distance_non_agu_define is called. INSN is unchanged here. */
18395 extract_insn_cached (insn);
18397 if (!found)
18398 return -1;
18400 return distance >> 1;
18403 /* Return the distance in half-cycles between INSN and the next
18404 insn that uses register number REGNO in a memory address, added
18405 to DISTANCE. Return -1 if REGNO is set.
18407 Set *FOUND to true if a register usage was found and to false
18408 otherwise.
18409 Set *REDEFINED to true if a register redefinition was found and
18410 to false otherwise. */
18412 static int
18413 distance_agu_use_in_bb (unsigned int regno,
18414 rtx_insn *insn, int distance, rtx_insn *start,
18415 bool *found, bool *redefined)
18417 basic_block bb = NULL;
18418 rtx_insn *next = start;
18419 rtx_insn *prev = NULL;
18421 *found = false;
18422 *redefined = false;
18424 if (start != NULL_RTX)
18426 bb = BLOCK_FOR_INSN (start);
18427 if (start != BB_HEAD (bb))
18428 /* If insn and start belong to the same bb, set prev to insn,
18429 so the call to increase_distance will increase the distance
18430 between insns by 1. */
18431 prev = insn;
18434 while (next
18435 && next != insn
18436 && distance < LEA_SEARCH_THRESHOLD)
18438 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18440 distance = increase_distance(prev, next, distance);
18441 if (insn_uses_reg_mem (regno, next))
18443 /* Return DISTANCE if OP0 is used in memory
18444 address in NEXT. */
18445 *found = true;
18446 return distance;
18449 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18451 /* Return -1 if OP0 is set in NEXT. */
18452 *redefined = true;
18453 return -1;
18456 prev = next;
18459 if (next == BB_END (bb))
18460 break;
18462 next = NEXT_INSN (next);
18465 return distance;
18468 /* Return the distance between INSN and the next insn that uses
18469 register number REGNO0 in a memory address. Return -1 if no such
18470 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18472 static int
18473 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18475 basic_block bb = BLOCK_FOR_INSN (insn);
18476 int distance = 0;
18477 bool found = false;
18478 bool redefined = false;
18480 if (insn != BB_END (bb))
18481 distance = distance_agu_use_in_bb (regno0, insn, distance,
18482 NEXT_INSN (insn),
18483 &found, &redefined);
18485 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18487 edge e;
18488 edge_iterator ei;
18489 bool simple_loop = false;
18491 FOR_EACH_EDGE (e, ei, bb->succs)
18492 if (e->dest == bb)
18494 simple_loop = true;
18495 break;
18498 if (simple_loop)
18499 distance = distance_agu_use_in_bb (regno0, insn,
18500 distance, BB_HEAD (bb),
18501 &found, &redefined);
18502 else
18504 int shortest_dist = -1;
18505 bool found_in_bb = false;
18506 bool redefined_in_bb = false;
18508 FOR_EACH_EDGE (e, ei, bb->succs)
18510 int bb_dist
18511 = distance_agu_use_in_bb (regno0, insn,
18512 distance, BB_HEAD (e->dest),
18513 &found_in_bb, &redefined_in_bb);
18514 if (found_in_bb)
18516 if (shortest_dist < 0)
18517 shortest_dist = bb_dist;
18518 else if (bb_dist > 0)
18519 shortest_dist = MIN (bb_dist, shortest_dist);
18521 found = true;
18525 distance = shortest_dist;
18529 if (!found || redefined)
18530 return -1;
18532 return distance >> 1;
18535 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18536 there is a dilemma of choosing LEA or ADD.
18537 Negative value: ADD is preferred over LEA.
18538 Zero: Neutral.
18539 Positive value: LEA is preferred over ADD. */
18540 #define IX86_LEA_PRIORITY 0
18542 /* Return true if using the lea INSN has a performance advantage
18543 over a sequence of instructions. The instruction sequence has
18544 SPLIT_COST cycles higher latency than the lea. */
18546 static bool
18547 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18548 unsigned int regno2, int split_cost, bool has_scale)
18550 int dist_define, dist_use;
18552 /* For Silvermont, the use of a 2-source or 3-source LEA is justified
18553 when it serves a non-destructive destination, or when the scale
18554 factor is needed. */
18555 if (TARGET_SILVERMONT || TARGET_INTEL)
18557 if (has_scale)
18558 return true;
18559 if (split_cost < 1)
18560 return false;
18561 if (regno0 == regno1 || regno0 == regno2)
18562 return false;
18563 return true;
18566 dist_define = distance_non_agu_define (regno1, regno2, insn);
18567 dist_use = distance_agu_use (regno0, insn);
18569 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18571 /* If there is no non-AGU operand definition, no AGU
18572 operand usage, and the split cost is 0, then both the lea
18573 and non-lea variants have the same priority. Currently
18574 we prefer lea for 64-bit code and non-lea for 32-bit
18575 code. */
18576 if (dist_use < 0 && split_cost == 0)
18577 return TARGET_64BIT || IX86_LEA_PRIORITY;
18578 else
18579 return true;
18582 /* With a longer definition distance, lea is preferable.
18583 Here we adjust the distance to take into account the splitting cost
18584 and lea priority. */
18585 dist_define += split_cost + IX86_LEA_PRIORITY;
18587 /* If there is no use in a memory address then we just check
18588 that the split cost exceeds the AGU stall. */
18589 if (dist_use < 0)
18590 return dist_define > LEA_MAX_STALL;
18592 /* If this insn has both a backward non-AGU dependence and a forward
18593 AGU dependence, the one with the shorter distance takes effect. */
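/* For example (illustrative numbers only, assuming dist_define stays
   below LEA_MAX_STALL): if distance_non_agu_define returned 1,
   SPLIT_COST is 1 and IX86_LEA_PRIORITY is 0, the adjusted dist_define
   is 2; if distance_agu_use returned 3, the test below (2 >= 3) fails
   and the caller splits the lea into ALU instructions instead.  */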
18594 return dist_define >= dist_use;
18597 /* Return true if it is legal to clobber flags by INSN and
18598 false otherwise. */
18600 static bool
18601 ix86_ok_to_clobber_flags (rtx_insn *insn)
18603 basic_block bb = BLOCK_FOR_INSN (insn);
18604 df_ref use;
18605 bitmap live;
18607 while (insn)
18609 if (NONDEBUG_INSN_P (insn))
18611 FOR_EACH_INSN_USE (use, insn)
18612 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18613 return false;
18615 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18616 return true;
18619 if (insn == BB_END (bb))
18620 break;
18622 insn = NEXT_INSN (insn);
18625 live = df_get_live_out (bb);
18626 return !REGNO_REG_SET_P (live, FLAGS_REG);
18629 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18630 move and add to avoid AGU stalls. */
18632 bool
18633 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18635 unsigned int regno0, regno1, regno2;
18637 /* Check if we need to optimize. */
18638 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18639 return false;
18641 /* Check that it is correct to split here. */
18642 if (!ix86_ok_to_clobber_flags (insn))
18643 return false;
18645 regno0 = true_regnum (operands[0]);
18646 regno1 = true_regnum (operands[1]);
18647 regno2 = true_regnum (operands[2]);
18649 /* We need to split only adds with a non-destructive
18650 destination operand. */
18651 if (regno0 == regno1 || regno0 == regno2)
18652 return false;
18653 else
18654 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18657 /* Return true if we should emit lea instruction instead of mov
18658 instruction. */
18660 bool
18661 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18663 unsigned int regno0, regno1;
18665 /* Check if we need to optimize. */
18666 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18667 return false;
18669 /* Use lea for reg to reg moves only. */
18670 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18671 return false;
18673 regno0 = true_regnum (operands[0]);
18674 regno1 = true_regnum (operands[1]);
18676 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18679 /* Return true if we need to split lea into a sequence of
18680 instructions to avoid AGU stalls. */
18682 bool
18683 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18685 unsigned int regno0, regno1, regno2;
18686 int split_cost;
18687 struct ix86_address parts;
18688 int ok;
18690 /* Check we need to optimize. */
18691 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18692 return false;
18694 /* The "at least two components" test below might not catch simple
18695 move or zero extension insns if parts.base is non-NULL and parts.disp
18696 is const0_rtx as the only components in the address, e.g. if the
18697 register is %rbp or %r13. As this test is much cheaper and moves or
18698 zero extensions are the common case, do this check first. */
18699 if (REG_P (operands[1])
18700 || (SImode_address_operand (operands[1], VOIDmode)
18701 && REG_P (XEXP (operands[1], 0))))
18702 return false;
18704 /* Check if it is OK to split here. */
18705 if (!ix86_ok_to_clobber_flags (insn))
18706 return false;
18708 ok = ix86_decompose_address (operands[1], &parts);
18709 gcc_assert (ok);
18711 /* There should be at least two components in the address. */
18712 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18713 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18714 return false;
18716 /* We should not split into add if a non-legitimate PIC
18717 operand is used as the displacement. */
18718 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18719 return false;
18721 regno0 = true_regnum (operands[0]);
18722 regno1 = INVALID_REGNUM;
18723 regno2 = INVALID_REGNUM;
18725 if (parts.base)
18726 regno1 = true_regnum (parts.base);
18727 if (parts.index)
18728 regno2 = true_regnum (parts.index);
18730 split_cost = 0;
18732 /* Compute how many cycles we will add to the execution time
18733 if we split the lea into a sequence of instructions. */
18734 if (parts.base || parts.index)
18736 /* Have to use a mov instruction if the non-destructive
18737 destination form is used. */
18738 if (regno1 != regno0 && regno2 != regno0)
18739 split_cost += 1;
18741 /* Have to add index to base if both exist. */
18742 if (parts.base && parts.index)
18743 split_cost += 1;
18745 /* Have to use shift and adds if scale is 2 or greater. */
18746 if (parts.scale > 1)
18748 if (regno0 != regno1)
18749 split_cost += 1;
18750 else if (regno2 == regno0)
18751 split_cost += 4;
18752 else
18753 split_cost += parts.scale;
18756 /* Have to use an add instruction with an immediate if
18757 disp is nonzero. */
18758 if (parts.disp && parts.disp != const0_rtx)
18759 split_cost += 1;
18761 /* Subtract the price of lea. */
18762 split_cost -= 1;
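/* An illustrative trace of the computation above: for
   "lea 0x4(%rbx,%rcx,2), %rax" with all three registers distinct this
   adds +1 for the initial mov, +1 for adding base to index, +1 for the
   shift implementing scale 2, +1 for adding the displacement, and -1
   for the lea itself, i.e. a split cost of 3 extra cycles to be
   weighed against the AGU stall below.  */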
18765 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18766 parts.scale > 1);
18769 /* Emit the x86 binary operation CODE in mode MODE, where the first
18770 operand matches the destination. The RTX includes a clobber of FLAGS_REG. */
18772 static void
18773 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18774 rtx dst, rtx src)
18776 rtx op, clob;
18778 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18779 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18781 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18784 /* Return true if the definition of regno1 is nearer to INSN than that of regno2. */
18786 static bool
18787 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18789 rtx_insn *prev = insn;
18790 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18792 if (insn == start)
18793 return false;
18794 while (prev && prev != start)
18796 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18798 prev = PREV_INSN (prev);
18799 continue;
18801 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18802 return true;
18803 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18804 return false;
18805 prev = PREV_INSN (prev);
18808 /* None of the regs is defined in the bb. */
18809 return false;
18812 /* Split a lea instruction into a sequence of instructions
18813 which are executed on the ALU to avoid AGU stalls.
18814 It is assumed that it is permissible to clobber the flags
18815 register at the lea position. */
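/* As an illustrative sketch of the output, "lea 0x8(%rbx,%rcx,4), %rax"
   (all registers distinct) is turned into roughly:
       mov %rcx, %rax
       shl $2, %rax
       add %rbx, %rax
       add $0x8, %rax
   while "lea (%rax,%rbx,2), %rax" becomes two "add %rbx, %rax"
   instructions, since the scale is expanded as repeated additions
   when the destination already holds the base.  */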
18817 void
18818 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18820 unsigned int regno0, regno1, regno2;
18821 struct ix86_address parts;
18822 rtx target, tmp;
18823 int ok, adds;
18825 ok = ix86_decompose_address (operands[1], &parts);
18826 gcc_assert (ok);
18828 target = gen_lowpart (mode, operands[0]);
18830 regno0 = true_regnum (target);
18831 regno1 = INVALID_REGNUM;
18832 regno2 = INVALID_REGNUM;
18834 if (parts.base)
18836 parts.base = gen_lowpart (mode, parts.base);
18837 regno1 = true_regnum (parts.base);
18840 if (parts.index)
18842 parts.index = gen_lowpart (mode, parts.index);
18843 regno2 = true_regnum (parts.index);
18846 if (parts.disp)
18847 parts.disp = gen_lowpart (mode, parts.disp);
18849 if (parts.scale > 1)
18851 /* Case r1 = r1 + ... */
18852 if (regno1 == regno0)
18854 /* If we have the case r1 = r1 + C * r2 then we
18855 would have to use multiplication, which is very
18856 expensive. Assume the cost model is wrong if we
18857 get such a case here. */
18858 gcc_assert (regno2 != regno0);
18860 for (adds = parts.scale; adds > 0; adds--)
18861 ix86_emit_binop (PLUS, mode, target, parts.index);
18863 else
18865 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18866 if (regno0 != regno2)
18867 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18869 /* Use shift for scaling. */
18870 ix86_emit_binop (ASHIFT, mode, target,
18871 GEN_INT (exact_log2 (parts.scale)));
18873 if (parts.base)
18874 ix86_emit_binop (PLUS, mode, target, parts.base);
18876 if (parts.disp && parts.disp != const0_rtx)
18877 ix86_emit_binop (PLUS, mode, target, parts.disp);
18880 else if (!parts.base && !parts.index)
18882 gcc_assert (parts.disp);
18883 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18885 else
18887 if (!parts.base)
18889 if (regno0 != regno2)
18890 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18892 else if (!parts.index)
18894 if (regno0 != regno1)
18895 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18897 else
18899 if (regno0 == regno1)
18900 tmp = parts.index;
18901 else if (regno0 == regno2)
18902 tmp = parts.base;
18903 else
18905 rtx tmp1;
18907 /* Find better operand for SET instruction, depending
18908 on which definition is farther from the insn. */
18909 if (find_nearest_reg_def (insn, regno1, regno2))
18910 tmp = parts.index, tmp1 = parts.base;
18911 else
18912 tmp = parts.base, tmp1 = parts.index;
18914 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18916 if (parts.disp && parts.disp != const0_rtx)
18917 ix86_emit_binop (PLUS, mode, target, parts.disp);
18919 ix86_emit_binop (PLUS, mode, target, tmp1);
18920 return;
18923 ix86_emit_binop (PLUS, mode, target, tmp);
18926 if (parts.disp && parts.disp != const0_rtx)
18927 ix86_emit_binop (PLUS, mode, target, parts.disp);
18931 /* Return true if it is ok to optimize an ADD operation to a LEA
18932 operation to avoid flag register consumption. For most processors,
18933 ADD is faster than LEA. For processors like BONNELL, if the
18934 destination register of the LEA holds an actual address which will be
18935 used soon, LEA is better; otherwise ADD is better. */
18937 bool
18938 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18940 unsigned int regno0 = true_regnum (operands[0]);
18941 unsigned int regno1 = true_regnum (operands[1]);
18942 unsigned int regno2 = true_regnum (operands[2]);
18944 /* If a = b + c, (a!=b && a!=c), we must use the lea form. */
18945 if (regno0 != regno1 && regno0 != regno2)
18946 return true;
18948 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18949 return false;
18951 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18954 /* Return true if destination reg of SET_BODY is shift count of
18955 USE_BODY. */
18957 static bool
18958 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18960 rtx set_dest;
18961 rtx shift_rtx;
18962 int i;
18964 /* Retrieve destination of SET_BODY. */
18965 switch (GET_CODE (set_body))
18967 case SET:
18968 set_dest = SET_DEST (set_body);
18969 if (!set_dest || !REG_P (set_dest))
18970 return false;
18971 break;
18972 case PARALLEL:
18973 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18974 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18975 use_body))
18976 return true;
18977 default:
18978 return false;
18979 break;
18982 /* Retrieve shift count of USE_BODY. */
18983 switch (GET_CODE (use_body))
18985 case SET:
18986 shift_rtx = XEXP (use_body, 1);
18987 break;
18988 case PARALLEL:
18989 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18990 if (ix86_dep_by_shift_count_body (set_body,
18991 XVECEXP (use_body, 0, i)))
18992 return true;
18993 default:
18994 return false;
18995 break;
18998 if (shift_rtx
18999 && (GET_CODE (shift_rtx) == ASHIFT
19000 || GET_CODE (shift_rtx) == LSHIFTRT
19001 || GET_CODE (shift_rtx) == ASHIFTRT
19002 || GET_CODE (shift_rtx) == ROTATE
19003 || GET_CODE (shift_rtx) == ROTATERT))
19005 rtx shift_count = XEXP (shift_rtx, 1);
19007 /* Return true if shift count is dest of SET_BODY. */
19008 if (REG_P (shift_count))
19010 /* Add this check since the function can be invoked before register
19011 allocation by the pre-reload scheduler. */
19012 if (reload_completed
19013 && true_regnum (set_dest) == true_regnum (shift_count))
19014 return true;
19015 else if (REGNO(set_dest) == REGNO(shift_count))
19016 return true;
19020 return false;
19023 /* Return true if destination reg of SET_INSN is shift count of
19024 USE_INSN. */
19026 bool
19027 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19029 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19030 PATTERN (use_insn));
19033 /* Return TRUE or FALSE depending on whether the unary operator meets the
19034 appropriate constraints. */
19036 bool
19037 ix86_unary_operator_ok (enum rtx_code,
19038 machine_mode,
19039 rtx operands[2])
19041 /* If one of operands is memory, source and destination must match. */
19042 if ((MEM_P (operands[0])
19043 || MEM_P (operands[1]))
19044 && ! rtx_equal_p (operands[0], operands[1]))
19045 return false;
19046 return true;
19049 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19050 are ok, keeping in mind the possible movddup alternative. */
19052 bool
19053 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19055 if (MEM_P (operands[0]))
19056 return rtx_equal_p (operands[0], operands[1 + high]);
19057 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19058 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19059 return true;
19062 /* Post-reload splitter for converting an SF or DFmode value in an
19063 SSE register into an unsigned SImode. */
19065 void
19066 ix86_split_convert_uns_si_sse (rtx operands[])
19068 machine_mode vecmode;
19069 rtx value, large, zero_or_two31, input, two31, x;
19071 large = operands[1];
19072 zero_or_two31 = operands[2];
19073 input = operands[3];
19074 two31 = operands[4];
19075 vecmode = GET_MODE (large);
19076 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19078 /* Load up the value into the low element. We must ensure that the other
19079 elements are valid floats -- zero is the easiest such value. */
19080 if (MEM_P (input))
19082 if (vecmode == V4SFmode)
19083 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19084 else
19085 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19087 else
19089 input = gen_rtx_REG (vecmode, REGNO (input));
19090 emit_move_insn (value, CONST0_RTX (vecmode));
19091 if (vecmode == V4SFmode)
19092 emit_insn (gen_sse_movss (value, value, input));
19093 else
19094 emit_insn (gen_sse2_movsd (value, value, input));
19097 emit_move_insn (large, two31);
19098 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19100 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19101 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19103 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19104 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19106 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19107 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19109 large = gen_rtx_REG (V4SImode, REGNO (large));
19110 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19112 x = gen_rtx_REG (V4SImode, REGNO (value));
19113 if (vecmode == V4SFmode)
19114 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19115 else
19116 emit_insn (gen_sse2_cvttpd2dq (x, value));
19117 value = x;
19119 emit_insn (gen_xorv4si3 (value, value, large));
19122 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19123 Expects the 64-bit DImode to be supplied in a pair of integral
19124 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19125 -mfpmath=sse, !optimize_size only. */
19127 void
19128 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19130 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19131 rtx int_xmm, fp_xmm;
19132 rtx biases, exponents;
19133 rtx x;
19135 int_xmm = gen_reg_rtx (V4SImode);
19136 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19137 emit_insn (gen_movdi_to_sse (int_xmm, input));
19138 else if (TARGET_SSE_SPLIT_REGS)
19140 emit_clobber (int_xmm);
19141 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19143 else
19145 x = gen_reg_rtx (V2DImode);
19146 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19147 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19150 x = gen_rtx_CONST_VECTOR (V4SImode,
19151 gen_rtvec (4, GEN_INT (0x43300000UL),
19152 GEN_INT (0x45300000UL),
19153 const0_rtx, const0_rtx));
19154 exponents = validize_mem (force_const_mem (V4SImode, x));
19156 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19157 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19159 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19160 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19161 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19162 (0x1.0p84 + double(fp_value_hi_xmm)).
19163 Note these exponents differ by 32. */
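/* A small worked example (illustrative only): for the input
   0x0000000200000005 (2**33 + 5) the low word 5 packed under the
   0x43300000 exponent gives the double 0x1.0p52 + 5, and the high
   word 2 under 0x45300000 gives 0x1.0p84 + 2*2**32.  Subtracting the
   biases below leaves 5.0 and 2**33 exactly, and the final add
   produces 2**33 + 5 with a single rounding step.  */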
19165 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19167 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19168 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19169 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19170 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19171 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19172 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19173 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19174 biases = validize_mem (force_const_mem (V2DFmode, biases));
19175 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19177 /* Add the upper and lower DFmode values together. */
19178 if (TARGET_SSE3)
19179 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19180 else
19182 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19183 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19184 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19187 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19190 /* Not used, but eases macroization of patterns. */
19191 void
19192 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19194 gcc_unreachable ();
19197 /* Convert an unsigned SImode value into a DFmode. Only currently used
19198 for SSE, but applicable anywhere. */
19200 void
19201 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19203 REAL_VALUE_TYPE TWO31r;
19204 rtx x, fp;
19206 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19207 NULL, 1, OPTAB_DIRECT);
19209 fp = gen_reg_rtx (DFmode);
19210 emit_insn (gen_floatsidf2 (fp, x));
19212 real_ldexp (&TWO31r, &dconst1, 31);
19213 x = const_double_from_real_value (TWO31r, DFmode);
19215 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19216 if (x != target)
19217 emit_move_insn (target, x);
19220 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19221 32-bit mode; otherwise we have a direct convert instruction. */
19223 void
19224 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19226 REAL_VALUE_TYPE TWO32r;
19227 rtx fp_lo, fp_hi, x;
19229 fp_lo = gen_reg_rtx (DFmode);
19230 fp_hi = gen_reg_rtx (DFmode);
19232 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19234 real_ldexp (&TWO32r, &dconst1, 32);
19235 x = const_double_from_real_value (TWO32r, DFmode);
19236 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19238 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19240 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19241 0, OPTAB_DIRECT);
19242 if (x != target)
19243 emit_move_insn (target, x);
19246 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19247 For x86_32, -mfpmath=sse, !optimize_size only. */
19248 void
19249 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19251 REAL_VALUE_TYPE ONE16r;
19252 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19254 real_ldexp (&ONE16r, &dconst1, 16);
19255 x = const_double_from_real_value (ONE16r, SFmode);
19256 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19257 NULL, 0, OPTAB_DIRECT);
19258 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19259 NULL, 0, OPTAB_DIRECT);
19260 fp_hi = gen_reg_rtx (SFmode);
19261 fp_lo = gen_reg_rtx (SFmode);
19262 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19263 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19264 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19265 0, OPTAB_DIRECT);
19266 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19267 0, OPTAB_DIRECT);
19268 if (!rtx_equal_p (target, fp_hi))
19269 emit_move_insn (target, fp_hi);
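/* For example (illustrative values): converting 0x90001234 splits it
   into int_hi = 0x9000 and int_lo = 0x1234; both halves convert to
   float exactly, fp_hi scaled by 2**16 becomes 0x90000000 (still
   exact), and the final addition yields (float) 0x90001234 with only
   that last add rounding.  */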
19272 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19273 a vector of unsigned ints VAL to vector of floats TARGET. */
19275 void
19276 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19278 rtx tmp[8];
19279 REAL_VALUE_TYPE TWO16r;
19280 machine_mode intmode = GET_MODE (val);
19281 machine_mode fltmode = GET_MODE (target);
19282 rtx (*cvt) (rtx, rtx);
19284 if (intmode == V4SImode)
19285 cvt = gen_floatv4siv4sf2;
19286 else
19287 cvt = gen_floatv8siv8sf2;
19288 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19289 tmp[0] = force_reg (intmode, tmp[0]);
19290 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19291 OPTAB_DIRECT);
19292 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19293 NULL_RTX, 1, OPTAB_DIRECT);
19294 tmp[3] = gen_reg_rtx (fltmode);
19295 emit_insn (cvt (tmp[3], tmp[1]));
19296 tmp[4] = gen_reg_rtx (fltmode);
19297 emit_insn (cvt (tmp[4], tmp[2]));
19298 real_ldexp (&TWO16r, &dconst1, 16);
19299 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19300 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19301 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19302 OPTAB_DIRECT);
19303 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19304 OPTAB_DIRECT);
19305 if (tmp[7] != target)
19306 emit_move_insn (target, tmp[7]);
19309 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19310 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19311 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19312 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
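/* A per-lane sketch (illustrative values): a lane holding 3e9 compares
   >= 0x1p31, so 0x1p31 is subtracted leaving 852516352.0 and bit 31 is
   set in the corresponding lane of *XORP; after the signed truncation
   the caller's xor restores 3000000000.  A lane holding 100.5 is left
   unchanged and its xor mask lane is zero.  */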
19315 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19317 REAL_VALUE_TYPE TWO31r;
19318 rtx two31r, tmp[4];
19319 machine_mode mode = GET_MODE (val);
19320 machine_mode scalarmode = GET_MODE_INNER (mode);
19321 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19322 rtx (*cmp) (rtx, rtx, rtx, rtx);
19323 int i;
19325 for (i = 0; i < 3; i++)
19326 tmp[i] = gen_reg_rtx (mode);
19327 real_ldexp (&TWO31r, &dconst1, 31);
19328 two31r = const_double_from_real_value (TWO31r, scalarmode);
19329 two31r = ix86_build_const_vector (mode, 1, two31r);
19330 two31r = force_reg (mode, two31r);
19331 switch (mode)
19333 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19334 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19335 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19336 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19337 default: gcc_unreachable ();
19339 tmp[3] = gen_rtx_LE (mode, two31r, val);
19340 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19341 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19342 0, OPTAB_DIRECT);
19343 if (intmode == V4SImode || TARGET_AVX2)
19344 *xorp = expand_simple_binop (intmode, ASHIFT,
19345 gen_lowpart (intmode, tmp[0]),
19346 GEN_INT (31), NULL_RTX, 0,
19347 OPTAB_DIRECT);
19348 else
19350 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19351 two31 = ix86_build_const_vector (intmode, 1, two31);
19352 *xorp = expand_simple_binop (intmode, AND,
19353 gen_lowpart (intmode, tmp[0]),
19354 two31, NULL_RTX, 0,
19355 OPTAB_DIRECT);
19357 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19358 0, OPTAB_DIRECT);
19361 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19362 then replicate the value for all elements of the vector
19363 register. */
19366 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19368 int i, n_elt;
19369 rtvec v;
19370 machine_mode scalar_mode;
19372 switch (mode)
19374 case V64QImode:
19375 case V32QImode:
19376 case V16QImode:
19377 case V32HImode:
19378 case V16HImode:
19379 case V8HImode:
19380 case V16SImode:
19381 case V8SImode:
19382 case V4SImode:
19383 case V8DImode:
19384 case V4DImode:
19385 case V2DImode:
19386 gcc_assert (vect);
19387 case V16SFmode:
19388 case V8SFmode:
19389 case V4SFmode:
19390 case V8DFmode:
19391 case V4DFmode:
19392 case V2DFmode:
19393 n_elt = GET_MODE_NUNITS (mode);
19394 v = rtvec_alloc (n_elt);
19395 scalar_mode = GET_MODE_INNER (mode);
19397 RTVEC_ELT (v, 0) = value;
19399 for (i = 1; i < n_elt; ++i)
19400 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19402 return gen_rtx_CONST_VECTOR (mode, v);
19404 default:
19405 gcc_unreachable ();
19409 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19410 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19411 for an SSE register. If VECT is true, then replicate the mask for
19412 all elements of the vector register. If INVERT is true, then create
19413 a mask excluding the sign bit. */
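/* For example, for V4SFmode with VECT true and INVERT false this
   yields { -0.0, -0.0, -0.0, -0.0 } (0x80000000 in each lane); with
   INVERT true each lane instead holds 0x7fffffff, masking everything
   but the sign bit.  */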
19416 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19418 machine_mode vec_mode, imode;
19419 HOST_WIDE_INT hi, lo;
19420 int shift = 63;
19421 rtx v;
19422 rtx mask;
19424 /* Find the sign bit, sign extended to 2*HWI. */
19425 switch (mode)
19427 case V16SImode:
19428 case V16SFmode:
19429 case V8SImode:
19430 case V4SImode:
19431 case V8SFmode:
19432 case V4SFmode:
19433 vec_mode = mode;
19434 mode = GET_MODE_INNER (mode);
19435 imode = SImode;
19436 lo = 0x80000000, hi = lo < 0;
19437 break;
19439 case V8DImode:
19440 case V4DImode:
19441 case V2DImode:
19442 case V8DFmode:
19443 case V4DFmode:
19444 case V2DFmode:
19445 vec_mode = mode;
19446 mode = GET_MODE_INNER (mode);
19447 imode = DImode;
19448 if (HOST_BITS_PER_WIDE_INT >= 64)
19449 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19450 else
19451 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19452 break;
19454 case TImode:
19455 case TFmode:
19456 vec_mode = VOIDmode;
19457 if (HOST_BITS_PER_WIDE_INT >= 64)
19459 imode = TImode;
19460 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19462 else
19464 rtvec vec;
19466 imode = DImode;
19467 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19469 if (invert)
19471 lo = ~lo, hi = ~hi;
19472 v = constm1_rtx;
19474 else
19475 v = const0_rtx;
19477 mask = immed_double_const (lo, hi, imode);
19479 vec = gen_rtvec (2, v, mask);
19480 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19481 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19483 return v;
19485 break;
19487 default:
19488 gcc_unreachable ();
19491 if (invert)
19492 lo = ~lo, hi = ~hi;
19494 /* Force this value into the low part of a fp vector constant. */
19495 mask = immed_double_const (lo, hi, imode);
19496 mask = gen_lowpart (mode, mask);
19498 if (vec_mode == VOIDmode)
19499 return force_reg (mode, mask);
19501 v = ix86_build_const_vector (vec_mode, vect, mask);
19502 return force_reg (vec_mode, v);
19505 /* Generate code for floating point ABS or NEG. */
19507 void
19508 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19509 rtx operands[])
19511 rtx mask, set, dst, src;
19512 bool use_sse = false;
19513 bool vector_mode = VECTOR_MODE_P (mode);
19514 machine_mode vmode = mode;
19516 if (vector_mode)
19517 use_sse = true;
19518 else if (mode == TFmode)
19519 use_sse = true;
19520 else if (TARGET_SSE_MATH)
19522 use_sse = SSE_FLOAT_MODE_P (mode);
19523 if (mode == SFmode)
19524 vmode = V4SFmode;
19525 else if (mode == DFmode)
19526 vmode = V2DFmode;
19529 /* NEG and ABS performed with SSE use bitwise mask operations.
19530 Create the appropriate mask now. */
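/* Conceptually (a sketch of the eventual output), for DFmode in an SSE
   register NEG ends up as an XOR with the { -0.0, -0.0 } mask and ABS
   as an AND with the inverted mask, flipping or clearing just the sign
   bit.  */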
19531 if (use_sse)
19532 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19533 else
19534 mask = NULL_RTX;
19536 dst = operands[0];
19537 src = operands[1];
19539 set = gen_rtx_fmt_e (code, mode, src);
19540 set = gen_rtx_SET (VOIDmode, dst, set);
19542 if (mask)
19544 rtx use, clob;
19545 rtvec par;
19547 use = gen_rtx_USE (VOIDmode, mask);
19548 if (vector_mode)
19549 par = gen_rtvec (2, set, use);
19550 else
19552 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19553 par = gen_rtvec (3, set, use, clob);
19555 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19557 else
19558 emit_insn (set);
19561 /* Expand a copysign operation. Special case operand 0 being a constant. */
19563 void
19564 ix86_expand_copysign (rtx operands[])
19566 machine_mode mode, vmode;
19567 rtx dest, op0, op1, mask, nmask;
19569 dest = operands[0];
19570 op0 = operands[1];
19571 op1 = operands[2];
19573 mode = GET_MODE (dest);
19575 if (mode == SFmode)
19576 vmode = V4SFmode;
19577 else if (mode == DFmode)
19578 vmode = V2DFmode;
19579 else
19580 vmode = mode;
19582 if (GET_CODE (op0) == CONST_DOUBLE)
19584 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19586 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19587 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19589 if (mode == SFmode || mode == DFmode)
19591 if (op0 == CONST0_RTX (mode))
19592 op0 = CONST0_RTX (vmode);
19593 else
19595 rtx v = ix86_build_const_vector (vmode, false, op0);
19597 op0 = force_reg (vmode, v);
19600 else if (op0 != CONST0_RTX (mode))
19601 op0 = force_reg (mode, op0);
19603 mask = ix86_build_signbit_mask (vmode, 0, 0);
19605 if (mode == SFmode)
19606 copysign_insn = gen_copysignsf3_const;
19607 else if (mode == DFmode)
19608 copysign_insn = gen_copysigndf3_const;
19609 else
19610 copysign_insn = gen_copysigntf3_const;
19612 emit_insn (copysign_insn (dest, op0, op1, mask));
19614 else
19616 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19618 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19619 mask = ix86_build_signbit_mask (vmode, 0, 0);
19621 if (mode == SFmode)
19622 copysign_insn = gen_copysignsf3_var;
19623 else if (mode == DFmode)
19624 copysign_insn = gen_copysigndf3_var;
19625 else
19626 copysign_insn = gen_copysigntf3_var;
19628 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19632 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19633 be a constant, and so has already been expanded into a vector constant. */
19635 void
19636 ix86_split_copysign_const (rtx operands[])
19638 machine_mode mode, vmode;
19639 rtx dest, op0, mask, x;
19641 dest = operands[0];
19642 op0 = operands[1];
19643 mask = operands[3];
19645 mode = GET_MODE (dest);
19646 vmode = GET_MODE (mask);
19648 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19649 x = gen_rtx_AND (vmode, dest, mask);
19650 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19652 if (op0 != CONST0_RTX (vmode))
19654 x = gen_rtx_IOR (vmode, dest, op0);
19655 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19659 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19660 so we have to do two masks. */
19662 void
19663 ix86_split_copysign_var (rtx operands[])
19665 machine_mode mode, vmode;
19666 rtx dest, scratch, op0, op1, mask, nmask, x;
19668 dest = operands[0];
19669 scratch = operands[1];
19670 op0 = operands[2];
19671 op1 = operands[3];
19672 nmask = operands[4];
19673 mask = operands[5];
19675 mode = GET_MODE (dest);
19676 vmode = GET_MODE (mask);
19678 if (rtx_equal_p (op0, op1))
19680 /* Shouldn't happen often (it's useless, obviously), but when it does
19681 we'd generate incorrect code if we continue below. */
19682 emit_move_insn (dest, op0);
19683 return;
19686 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19688 gcc_assert (REGNO (op1) == REGNO (scratch));
19690 x = gen_rtx_AND (vmode, scratch, mask);
19691 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19693 dest = mask;
19694 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19695 x = gen_rtx_NOT (vmode, dest);
19696 x = gen_rtx_AND (vmode, x, op0);
19697 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19699 else
19701 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19703 x = gen_rtx_AND (vmode, scratch, mask);
19705 else /* alternative 2,4 */
19707 gcc_assert (REGNO (mask) == REGNO (scratch));
19708 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19709 x = gen_rtx_AND (vmode, scratch, op1);
19711 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19713 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19715 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19716 x = gen_rtx_AND (vmode, dest, nmask);
19718 else /* alternative 3,4 */
19720 gcc_assert (REGNO (nmask) == REGNO (dest));
19721 dest = nmask;
19722 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19723 x = gen_rtx_AND (vmode, dest, op0);
19725 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19728 x = gen_rtx_IOR (vmode, dest, scratch);
19729 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19732 /* Return TRUE or FALSE depending on whether the first SET in INSN
19733 has source and destination with matching CC modes, and that the
19734 CC mode is at least as constrained as REQ_MODE. */
19736 bool
19737 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19739 rtx set;
19740 machine_mode set_mode;
19742 set = PATTERN (insn);
19743 if (GET_CODE (set) == PARALLEL)
19744 set = XVECEXP (set, 0, 0);
19745 gcc_assert (GET_CODE (set) == SET);
19746 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19748 set_mode = GET_MODE (SET_DEST (set));
19749 switch (set_mode)
19751 case CCNOmode:
19752 if (req_mode != CCNOmode
19753 && (req_mode != CCmode
19754 || XEXP (SET_SRC (set), 1) != const0_rtx))
19755 return false;
19756 break;
19757 case CCmode:
19758 if (req_mode == CCGCmode)
19759 return false;
19760 /* FALLTHRU */
19761 case CCGCmode:
19762 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19763 return false;
19764 /* FALLTHRU */
19765 case CCGOCmode:
19766 if (req_mode == CCZmode)
19767 return false;
19768 /* FALLTHRU */
19769 case CCZmode:
19770 break;
19772 case CCAmode:
19773 case CCCmode:
19774 case CCOmode:
19775 case CCSmode:
19776 if (set_mode != req_mode)
19777 return false;
19778 break;
19780 default:
19781 gcc_unreachable ();
19784 return GET_MODE (SET_SRC (set)) == set_mode;
19787 /* Generate insn patterns to do an integer compare of OPERANDS. */
19789 static rtx
19790 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19792 machine_mode cmpmode;
19793 rtx tmp, flags;
19795 cmpmode = SELECT_CC_MODE (code, op0, op1);
19796 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19798 /* This is very simple, but making the interface the same as in the
19799 FP case makes the rest of the code easier. */
19800 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19801 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19803 /* Return the test that should be put into the flags user, i.e.
19804 the bcc, scc, or cmov instruction. */
19805 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19808 /* Figure out whether to use ordered or unordered fp comparisons.
19809 Return the appropriate mode to use. */
19811 machine_mode
19812 ix86_fp_compare_mode (enum rtx_code)
19814 /* ??? In order to make all comparisons reversible, we do all comparisons
19815 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19816 all forms of trapping and nontrapping comparisons, we can make inequality
19817 comparisons trapping again, since that results in better code when using
19818 FCOM based compares. */
19819 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19822 machine_mode
19823 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19825 machine_mode mode = GET_MODE (op0);
19827 if (SCALAR_FLOAT_MODE_P (mode))
19829 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19830 return ix86_fp_compare_mode (code);
19833 switch (code)
19835 /* Only zero flag is needed. */
19836 case EQ: /* ZF=0 */
19837 case NE: /* ZF!=0 */
19838 return CCZmode;
19839 /* Codes needing carry flag. */
19840 case GEU: /* CF=0 */
19841 case LTU: /* CF=1 */
19842 /* Detect overflow checks. They need just the carry flag. */
19843 if (GET_CODE (op0) == PLUS
19844 && rtx_equal_p (op1, XEXP (op0, 0)))
19845 return CCCmode;
19846 else
19847 return CCmode;
19848 case GTU: /* CF=0 & ZF=0 */
19849 case LEU: /* CF=1 | ZF=1 */
19850 return CCmode;
19851 /* Codes possibly doable only with the sign flag when
19852 comparing against zero. */
19853 case GE: /* SF=OF or SF=0 */
19854 case LT: /* SF<>OF or SF=1 */
19855 if (op1 == const0_rtx)
19856 return CCGOCmode;
19857 else
19858 /* For other cases Carry flag is not required. */
19859 return CCGCmode;
19860 /* Codes doable only with the sign flag when comparing
19861 against zero, but we lack a jump instruction for it,
19862 so we need to use relational tests against overflow,
19863 which thus needs to be zero. */
19864 case GT: /* ZF=0 & SF=OF */
19865 case LE: /* ZF=1 | SF<>OF */
19866 if (op1 == const0_rtx)
19867 return CCNOmode;
19868 else
19869 return CCGCmode;
19870 /* The strcmp pattern does (use flags) and combine may ask us for the
19871 proper mode. */
19872 case USE:
19873 return CCmode;
19874 default:
19875 gcc_unreachable ();
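/* A few representative picks from the switch above (illustrative): an
   equality test only needs CCZmode; an unsigned "(a + b) < a" overflow
   check maps to CCCmode; and a signed comparison against zero such as
   "x < 0" maps to CCGOCmode.  */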
19879 /* Return the fixed registers used for condition codes. */
19881 static bool
19882 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19884 *p1 = FLAGS_REG;
19885 *p2 = FPSR_REG;
19886 return true;
19889 /* If two condition code modes are compatible, return a condition code
19890 mode which is compatible with both. Otherwise, return
19891 VOIDmode. */
19893 static machine_mode
19894 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19896 if (m1 == m2)
19897 return m1;
19899 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19900 return VOIDmode;
19902 if ((m1 == CCGCmode && m2 == CCGOCmode)
19903 || (m1 == CCGOCmode && m2 == CCGCmode))
19904 return CCGCmode;
19906 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19907 return m2;
19908 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19909 return m1;
19911 switch (m1)
19913 default:
19914 gcc_unreachable ();
19916 case CCmode:
19917 case CCGCmode:
19918 case CCGOCmode:
19919 case CCNOmode:
19920 case CCAmode:
19921 case CCCmode:
19922 case CCOmode:
19923 case CCSmode:
19924 case CCZmode:
19925 switch (m2)
19927 default:
19928 return VOIDmode;
19930 case CCmode:
19931 case CCGCmode:
19932 case CCGOCmode:
19933 case CCNOmode:
19934 case CCAmode:
19935 case CCCmode:
19936 case CCOmode:
19937 case CCSmode:
19938 case CCZmode:
19939 return CCmode;
19942 case CCFPmode:
19943 case CCFPUmode:
19944 /* These are only compatible with themselves, which we already
19945 checked above. */
19946 return VOIDmode;
19951 /* Return a comparison we can do and that it is equivalent to
19952 swap_condition (code) apart possibly from orderedness.
19953 But, never change orderedness if TARGET_IEEE_FP, returning
19954 UNKNOWN in that case if necessary. */
19956 static enum rtx_code
19957 ix86_fp_swap_condition (enum rtx_code code)
19959 switch (code)
19961 case GT: /* GTU - CF=0 & ZF=0 */
19962 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19963 case GE: /* GEU - CF=0 */
19964 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19965 case UNLT: /* LTU - CF=1 */
19966 return TARGET_IEEE_FP ? UNKNOWN : GT;
19967 case UNLE: /* LEU - CF=1 | ZF=1 */
19968 return TARGET_IEEE_FP ? UNKNOWN : GE;
19969 default:
19970 return swap_condition (code);
19974 /* Return the cost of comparison CODE using the best strategy for performance.
19975 All following functions use the number of instructions as a cost metric.
19976 In the future this should be tweaked to compute bytes for optimize_size and
19977 take into account the performance of various instructions on various CPUs. */
19979 static int
19980 ix86_fp_comparison_cost (enum rtx_code code)
19982 int arith_cost;
19984 /* The cost of code using bit-twiddling on %ah. */
19985 switch (code)
19987 case UNLE:
19988 case UNLT:
19989 case LTGT:
19990 case GT:
19991 case GE:
19992 case UNORDERED:
19993 case ORDERED:
19994 case UNEQ:
19995 arith_cost = 4;
19996 break;
19997 case LT:
19998 case NE:
19999 case EQ:
20000 case UNGE:
20001 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20002 break;
20003 case LE:
20004 case UNGT:
20005 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20006 break;
20007 default:
20008 gcc_unreachable ();
20011 switch (ix86_fp_comparison_strategy (code))
20013 case IX86_FPCMP_COMI:
20014 return arith_cost > 4 ? 3 : 2;
20015 case IX86_FPCMP_SAHF:
20016 return arith_cost > 4 ? 4 : 3;
20017 default:
20018 return arith_cost;
20022 /* Return the strategy to use for floating-point comparisons. We assume that
20023 fcomi is always preferable where available, since that is also true when looking
20024 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20026 enum ix86_fpcmp_strategy
20027 ix86_fp_comparison_strategy (enum rtx_code)
20029 /* Do fcomi/sahf based test when profitable. */
20031 if (TARGET_CMOVE)
20032 return IX86_FPCMP_COMI;
20034 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20035 return IX86_FPCMP_SAHF;
20037 return IX86_FPCMP_ARITH;
20040 /* Swap, force into registers, or otherwise massage the two operands
20041 to a fp comparison. The operands are updated in place; the new
20042 comparison code is returned. */
20044 static enum rtx_code
20045 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20047 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20048 rtx op0 = *pop0, op1 = *pop1;
20049 machine_mode op_mode = GET_MODE (op0);
20050 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20052 /* All of the unordered compare instructions only work on registers.
20053 The same is true of the fcomi compare instructions. The XFmode
20054 compare instructions require registers except when comparing
20055 against zero or when converting operand 1 from fixed point to
20056 floating point. */
20058 if (!is_sse
20059 && (fpcmp_mode == CCFPUmode
20060 || (op_mode == XFmode
20061 && ! (standard_80387_constant_p (op0) == 1
20062 || standard_80387_constant_p (op1) == 1)
20063 && GET_CODE (op1) != FLOAT)
20064 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20066 op0 = force_reg (op_mode, op0);
20067 op1 = force_reg (op_mode, op1);
20069 else
20071 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20072 things around if they appear profitable, otherwise force op0
20073 into a register. */
20075 if (standard_80387_constant_p (op0) == 0
20076 || (MEM_P (op0)
20077 && ! (standard_80387_constant_p (op1) == 0
20078 || MEM_P (op1))))
20080 enum rtx_code new_code = ix86_fp_swap_condition (code);
20081 if (new_code != UNKNOWN)
20083 std::swap (op0, op1);
20084 code = new_code;
20088 if (!REG_P (op0))
20089 op0 = force_reg (op_mode, op0);
20091 if (CONSTANT_P (op1))
20093 int tmp = standard_80387_constant_p (op1);
20094 if (tmp == 0)
20095 op1 = validize_mem (force_const_mem (op_mode, op1));
20096 else if (tmp == 1)
20098 if (TARGET_CMOVE)
20099 op1 = force_reg (op_mode, op1);
20101 else
20102 op1 = force_reg (op_mode, op1);
20106 /* Try to rearrange the comparison to make it cheaper. */
20107 if (ix86_fp_comparison_cost (code)
20108 > ix86_fp_comparison_cost (swap_condition (code))
20109 && (REG_P (op1) || can_create_pseudo_p ()))
20111 std::swap (op0, op1);
20112 code = swap_condition (code);
20113 if (!REG_P (op0))
20114 op0 = force_reg (op_mode, op0);
20117 *pop0 = op0;
20118 *pop1 = op1;
20119 return code;
20122 /* Convert comparison codes we use to represent FP comparison to integer
20123 code that will result in proper branch. Return UNKNOWN if no such code
20124 is available. */
20126 enum rtx_code
20127 ix86_fp_compare_code_to_integer (enum rtx_code code)
20129 switch (code)
20131 case GT:
20132 return GTU;
20133 case GE:
20134 return GEU;
20135 case ORDERED:
20136 case UNORDERED:
20137 return code;
20138 break;
20139 case UNEQ:
20140 return EQ;
20141 break;
20142 case UNLT:
20143 return LTU;
20144 break;
20145 case UNLE:
20146 return LEU;
20147 break;
20148 case LTGT:
20149 return NE;
20150 break;
20151 default:
20152 return UNKNOWN;
20156 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20158 static rtx
20159 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20161 machine_mode fpcmp_mode, intcmp_mode;
20162 rtx tmp, tmp2;
20164 fpcmp_mode = ix86_fp_compare_mode (code);
20165 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20167 /* Do fcomi/sahf based test when profitable. */
20168 switch (ix86_fp_comparison_strategy (code))
20170 case IX86_FPCMP_COMI:
20171 intcmp_mode = fpcmp_mode;
20172 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20173 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20174 tmp);
20175 emit_insn (tmp);
20176 break;
20178 case IX86_FPCMP_SAHF:
20179 intcmp_mode = fpcmp_mode;
20180 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20181 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20182 tmp);
20184 if (!scratch)
20185 scratch = gen_reg_rtx (HImode);
20186 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20187 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20188 break;
20190 case IX86_FPCMP_ARITH:
20191 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20192 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20193 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20194 if (!scratch)
20195 scratch = gen_reg_rtx (HImode);
20196 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20198 /* In the unordered case, we have to check C2 for NaN's, which
20199 doesn't happen to work out to anything nice combination-wise.
20200 So do some bit twiddling on the value we've got in AH to come
20201 up with an appropriate set of condition codes. */
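/* As background for the constants used below: after fnstsw the x87
   condition bits land in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so
   0x45 tests C0|C2|C3 together, 0x04 alone detects an unordered (NaN)
   result, and 0x40 (C3) is the equality bit.  */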
20203 intcmp_mode = CCNOmode;
20204 switch (code)
20206 case GT:
20207 case UNGT:
20208 if (code == GT || !TARGET_IEEE_FP)
20210 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20211 code = EQ;
20213 else
20215 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20216 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20217 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20218 intcmp_mode = CCmode;
20219 code = GEU;
20221 break;
20222 case LT:
20223 case UNLT:
20224 if (code == LT && TARGET_IEEE_FP)
20226 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20227 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20228 intcmp_mode = CCmode;
20229 code = EQ;
20231 else
20233 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20234 code = NE;
20236 break;
20237 case GE:
20238 case UNGE:
20239 if (code == GE || !TARGET_IEEE_FP)
20241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20242 code = EQ;
20244 else
20246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20247 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20248 code = NE;
20250 break;
20251 case LE:
20252 case UNLE:
20253 if (code == LE && TARGET_IEEE_FP)
20255 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20256 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20257 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20258 intcmp_mode = CCmode;
20259 code = LTU;
20261 else
20263 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20264 code = NE;
20266 break;
20267 case EQ:
20268 case UNEQ:
20269 if (code == EQ && TARGET_IEEE_FP)
20271 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20272 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20273 intcmp_mode = CCmode;
20274 code = EQ;
20276 else
20278 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20279 code = NE;
20281 break;
20282 case NE:
20283 case LTGT:
20284 if (code == NE && TARGET_IEEE_FP)
20286 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20287 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20288 GEN_INT (0x40)));
20289 code = NE;
20291 else
20293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20294 code = EQ;
20296 break;
20298 case UNORDERED:
20299 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20300 code = NE;
20301 break;
20302 case ORDERED:
20303 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20304 code = EQ;
20305 break;
20307 default:
20308 gcc_unreachable ();
20310 break;
20312 default:
20313 gcc_unreachable ();
20316 /* Return the test that should be put into the flags user, i.e.
20317 the bcc, scc, or cmov instruction. */
20318 return gen_rtx_fmt_ee (code, VOIDmode,
20319 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20320 const0_rtx);
20323 static rtx
20324 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20326 rtx ret;
20328 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20329 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20331 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20333 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20334 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20336 else
20337 ret = ix86_expand_int_compare (code, op0, op1);
20339 return ret;
20342 void
20343 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20345 machine_mode mode = GET_MODE (op0);
20346 rtx tmp;
20348 switch (mode)
20350 case SFmode:
20351 case DFmode:
20352 case XFmode:
20353 case QImode:
20354 case HImode:
20355 case SImode:
20356 simple:
20357 tmp = ix86_expand_compare (code, op0, op1);
20358 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20359 gen_rtx_LABEL_REF (VOIDmode, label),
20360 pc_rtx);
20361 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20362 return;
20364 case DImode:
20365 if (TARGET_64BIT)
20366 goto simple;
20367 case TImode:
20368 /* Expand DImode branch into multiple compare+branch. */
20370 rtx lo[2], hi[2];
20371 rtx_code_label *label2;
20372 enum rtx_code code1, code2, code3;
20373 machine_mode submode;
20375 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20377 std::swap (op0, op1);
20378 code = swap_condition (code);
20381 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20382 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20384 submode = mode == DImode ? SImode : DImode;
20386 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20387 avoid two branches. This costs one extra insn, so disable when
20388 optimizing for size. */
20390 if ((code == EQ || code == NE)
20391 && (!optimize_insn_for_size_p ()
20392 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20394 rtx xor0, xor1;
20396 xor1 = hi[0];
20397 if (hi[1] != const0_rtx)
20398 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20399 NULL_RTX, 0, OPTAB_WIDEN);
20401 xor0 = lo[0];
20402 if (lo[1] != const0_rtx)
20403 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20404 NULL_RTX, 0, OPTAB_WIDEN);
20406 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20407 NULL_RTX, 0, OPTAB_WIDEN);
20409 ix86_expand_branch (code, tmp, const0_rtx, label);
20410 return;
20413 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20414 op1 is a constant and the low word is zero, then we can just
20415 examine the high word. Similarly for low word -1 and
20416 less-or-equal-than or greater-than. */
20418 if (CONST_INT_P (hi[1]))
20419 switch (code)
20421 case LT: case LTU: case GE: case GEU:
20422 if (lo[1] == const0_rtx)
20424 ix86_expand_branch (code, hi[0], hi[1], label);
20425 return;
20427 break;
20428 case LE: case LEU: case GT: case GTU:
20429 if (lo[1] == constm1_rtx)
20431 ix86_expand_branch (code, hi[0], hi[1], label);
20432 return;
20434 break;
20435 default:
20436 break;
20439 /* Otherwise, we need two or three jumps. */
20441 label2 = gen_label_rtx ();
20443 code1 = code;
20444 code2 = swap_condition (code);
20445 code3 = unsigned_condition (code);
20447 switch (code)
20449 case LT: case GT: case LTU: case GTU:
20450 break;
20452 case LE: code1 = LT; code2 = GT; break;
20453 case GE: code1 = GT; code2 = LT; break;
20454 case LEU: code1 = LTU; code2 = GTU; break;
20455 case GEU: code1 = GTU; code2 = LTU; break;
20457 case EQ: code1 = UNKNOWN; code2 = NE; break;
20458 case NE: code2 = UNKNOWN; break;
20460 default:
20461 gcc_unreachable ();
20465 * a < b =>
20466 * if (hi(a) < hi(b)) goto true;
20467 * if (hi(a) > hi(b)) goto false;
20468 * if (lo(a) < lo(b)) goto true;
20469 * false:
20472 if (code1 != UNKNOWN)
20473 ix86_expand_branch (code1, hi[0], hi[1], label);
20474 if (code2 != UNKNOWN)
20475 ix86_expand_branch (code2, hi[0], hi[1], label2);
20477 ix86_expand_branch (code3, lo[0], lo[1], label);
20479 if (code2 != UNKNOWN)
20480 emit_label (label2);
20481 return;
20484 default:
20485 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20486 goto simple;
20490 /* Split branch based on floating point condition. */
20491 void
20492 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20493 rtx target1, rtx target2, rtx tmp)
20495 rtx condition;
20496 rtx i;
20498 if (target2 != pc_rtx)
20500 rtx tmp = target2;
20501 code = reverse_condition_maybe_unordered (code);
20502 target2 = target1;
20503 target1 = tmp;
20506 condition = ix86_expand_fp_compare (code, op1, op2,
20507 tmp);
20509 i = emit_jump_insn (gen_rtx_SET
20510 (VOIDmode, pc_rtx,
20511 gen_rtx_IF_THEN_ELSE (VOIDmode,
20512 condition, target1, target2)));
20513 if (split_branch_probability >= 0)
20514 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20517 void
20518 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20520 rtx ret;
20522 gcc_assert (GET_MODE (dest) == QImode);
20524 ret = ix86_expand_compare (code, op0, op1);
20525 PUT_MODE (ret, QImode);
20526 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20529 /* Expand comparison setting or clearing carry flag. Return true when
20530 successful and set pop for the operation. */
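/* Some example rewrites performed below (illustrative): "a == 0"
   becomes "(unsigned) a < 1" (LTU), "(unsigned) a > 5" becomes
   "(unsigned) a >= 6" (GEU), and a signed "a >= 0" becomes
   "(unsigned) a < 0x80000000", so every accepted case ends up as an
   LTU or GEU test driven purely by the carry flag.  */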
20531 static bool
20532 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20534 machine_mode mode =
20535 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20537 /* Do not handle double-mode compares that go through the special path. */
20538 if (mode == (TARGET_64BIT ? TImode : DImode))
20539 return false;
20541 if (SCALAR_FLOAT_MODE_P (mode))
20543 rtx compare_op;
20544 rtx_insn *compare_seq;
20546 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20548 /* Shortcut: the following common codes never translate
20549 into carry flag compares. */
20550 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20551 || code == ORDERED || code == UNORDERED)
20552 return false;
20554 /* These comparisons require the zero flag; swap operands so they won't. */
20555 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20556 && !TARGET_IEEE_FP)
20558 std::swap (op0, op1);
20559 code = swap_condition (code);
20562 /* Try to expand the comparison and verify that we end up with
20563 a carry-flag-based comparison. This fails only when we decide
20564 to expand the comparison using arithmetic, which is not a
20565 common scenario. */
20566 start_sequence ();
20567 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20568 compare_seq = get_insns ();
20569 end_sequence ();
20571 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20572 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20573 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20574 else
20575 code = GET_CODE (compare_op);
20577 if (code != LTU && code != GEU)
20578 return false;
20580 emit_insn (compare_seq);
20581 *pop = compare_op;
20582 return true;
20585 if (!INTEGRAL_MODE_P (mode))
20586 return false;
20588 switch (code)
20590 case LTU:
20591 case GEU:
20592 break;
20594 /* Convert a==0 into (unsigned)a<1. */
20595 case EQ:
20596 case NE:
20597 if (op1 != const0_rtx)
20598 return false;
20599 op1 = const1_rtx;
20600 code = (code == EQ ? LTU : GEU);
20601 break;
20603 /* Convert a>b into b<a or a>=b+1. */
20604 case GTU:
20605 case LEU:
20606 if (CONST_INT_P (op1))
20608 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20609 /* Bail out on overflow. We could still swap the operands, but that
20610 would force loading the constant into a register. */
20611 if (op1 == const0_rtx
20612 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20613 return false;
20614 code = (code == GTU ? GEU : LTU);
20616 else
20618 std::swap (op1, op0);
20619 code = (code == GTU ? LTU : GEU);
20621 break;
20623 /* Convert a>=0 into (unsigned)a<0x80000000. */
20624 case LT:
20625 case GE:
20626 if (mode == DImode || op1 != const0_rtx)
20627 return false;
20628 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20629 code = (code == LT ? GEU : LTU);
20630 break;
20631 case LE:
20632 case GT:
20633 if (mode == DImode || op1 != constm1_rtx)
20634 return false;
20635 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20636 code = (code == LE ? GEU : LTU);
20637 break;
20639 default:
20640 return false;
20642 /* Swapping operands may cause a constant to appear as the first operand. */
20643 if (!nonimmediate_operand (op0, VOIDmode))
20645 if (!can_create_pseudo_p ())
20646 return false;
20647 op0 = force_reg (mode, op0);
20649 *pop = ix86_expand_compare (code, op0, op1);
20650 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20651 return true;
20654 bool
20655 ix86_expand_int_movcc (rtx operands[])
20657 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20658 rtx_insn *compare_seq;
20659 rtx compare_op;
20660 machine_mode mode = GET_MODE (operands[0]);
20661 bool sign_bit_compare_p = false;
20662 rtx op0 = XEXP (operands[1], 0);
20663 rtx op1 = XEXP (operands[1], 1);
20665 if (GET_MODE (op0) == TImode
20666 || (GET_MODE (op0) == DImode
20667 && !TARGET_64BIT))
20668 return false;
20670 start_sequence ();
20671 compare_op = ix86_expand_compare (code, op0, op1);
20672 compare_seq = get_insns ();
20673 end_sequence ();
20675 compare_code = GET_CODE (compare_op);
20677 if ((op1 == const0_rtx && (code == GE || code == LT))
20678 || (op1 == constm1_rtx && (code == GT || code == LE)))
20679 sign_bit_compare_p = true;
20681 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20682 HImode insns, we'd be swallowed in word prefix ops. */
20684 if ((mode != HImode || TARGET_FAST_PREFIX)
20685 && (mode != (TARGET_64BIT ? TImode : DImode))
20686 && CONST_INT_P (operands[2])
20687 && CONST_INT_P (operands[3]))
20689 rtx out = operands[0];
20690 HOST_WIDE_INT ct = INTVAL (operands[2]);
20691 HOST_WIDE_INT cf = INTVAL (operands[3]);
20692 HOST_WIDE_INT diff;
20694 diff = ct - cf;
20695 /* Sign bit compares are better done using shifts than by using
20696 sbb. */
20697 if (sign_bit_compare_p
20698 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20700 /* Detect overlap between destination and compare sources. */
20701 rtx tmp = out;
20703 if (!sign_bit_compare_p)
20705 rtx flags;
20706 bool fpcmp = false;
20708 compare_code = GET_CODE (compare_op);
20710 flags = XEXP (compare_op, 0);
20712 if (GET_MODE (flags) == CCFPmode
20713 || GET_MODE (flags) == CCFPUmode)
20715 fpcmp = true;
20716 compare_code
20717 = ix86_fp_compare_code_to_integer (compare_code);
20720 /* To simplify the rest of the code, restrict to the GEU case. */
20721 if (compare_code == LTU)
20723 HOST_WIDE_INT tmp = ct;
20724 ct = cf;
20725 cf = tmp;
20726 compare_code = reverse_condition (compare_code);
20727 code = reverse_condition (code);
20729 else
20731 if (fpcmp)
20732 PUT_CODE (compare_op,
20733 reverse_condition_maybe_unordered
20734 (GET_CODE (compare_op)));
20735 else
20736 PUT_CODE (compare_op,
20737 reverse_condition (GET_CODE (compare_op)));
20739 diff = ct - cf;
20741 if (reg_overlap_mentioned_p (out, op0)
20742 || reg_overlap_mentioned_p (out, op1))
20743 tmp = gen_reg_rtx (mode);
20745 if (mode == DImode)
20746 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20747 else
20748 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20749 flags, compare_op));
20751 else
20753 if (code == GT || code == GE)
20754 code = reverse_condition (code);
20755 else
20757 HOST_WIDE_INT tmp = ct;
20758 ct = cf;
20759 cf = tmp;
20760 diff = ct - cf;
20762 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20765 if (diff == 1)
20768 * cmpl op0,op1
20769 * sbbl dest,dest
20770 * [addl dest, ct]
20772 * Size 5 - 8.
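* E.g. with ct == 5 and cf == 4 (purely illustrative values):
* sbbl leaves 0 or -1 depending on the carry, and the optional
* addl of 5 then yields 5 or 4 respectively.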
20774 if (ct)
20775 tmp = expand_simple_binop (mode, PLUS,
20776 tmp, GEN_INT (ct),
20777 copy_rtx (tmp), 1, OPTAB_DIRECT);
20779 else if (cf == -1)
20782 * cmpl op0,op1
20783 * sbbl dest,dest
20784 * orl $ct, dest
20786 * Size 8.
20788 tmp = expand_simple_binop (mode, IOR,
20789 tmp, GEN_INT (ct),
20790 copy_rtx (tmp), 1, OPTAB_DIRECT);
20792 else if (diff == -1 && ct)
20795 * cmpl op0,op1
20796 * sbbl dest,dest
20797 * notl dest
20798 * [addl dest, cf]
20800 * Size 8 - 11.
20802 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20803 if (cf)
20804 tmp = expand_simple_binop (mode, PLUS,
20805 copy_rtx (tmp), GEN_INT (cf),
20806 copy_rtx (tmp), 1, OPTAB_DIRECT);
20808 else
20811 * cmpl op0,op1
20812 * sbbl dest,dest
20813 * [notl dest]
20814 * andl cf - ct, dest
20815 * [addl dest, ct]
20817 * Size 8 - 11.
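* E.g. with ct == 3 and cf == 10 (illustrative values): the 0/-1
* produced by sbbl, ANDed with cf - ct == 7, gives 0 or 7, and
* adding ct == 3 produces the requested 3 or 10.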
20820 if (cf == 0)
20822 cf = ct;
20823 ct = 0;
20824 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20827 tmp = expand_simple_binop (mode, AND,
20828 copy_rtx (tmp),
20829 gen_int_mode (cf - ct, mode),
20830 copy_rtx (tmp), 1, OPTAB_DIRECT);
20831 if (ct)
20832 tmp = expand_simple_binop (mode, PLUS,
20833 copy_rtx (tmp), GEN_INT (ct),
20834 copy_rtx (tmp), 1, OPTAB_DIRECT);
20837 if (!rtx_equal_p (tmp, out))
20838 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20840 return true;
20843 if (diff < 0)
20845 machine_mode cmp_mode = GET_MODE (op0);
20847 std::swap (ct, cf);
20848 diff = -diff;
20850 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20852 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20854 /* We may be reversing an unordered compare to a normal compare, which
20855 is not valid in general (we may convert a non-trapping condition
20856 to a trapping one); however, on i386 we currently emit all
20857 comparisons unordered. */
20858 compare_code = reverse_condition_maybe_unordered (compare_code);
20859 code = reverse_condition_maybe_unordered (code);
20861 else
20863 compare_code = reverse_condition (compare_code);
20864 code = reverse_condition (code);
20868 compare_code = UNKNOWN;
20869 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20870 && CONST_INT_P (op1))
20872 if (op1 == const0_rtx
20873 && (code == LT || code == GE))
20874 compare_code = code;
20875 else if (op1 == constm1_rtx)
20877 if (code == LE)
20878 compare_code = LT;
20879 else if (code == GT)
20880 compare_code = GE;
20884 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20885 if (compare_code != UNKNOWN
20886 && GET_MODE (op0) == GET_MODE (out)
20887 && (cf == -1 || ct == -1))
20889 /* If lea code below could be used, only optimize
20890 if it results in a 2 insn sequence. */
20892 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20893 || diff == 3 || diff == 5 || diff == 9)
20894 || (compare_code == LT && ct == -1)
20895 || (compare_code == GE && cf == -1))
20898 * notl op1 (if necessary)
20899 * sarl $31, op1
20900 * orl cf, op1
20902 if (ct != -1)
20904 cf = ct;
20905 ct = -1;
20906 code = reverse_condition (code);
20909 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20911 out = expand_simple_binop (mode, IOR,
20912 out, GEN_INT (cf),
20913 out, 1, OPTAB_DIRECT);
20914 if (out != operands[0])
20915 emit_move_insn (operands[0], out);
20917 return true;
20922 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20923 || diff == 3 || diff == 5 || diff == 9)
20924 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20925 && (mode != DImode
20926 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20929 * xorl dest,dest
20930 * cmpl op1,op2
20931 * setcc dest
20932 * lea cf(dest*(ct-cf)),dest
20934 * Size 14.
20936 * This also catches the degenerate setcc-only case.
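* E.g. with ct == 7 and cf == 2 (illustrative values, diff == 5):
* setcc leaves 0 or 1 in dest and "leal 2(%eax,%eax,4), %eax"
* computes 2 + dest * 5, i.e. 2 or 7.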
20939 rtx tmp;
20940 int nops;
20942 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20944 nops = 0;
20945 /* On x86_64 the lea instruction operates on Pmode, so we need
20946 to get the arithmetic done in the proper mode to match. */
20947 if (diff == 1)
20948 tmp = copy_rtx (out);
20949 else
20951 rtx out1;
20952 out1 = copy_rtx (out);
20953 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20954 nops++;
20955 if (diff & 1)
20957 tmp = gen_rtx_PLUS (mode, tmp, out1);
20958 nops++;
20961 if (cf != 0)
20963 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20964 nops++;
20966 if (!rtx_equal_p (tmp, out))
20968 if (nops == 1)
20969 out = force_operand (tmp, copy_rtx (out));
20970 else
20971 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20973 if (!rtx_equal_p (out, operands[0]))
20974 emit_move_insn (operands[0], copy_rtx (out));
20976 return true;
20980 * General case: Jumpful:
20981 * xorl dest,dest cmpl op1, op2
20982 * cmpl op1, op2 movl ct, dest
20983 * setcc dest jcc 1f
20984 * decl dest movl cf, dest
20985 * andl (cf-ct),dest 1:
20986 * addl ct,dest
20988 * Size 20. Size 14.
20990 * This is reasonably steep, but branch mispredict costs are
20991 * high on modern cpus, so consider failing only if optimizing
20992 * for space.
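* E.g. with ct == 3 and cf == 10 (illustrative values): setcc gives
* 0 or 1, decl turns that into -1 or 0, ANDing with cf - ct == 7
* gives 7 or 0, and adding ct == 3 yields 10 or 3 without any branch.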
20995 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20996 && BRANCH_COST (optimize_insn_for_speed_p (),
20997 false) >= 2)
20999 if (cf == 0)
21001 machine_mode cmp_mode = GET_MODE (op0);
21003 cf = ct;
21004 ct = 0;
21006 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21008 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21010 /* We may be reversing an unordered compare to a normal compare,
21011 which is not valid in general (we may convert a non-trapping
21012 condition to a trapping one); however, on i386 we currently
21013 emit all comparisons unordered. */
21014 code = reverse_condition_maybe_unordered (code);
21016 else
21018 code = reverse_condition (code);
21019 if (compare_code != UNKNOWN)
21020 compare_code = reverse_condition (compare_code);
21024 if (compare_code != UNKNOWN)
21026 /* notl op1 (if needed)
21027 sarl $31, op1
21028 andl (cf-ct), op1
21029 addl ct, op1
21031 For x < 0 (resp. x <= -1) there will be no notl,
21032 so if possible swap the constants to get rid of the
21033 complement.
21034 True/false will be -1/0 while code below (store flag
21035 followed by decrement) is 0/-1, so the constants need
21036 to be exchanged once more. */
21038 if (compare_code == GE || !cf)
21040 code = reverse_condition (code);
21041 compare_code = LT;
21043 else
21044 std::swap (cf, ct);
21046 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21048 else
21050 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21052 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21053 constm1_rtx,
21054 copy_rtx (out), 1, OPTAB_DIRECT);
21057 out = expand_simple_binop (mode, AND, copy_rtx (out),
21058 gen_int_mode (cf - ct, mode),
21059 copy_rtx (out), 1, OPTAB_DIRECT);
21060 if (ct)
21061 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21062 copy_rtx (out), 1, OPTAB_DIRECT);
21063 if (!rtx_equal_p (out, operands[0]))
21064 emit_move_insn (operands[0], copy_rtx (out));
21066 return true;
21070 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21072 /* Try a few things more with specific constants and a variable. */
21074 optab op;
21075 rtx var, orig_out, out, tmp;
21077 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21078 return false;
21080 /* If one of the two operands is an interesting constant, load an
21081 all-zeros or all-ones constant instead and mask the variable in with a logical operation. */
21083 if (CONST_INT_P (operands[2]))
21085 var = operands[3];
21086 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21087 operands[3] = constm1_rtx, op = and_optab;
21088 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21089 operands[3] = const0_rtx, op = ior_optab;
21090 else
21091 return false;
21093 else if (CONST_INT_P (operands[3]))
21095 var = operands[2];
21096 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21097 operands[2] = constm1_rtx, op = and_optab;
21098 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21099 operands[2] = const0_rtx, op = ior_optab;
21100 else
21101 return false;
21103 else
21104 return false;
21106 orig_out = operands[0];
21107 tmp = gen_reg_rtx (mode);
21108 operands[0] = tmp;
21110 /* Recurse to get the constant loaded. */
21111 if (ix86_expand_int_movcc (operands) == 0)
21112 return false;
21114 /* Mask in the interesting variable. */
21115 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21116 OPTAB_WIDEN);
21117 if (!rtx_equal_p (out, orig_out))
21118 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21120 return true;
21124 * For comparison with above,
21126 * movl cf,dest
21127 * movl ct,tmp
21128 * cmpl op1,op2
21129 * cmovcc tmp,dest
21131 * Size 15.
21134 if (! nonimmediate_operand (operands[2], mode))
21135 operands[2] = force_reg (mode, operands[2]);
21136 if (! nonimmediate_operand (operands[3], mode))
21137 operands[3] = force_reg (mode, operands[3]);
21139 if (! register_operand (operands[2], VOIDmode)
21140 && (mode == QImode
21141 || ! register_operand (operands[3], VOIDmode)))
21142 operands[2] = force_reg (mode, operands[2]);
21144 if (mode == QImode
21145 && ! register_operand (operands[3], VOIDmode))
21146 operands[3] = force_reg (mode, operands[3]);
21148 emit_insn (compare_seq);
21149 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21150 gen_rtx_IF_THEN_ELSE (mode,
21151 compare_op, operands[2],
21152 operands[3])));
21153 return true;
21156 /* Swap, force into registers, or otherwise massage the two operands
21157 to an sse comparison with a mask result. Thus we differ a bit from
21158 ix86_prepare_fp_compare_args which expects to produce a flags result.
21160 The DEST operand exists to help determine whether to commute commutative
21161 operators. The POP0/POP1 operands are updated in place. The new
21162 comparison code is returned, or UNKNOWN if not implementable. */
21164 static enum rtx_code
21165 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21166 rtx *pop0, rtx *pop1)
21168 switch (code)
21170 case LTGT:
21171 case UNEQ:
21172 /* AVX supports all the needed comparisons. */
21173 if (TARGET_AVX)
21174 break;
21175 /* We have no LTGT as an operator. We could implement it with
21176 NE & ORDERED, but this requires an extra temporary. It's
21177 not clear that it's worth it. */
21178 return UNKNOWN;
21180 case LT:
21181 case LE:
21182 case UNGT:
21183 case UNGE:
21184 /* These are supported directly. */
21185 break;
21187 case EQ:
21188 case NE:
21189 case UNORDERED:
21190 case ORDERED:
21191 /* AVX has 3 operand comparisons, no need to swap anything. */
21192 if (TARGET_AVX)
21193 break;
21194 /* For commutative operators, try to canonicalize the destination
21195 operand to be first in the comparison - this helps reload to
21196 avoid extra moves. */
21197 if (!dest || !rtx_equal_p (dest, *pop1))
21198 break;
21199 /* FALLTHRU */
21201 case GE:
21202 case GT:
21203 case UNLE:
21204 case UNLT:
21205 /* These are not supported directly before AVX, and furthermore
21206 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21207 comparison operands to transform into something that is
21208 supported. */
21209 std::swap (*pop0, *pop1);
21210 code = swap_condition (code);
21211 break;
21213 default:
21214 gcc_unreachable ();
21217 return code;
21220 /* Detect conditional moves that exactly match min/max operational
21221 semantics. Note that this is IEEE safe, as long as we don't
21222 interchange the operands.
21224 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21225 and TRUE if the operation is successful and instructions are emitted. */
21227 static bool
21228 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21229 rtx cmp_op1, rtx if_true, rtx if_false)
21231 machine_mode mode;
21232 bool is_min;
21233 rtx tmp;
21235 if (code == LT)
21237 else if (code == UNGE)
21238 std::swap (if_true, if_false);
21239 else
21240 return false;
21242 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21243 is_min = true;
21244 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21245 is_min = false;
21246 else
21247 return false;
21249 mode = GET_MODE (dest);
21251 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21252 but MODE may be a vector mode and thus not appropriate. */
21253 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21255 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21256 rtvec v;
21258 if_true = force_reg (mode, if_true);
21259 v = gen_rtvec (2, if_true, if_false);
21260 tmp = gen_rtx_UNSPEC (mode, v, u);
21262 else
21264 code = is_min ? SMIN : SMAX;
21265 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21268 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21269 return true;
21272 /* Expand an sse vector comparison. Return the register with the result. */
21274 static rtx
21275 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21276 rtx op_true, rtx op_false)
21278 machine_mode mode = GET_MODE (dest);
21279 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21281 /* In the general case the result of the comparison can differ from the operands' type. */
21282 machine_mode cmp_mode;
21284 /* In AVX512F the result of comparison is an integer mask. */
21285 bool maskcmp = false;
21286 rtx x;
21288 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21290 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21291 gcc_assert (cmp_mode != BLKmode);
21293 maskcmp = true;
21295 else
21296 cmp_mode = cmp_ops_mode;
21299 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21300 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21301 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21303 if (optimize
21304 || reg_overlap_mentioned_p (dest, op_true)
21305 || reg_overlap_mentioned_p (dest, op_false))
21306 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21308 /* Compare patterns for int modes are unspec in AVX512F only. */
21309 if (maskcmp && (code == GT || code == EQ))
21311 rtx (*gen)(rtx, rtx, rtx);
21313 switch (cmp_ops_mode)
21315 case V16SImode:
21316 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21317 break;
21318 case V8DImode:
21319 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21320 break;
21321 default:
21322 gen = NULL;
21325 if (gen)
21327 emit_insn (gen (dest, cmp_op0, cmp_op1));
21328 return dest;
21331 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21333 if (cmp_mode != mode && !maskcmp)
21335 x = force_reg (cmp_ops_mode, x);
21336 convert_move (dest, x, false);
21338 else
21339 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21341 return dest;
21344 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21345 operations. This is used for both scalar and vector conditional moves. */
21347 static void
21348 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21350 machine_mode mode = GET_MODE (dest);
21351 machine_mode cmpmode = GET_MODE (cmp);
21353 /* In AVX512F the result of comparison is an integer mask. */
21354 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21356 rtx t2, t3, x;
21358 if (vector_all_ones_operand (op_true, mode)
21359 && rtx_equal_p (op_false, CONST0_RTX (mode))
21360 && !maskcmp)
21362 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21364 else if (op_false == CONST0_RTX (mode)
21365 && !maskcmp)
21367 op_true = force_reg (mode, op_true);
21368 x = gen_rtx_AND (mode, cmp, op_true);
21369 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21371 else if (op_true == CONST0_RTX (mode)
21372 && !maskcmp)
21374 op_false = force_reg (mode, op_false);
21375 x = gen_rtx_NOT (mode, cmp);
21376 x = gen_rtx_AND (mode, x, op_false);
21377 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21379 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21380 && !maskcmp)
21382 op_false = force_reg (mode, op_false);
21383 x = gen_rtx_IOR (mode, cmp, op_false);
21384 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21386 else if (TARGET_XOP
21387 && !maskcmp)
21389 op_true = force_reg (mode, op_true);
21391 if (!nonimmediate_operand (op_false, mode))
21392 op_false = force_reg (mode, op_false);
21394 emit_insn (gen_rtx_SET (mode, dest,
21395 gen_rtx_IF_THEN_ELSE (mode, cmp,
21396 op_true,
21397 op_false)));
21399 else
21401 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21402 rtx d = dest;
21404 if (!nonimmediate_operand (op_true, mode))
21405 op_true = force_reg (mode, op_true);
21407 op_false = force_reg (mode, op_false);
21409 switch (mode)
21411 case V4SFmode:
21412 if (TARGET_SSE4_1)
21413 gen = gen_sse4_1_blendvps;
21414 break;
21415 case V2DFmode:
21416 if (TARGET_SSE4_1)
21417 gen = gen_sse4_1_blendvpd;
21418 break;
21419 case V16QImode:
21420 case V8HImode:
21421 case V4SImode:
21422 case V2DImode:
21423 if (TARGET_SSE4_1)
21425 gen = gen_sse4_1_pblendvb;
21426 if (mode != V16QImode)
21427 d = gen_reg_rtx (V16QImode);
21428 op_false = gen_lowpart (V16QImode, op_false);
21429 op_true = gen_lowpart (V16QImode, op_true);
21430 cmp = gen_lowpart (V16QImode, cmp);
21432 break;
21433 case V8SFmode:
21434 if (TARGET_AVX)
21435 gen = gen_avx_blendvps256;
21436 break;
21437 case V4DFmode:
21438 if (TARGET_AVX)
21439 gen = gen_avx_blendvpd256;
21440 break;
21441 case V32QImode:
21442 case V16HImode:
21443 case V8SImode:
21444 case V4DImode:
21445 if (TARGET_AVX2)
21447 gen = gen_avx2_pblendvb;
21448 if (mode != V32QImode)
21449 d = gen_reg_rtx (V32QImode);
21450 op_false = gen_lowpart (V32QImode, op_false);
21451 op_true = gen_lowpart (V32QImode, op_true);
21452 cmp = gen_lowpart (V32QImode, cmp);
21454 break;
21456 case V64QImode:
21457 gen = gen_avx512bw_blendmv64qi;
21458 break;
21459 case V32HImode:
21460 gen = gen_avx512bw_blendmv32hi;
21461 break;
21462 case V16SImode:
21463 gen = gen_avx512f_blendmv16si;
21464 break;
21465 case V8DImode:
21466 gen = gen_avx512f_blendmv8di;
21467 break;
21468 case V8DFmode:
21469 gen = gen_avx512f_blendmv8df;
21470 break;
21471 case V16SFmode:
21472 gen = gen_avx512f_blendmv16sf;
21473 break;
21475 default:
21476 break;
21479 if (gen != NULL)
21481 emit_insn (gen (d, op_false, op_true, cmp));
21482 if (d != dest)
21483 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21485 else
21487 op_true = force_reg (mode, op_true);
21489 t2 = gen_reg_rtx (mode);
21490 if (optimize)
21491 t3 = gen_reg_rtx (mode);
21492 else
21493 t3 = dest;
21495 x = gen_rtx_AND (mode, op_true, cmp);
21496 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21498 x = gen_rtx_NOT (mode, cmp);
21499 x = gen_rtx_AND (mode, x, op_false);
21500 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21502 x = gen_rtx_IOR (mode, t3, t2);
21503 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21508 /* Expand a floating-point conditional move. Return true if successful. */
21510 bool
21511 ix86_expand_fp_movcc (rtx operands[])
21513 machine_mode mode = GET_MODE (operands[0]);
21514 enum rtx_code code = GET_CODE (operands[1]);
21515 rtx tmp, compare_op;
21516 rtx op0 = XEXP (operands[1], 0);
21517 rtx op1 = XEXP (operands[1], 1);
21519 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21521 machine_mode cmode;
21523 /* Since we've no cmove for sse registers, don't force bad register
21524 allocation just to gain access to it. Deny movcc when the
21525 comparison mode doesn't match the move mode. */
21526 cmode = GET_MODE (op0);
21527 if (cmode == VOIDmode)
21528 cmode = GET_MODE (op1);
21529 if (cmode != mode)
21530 return false;
21532 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21533 if (code == UNKNOWN)
21534 return false;
21536 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21537 operands[2], operands[3]))
21538 return true;
21540 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21541 operands[2], operands[3]);
21542 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21543 return true;
21546 if (GET_MODE (op0) == TImode
21547 || (GET_MODE (op0) == DImode
21548 && !TARGET_64BIT))
21549 return false;
21551 /* The floating point conditional move instructions don't directly
21552 support conditions resulting from a signed integer comparison. */
21554 compare_op = ix86_expand_compare (code, op0, op1);
21555 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21557 tmp = gen_reg_rtx (QImode);
21558 ix86_expand_setcc (tmp, code, op0, op1);
21560 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21563 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21564 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21565 operands[2], operands[3])));
21567 return true;
21570 /* Expand a floating-point vector conditional move; a vcond operation
21571 rather than a movcc operation. */
21573 bool
21574 ix86_expand_fp_vcond (rtx operands[])
21576 enum rtx_code code = GET_CODE (operands[3]);
21577 rtx cmp;
21579 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21580 &operands[4], &operands[5]);
21581 if (code == UNKNOWN)
21583 rtx temp;
21584 switch (GET_CODE (operands[3]))
21586 case LTGT:
21587 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21588 operands[5], operands[0], operands[0]);
21589 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21590 operands[5], operands[1], operands[2]);
21591 code = AND;
21592 break;
21593 case UNEQ:
21594 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21595 operands[5], operands[0], operands[0]);
21596 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21597 operands[5], operands[1], operands[2]);
21598 code = IOR;
21599 break;
21600 default:
21601 gcc_unreachable ();
21603 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21604 OPTAB_DIRECT);
21605 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21606 return true;
21609 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21610 operands[5], operands[1], operands[2]))
21611 return true;
21613 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21614 operands[1], operands[2]);
21615 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21616 return true;
21619 /* Expand a signed/unsigned integral vector conditional move. */
21621 bool
21622 ix86_expand_int_vcond (rtx operands[])
21624 machine_mode data_mode = GET_MODE (operands[0]);
21625 machine_mode mode = GET_MODE (operands[4]);
21626 enum rtx_code code = GET_CODE (operands[3]);
21627 bool negate = false;
21628 rtx x, cop0, cop1;
21630 cop0 = operands[4];
21631 cop1 = operands[5];
21633 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21634 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
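/* E.g. for a 32-bit element holding -7, an arithmetic shift right by 31
   replicates the sign bit and yields -1, while a logical shift right by
   31 isolates it and yields 1; a non-negative element yields 0 in both
   cases.  */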
21635 if ((code == LT || code == GE)
21636 && data_mode == mode
21637 && cop1 == CONST0_RTX (mode)
21638 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21639 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21640 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21641 && (GET_MODE_SIZE (data_mode) == 16
21642 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21644 rtx negop = operands[2 - (code == LT)];
21645 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21646 if (negop == CONST1_RTX (data_mode))
21648 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21649 operands[0], 1, OPTAB_DIRECT);
21650 if (res != operands[0])
21651 emit_move_insn (operands[0], res);
21652 return true;
21654 else if (GET_MODE_INNER (data_mode) != DImode
21655 && vector_all_ones_operand (negop, data_mode))
21657 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21658 operands[0], 0, OPTAB_DIRECT);
21659 if (res != operands[0])
21660 emit_move_insn (operands[0], res);
21661 return true;
21665 if (!nonimmediate_operand (cop1, mode))
21666 cop1 = force_reg (mode, cop1);
21667 if (!general_operand (operands[1], data_mode))
21668 operands[1] = force_reg (data_mode, operands[1]);
21669 if (!general_operand (operands[2], data_mode))
21670 operands[2] = force_reg (data_mode, operands[2]);
21672 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21673 if (TARGET_XOP
21674 && (mode == V16QImode || mode == V8HImode
21675 || mode == V4SImode || mode == V2DImode))
21677 else
21679 /* Canonicalize the comparison to EQ, GT, GTU. */
21680 switch (code)
21682 case EQ:
21683 case GT:
21684 case GTU:
21685 break;
21687 case NE:
21688 case LE:
21689 case LEU:
21690 code = reverse_condition (code);
21691 negate = true;
21692 break;
21694 case GE:
21695 case GEU:
21696 code = reverse_condition (code);
21697 negate = true;
21698 /* FALLTHRU */
21700 case LT:
21701 case LTU:
21702 std::swap (cop0, cop1);
21703 code = swap_condition (code);
21704 break;
21706 default:
21707 gcc_unreachable ();
21710 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21711 if (mode == V2DImode)
21713 switch (code)
21715 case EQ:
21716 /* SSE4.1 supports EQ. */
21717 if (!TARGET_SSE4_1)
21718 return false;
21719 break;
21721 case GT:
21722 case GTU:
21723 /* SSE4.2 supports GT/GTU. */
21724 if (!TARGET_SSE4_2)
21725 return false;
21726 break;
21728 default:
21729 gcc_unreachable ();
21733 /* Unsigned parallel compare is not supported by the hardware.
21734 Play some tricks to turn this into a signed comparison
21735 against 0. */
21736 if (code == GTU)
21738 cop0 = force_reg (mode, cop0);
21740 switch (mode)
21742 case V16SImode:
21743 case V8DImode:
21744 case V8SImode:
21745 case V4DImode:
21746 case V4SImode:
21747 case V2DImode:
21749 rtx t1, t2, mask;
21750 rtx (*gen_sub3) (rtx, rtx, rtx);
21752 switch (mode)
21754 case V16SImode: gen_sub3 = gen_subv16si3; break;
21755 case V8DImode: gen_sub3 = gen_subv8di3; break;
21756 case V8SImode: gen_sub3 = gen_subv8si3; break;
21757 case V4DImode: gen_sub3 = gen_subv4di3; break;
21758 case V4SImode: gen_sub3 = gen_subv4si3; break;
21759 case V2DImode: gen_sub3 = gen_subv2di3; break;
21760 default:
21761 gcc_unreachable ();
21763 /* Subtract (-(INT MAX) - 1) from both operands to make
21764 them signed. */
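/* E.g. for 32-bit elements this subtracts 0x80000000 (modulo 2^32),
   which is the same as flipping the sign bit: 0xffffffff and 0x1
   become 0x7fffffff and 0x80000001, so the unsigned ordering of the
   original values matches the signed ordering of the biased ones.  */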
21765 mask = ix86_build_signbit_mask (mode, true, false);
21766 t1 = gen_reg_rtx (mode);
21767 emit_insn (gen_sub3 (t1, cop0, mask));
21769 t2 = gen_reg_rtx (mode);
21770 emit_insn (gen_sub3 (t2, cop1, mask));
21772 cop0 = t1;
21773 cop1 = t2;
21774 code = GT;
21776 break;
21778 case V64QImode:
21779 case V32HImode:
21780 case V32QImode:
21781 case V16HImode:
21782 case V16QImode:
21783 case V8HImode:
21784 /* Perform a parallel unsigned saturating subtraction. */
21785 x = gen_reg_rtx (mode);
21786 emit_insn (gen_rtx_SET (VOIDmode, x,
21787 gen_rtx_US_MINUS (mode, cop0, cop1)));
21789 cop0 = x;
21790 cop1 = CONST0_RTX (mode);
21791 code = EQ;
21792 negate = !negate;
21793 break;
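/* E.g. per byte element, 3 minus 5 saturates to 0 while 5 minus 3
   gives 2, so a GTU b holds exactly when the saturating difference is
   non-zero; the EQ compare against zero together with the flipped
   NEGATE recovers that condition.  */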
21795 default:
21796 gcc_unreachable ();
21801 /* Allow the comparison to be done in one mode, but the movcc to
21802 happen in another mode. */
21803 if (data_mode == mode)
21805 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21806 operands[1+negate], operands[2-negate]);
21808 else
21810 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21811 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21812 operands[1+negate], operands[2-negate]);
21813 if (GET_MODE (x) == mode)
21814 x = gen_lowpart (data_mode, x);
21817 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21818 operands[2-negate]);
21819 return true;
21822 /* AVX512F does support 64-byte integer vector operations,
21823 thus the longest vector we are faced with is V64QImode. */
21824 #define MAX_VECT_LEN 64
21826 struct expand_vec_perm_d
21828 rtx target, op0, op1;
21829 unsigned char perm[MAX_VECT_LEN];
21830 machine_mode vmode;
21831 unsigned char nelt;
21832 bool one_operand_p;
21833 bool testing_p;
21836 static bool
21837 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21838 struct expand_vec_perm_d *d)
21840 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21841 expander, so args are either in d, or in op0, op1 etc. */
21842 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21843 machine_mode maskmode = mode;
21844 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21846 switch (mode)
21848 case V8HImode:
21849 if (TARGET_AVX512VL && TARGET_AVX512BW)
21850 gen = gen_avx512vl_vpermi2varv8hi3;
21851 break;
21852 case V16HImode:
21853 if (TARGET_AVX512VL && TARGET_AVX512BW)
21854 gen = gen_avx512vl_vpermi2varv16hi3;
21855 break;
21856 case V64QImode:
21857 if (TARGET_AVX512VBMI)
21858 gen = gen_avx512bw_vpermi2varv64qi3;
21859 break;
21860 case V32HImode:
21861 if (TARGET_AVX512BW)
21862 gen = gen_avx512bw_vpermi2varv32hi3;
21863 break;
21864 case V4SImode:
21865 if (TARGET_AVX512VL)
21866 gen = gen_avx512vl_vpermi2varv4si3;
21867 break;
21868 case V8SImode:
21869 if (TARGET_AVX512VL)
21870 gen = gen_avx512vl_vpermi2varv8si3;
21871 break;
21872 case V16SImode:
21873 if (TARGET_AVX512F)
21874 gen = gen_avx512f_vpermi2varv16si3;
21875 break;
21876 case V4SFmode:
21877 if (TARGET_AVX512VL)
21879 gen = gen_avx512vl_vpermi2varv4sf3;
21880 maskmode = V4SImode;
21882 break;
21883 case V8SFmode:
21884 if (TARGET_AVX512VL)
21886 gen = gen_avx512vl_vpermi2varv8sf3;
21887 maskmode = V8SImode;
21889 break;
21890 case V16SFmode:
21891 if (TARGET_AVX512F)
21893 gen = gen_avx512f_vpermi2varv16sf3;
21894 maskmode = V16SImode;
21896 break;
21897 case V2DImode:
21898 if (TARGET_AVX512VL)
21899 gen = gen_avx512vl_vpermi2varv2di3;
21900 break;
21901 case V4DImode:
21902 if (TARGET_AVX512VL)
21903 gen = gen_avx512vl_vpermi2varv4di3;
21904 break;
21905 case V8DImode:
21906 if (TARGET_AVX512F)
21907 gen = gen_avx512f_vpermi2varv8di3;
21908 break;
21909 case V2DFmode:
21910 if (TARGET_AVX512VL)
21912 gen = gen_avx512vl_vpermi2varv2df3;
21913 maskmode = V2DImode;
21915 break;
21916 case V4DFmode:
21917 if (TARGET_AVX512VL)
21919 gen = gen_avx512vl_vpermi2varv4df3;
21920 maskmode = V4DImode;
21922 break;
21923 case V8DFmode:
21924 if (TARGET_AVX512F)
21926 gen = gen_avx512f_vpermi2varv8df3;
21927 maskmode = V8DImode;
21929 break;
21930 default:
21931 break;
21934 if (gen == NULL)
21935 return false;
21937 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21938 expander, so args are either in d, or in op0, op1 etc. */
21939 if (d)
21941 rtx vec[64];
21942 target = d->target;
21943 op0 = d->op0;
21944 op1 = d->op1;
21945 for (int i = 0; i < d->nelt; ++i)
21946 vec[i] = GEN_INT (d->perm[i]);
21947 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21950 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21951 return true;
21954 /* Expand a variable vector permutation. */
21956 void
21957 ix86_expand_vec_perm (rtx operands[])
21959 rtx target = operands[0];
21960 rtx op0 = operands[1];
21961 rtx op1 = operands[2];
21962 rtx mask = operands[3];
21963 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21964 machine_mode mode = GET_MODE (op0);
21965 machine_mode maskmode = GET_MODE (mask);
21966 int w, e, i;
21967 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21969 /* Number of elements in the vector. */
21970 w = GET_MODE_NUNITS (mode);
21971 e = GET_MODE_UNIT_SIZE (mode);
21972 gcc_assert (w <= 64);
21974 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21975 return;
21977 if (TARGET_AVX2)
21979 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21981 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21982 a constant shuffle operand.  With a tiny bit of effort we can
21983 use VPERMD instead.  A re-interpretation stall for V4DFmode is
21984 unfortunate but there's no avoiding it.
21985 Similarly, for V16HImode we don't have instructions for variable
21986 shuffling, while for V32QImode we can use vpshufb; vpshufb;
21987 vpermq; vpor after preparing suitable masks. */
21989 if (mode == V16HImode)
21991 maskmode = mode = V32QImode;
21992 w = 32;
21993 e = 1;
21995 else
21997 maskmode = mode = V8SImode;
21998 w = 8;
21999 e = 4;
22001 t1 = gen_reg_rtx (maskmode);
22003 /* Replicate the low bits of the V4DImode mask into V8SImode:
22004 mask = { A B C D }
22005 t1 = { A A B B C C D D }. */
22006 for (i = 0; i < w / 2; ++i)
22007 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22008 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22009 vt = force_reg (maskmode, vt);
22010 mask = gen_lowpart (maskmode, mask);
22011 if (maskmode == V8SImode)
22012 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22013 else
22014 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22016 /* Multiply the shuffle indices by two. */
22017 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22018 OPTAB_DIRECT);
22020 /* Add one to the odd shuffle indices:
22021 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22022 for (i = 0; i < w / 2; ++i)
22024 vec[i * 2] = const0_rtx;
22025 vec[i * 2 + 1] = const1_rtx;
22027 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22028 vt = validize_mem (force_const_mem (maskmode, vt));
22029 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22030 OPTAB_DIRECT);
22032 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22033 operands[3] = mask = t1;
22034 target = gen_reg_rtx (mode);
22035 op0 = gen_lowpart (mode, op0);
22036 op1 = gen_lowpart (mode, op1);
22039 switch (mode)
22041 case V8SImode:
22042 /* The VPERMD and VPERMPS instructions already properly ignore
22043 the high bits of the shuffle elements. No need for us to
22044 perform an AND ourselves. */
22045 if (one_operand_shuffle)
22047 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22048 if (target != operands[0])
22049 emit_move_insn (operands[0],
22050 gen_lowpart (GET_MODE (operands[0]), target));
22052 else
22054 t1 = gen_reg_rtx (V8SImode);
22055 t2 = gen_reg_rtx (V8SImode);
22056 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22057 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22058 goto merge_two;
22060 return;
22062 case V8SFmode:
22063 mask = gen_lowpart (V8SImode, mask);
22064 if (one_operand_shuffle)
22065 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22066 else
22068 t1 = gen_reg_rtx (V8SFmode);
22069 t2 = gen_reg_rtx (V8SFmode);
22070 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22071 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22072 goto merge_two;
22074 return;
22076 case V4SImode:
22077 /* By combining the two 128-bit input vectors into one 256-bit
22078 input vector, we can use VPERMD and VPERMPS for the full
22079 two-operand shuffle. */
22080 t1 = gen_reg_rtx (V8SImode);
22081 t2 = gen_reg_rtx (V8SImode);
22082 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22083 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22084 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22085 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22086 return;
22088 case V4SFmode:
22089 t1 = gen_reg_rtx (V8SFmode);
22090 t2 = gen_reg_rtx (V8SImode);
22091 mask = gen_lowpart (V4SImode, mask);
22092 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22093 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22094 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22095 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22096 return;
22098 case V32QImode:
22099 t1 = gen_reg_rtx (V32QImode);
22100 t2 = gen_reg_rtx (V32QImode);
22101 t3 = gen_reg_rtx (V32QImode);
22102 vt2 = GEN_INT (-128);
22103 for (i = 0; i < 32; i++)
22104 vec[i] = vt2;
22105 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22106 vt = force_reg (V32QImode, vt);
22107 for (i = 0; i < 32; i++)
22108 vec[i] = i < 16 ? vt2 : const0_rtx;
22109 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22110 vt2 = force_reg (V32QImode, vt2);
22111 /* From mask create two adjusted masks, which contain the same
22112 bits as mask in the low 7 bits of each vector element.
22113 The first mask will have the most significant bit clear
22114 if it requests element from the same 128-bit lane
22115 and MSB set if it requests element from the other 128-bit lane.
22116 The second mask will have the opposite values of the MSB,
22117 and additionally will have its 128-bit lanes swapped.
22118 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22119 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22120 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22121 stands for the other 12 bytes. */
22122 /* The bit whether element is from the same lane or the other
22123 lane is bit 4, so shift it up by 3 to the MSB position. */
22124 t5 = gen_reg_rtx (V4DImode);
22125 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22126 GEN_INT (3)));
22127 /* Clear MSB bits from the mask just in case it had them set. */
22128 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22129 /* After this t1 will have MSB set for elements from other lane. */
22130 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22131 /* Clear bits other than MSB. */
22132 emit_insn (gen_andv32qi3 (t1, t1, vt));
22133 /* Or in the lower bits from mask into t3. */
22134 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22135 /* And invert MSB bits in t1, so MSB is set for elements from the same
22136 lane. */
22137 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22138 /* Swap 128-bit lanes in t3. */
22139 t6 = gen_reg_rtx (V4DImode);
22140 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22141 const2_rtx, GEN_INT (3),
22142 const0_rtx, const1_rtx));
22143 /* And or in the lower bits from mask into t1. */
22144 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22145 if (one_operand_shuffle)
22147 /* Each of these shuffles will put 0s in places where
22148 element from the other 128-bit lane is needed, otherwise
22149 will shuffle in the requested value. */
22150 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22151 gen_lowpart (V32QImode, t6)));
22152 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22153 /* For t3 the 128-bit lanes are swapped again. */
22154 t7 = gen_reg_rtx (V4DImode);
22155 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22156 const2_rtx, GEN_INT (3),
22157 const0_rtx, const1_rtx));
22158 /* And oring both together leads to the result. */
22159 emit_insn (gen_iorv32qi3 (target, t1,
22160 gen_lowpart (V32QImode, t7)));
22161 if (target != operands[0])
22162 emit_move_insn (operands[0],
22163 gen_lowpart (GET_MODE (operands[0]), target));
22164 return;
22167 t4 = gen_reg_rtx (V32QImode);
22168 /* Similarly to the one_operand_shuffle code above,
22169 just repeated twice, once for each operand. The merge_two:
22170 code will merge the two results together. */
22171 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22172 gen_lowpart (V32QImode, t6)));
22173 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22174 gen_lowpart (V32QImode, t6)));
22175 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22176 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22177 t7 = gen_reg_rtx (V4DImode);
22178 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22179 const2_rtx, GEN_INT (3),
22180 const0_rtx, const1_rtx));
22181 t8 = gen_reg_rtx (V4DImode);
22182 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22183 const2_rtx, GEN_INT (3),
22184 const0_rtx, const1_rtx));
22185 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22186 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22187 t1 = t4;
22188 t2 = t3;
22189 goto merge_two;
22191 default:
22192 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22193 break;
22197 if (TARGET_XOP)
22199 /* The XOP VPPERM insn supports three inputs. By ignoring the
22200 one_operand_shuffle special case, we avoid creating another
22201 set of constant vectors in memory. */
22202 one_operand_shuffle = false;
22204 /* mask = mask & {2*w-1, ...} */
22205 vt = GEN_INT (2*w - 1);
22207 else
22209 /* mask = mask & {w-1, ...} */
22210 vt = GEN_INT (w - 1);
22213 for (i = 0; i < w; i++)
22214 vec[i] = vt;
22215 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22216 mask = expand_simple_binop (maskmode, AND, mask, vt,
22217 NULL_RTX, 0, OPTAB_DIRECT);
22219 /* For non-QImode operations, convert the word permutation control
22220 into a byte permutation control. */
22221 if (mode != V16QImode)
22223 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22224 GEN_INT (exact_log2 (e)),
22225 NULL_RTX, 0, OPTAB_DIRECT);
22227 /* Convert mask to vector of chars. */
22228 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22230 /* Replicate each of the input bytes into byte positions:
22231 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22232 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22233 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22234 for (i = 0; i < 16; ++i)
22235 vec[i] = GEN_INT (i/e * e);
22236 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22237 vt = validize_mem (force_const_mem (V16QImode, vt));
22238 if (TARGET_XOP)
22239 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22240 else
22241 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22243 /* Convert it into the byte positions by doing
22244 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22245 for (i = 0; i < 16; ++i)
22246 vec[i] = GEN_INT (i % e);
22247 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22248 vt = validize_mem (force_const_mem (V16QImode, vt));
22249 emit_insn (gen_addv16qi3 (mask, mask, vt));
22252 /* The actual shuffle operations all operate on V16QImode. */
22253 op0 = gen_lowpart (V16QImode, op0);
22254 op1 = gen_lowpart (V16QImode, op1);
22256 if (TARGET_XOP)
22258 if (GET_MODE (target) != V16QImode)
22259 target = gen_reg_rtx (V16QImode);
22260 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22261 if (target != operands[0])
22262 emit_move_insn (operands[0],
22263 gen_lowpart (GET_MODE (operands[0]), target));
22265 else if (one_operand_shuffle)
22267 if (GET_MODE (target) != V16QImode)
22268 target = gen_reg_rtx (V16QImode);
22269 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22270 if (target != operands[0])
22271 emit_move_insn (operands[0],
22272 gen_lowpart (GET_MODE (operands[0]), target));
22274 else
22276 rtx xops[6];
22277 bool ok;
22279 /* Shuffle the two input vectors independently. */
22280 t1 = gen_reg_rtx (V16QImode);
22281 t2 = gen_reg_rtx (V16QImode);
22282 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22283 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22285 merge_two:
22286 /* Then merge them together. The key is whether any given control
22287 element contained a bit set that indicates the second word. */
22288 mask = operands[3];
22289 vt = GEN_INT (w);
22290 if (maskmode == V2DImode && !TARGET_SSE4_1)
22292 /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22293 more shuffle to convert the V2DI input mask into a V4SI
22294 input mask, at which point the masking that expand_int_vcond
22295 does will work as desired. */
22296 rtx t3 = gen_reg_rtx (V4SImode);
22297 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22298 const0_rtx, const0_rtx,
22299 const2_rtx, const2_rtx));
22300 mask = t3;
22301 maskmode = V4SImode;
22302 e = w = 4;
22305 for (i = 0; i < w; i++)
22306 vec[i] = vt;
22307 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22308 vt = force_reg (maskmode, vt);
22309 mask = expand_simple_binop (maskmode, AND, mask, vt,
22310 NULL_RTX, 0, OPTAB_DIRECT);
22312 if (GET_MODE (target) != mode)
22313 target = gen_reg_rtx (mode);
22314 xops[0] = target;
22315 xops[1] = gen_lowpart (mode, t2);
22316 xops[2] = gen_lowpart (mode, t1);
22317 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22318 xops[4] = mask;
22319 xops[5] = vt;
22320 ok = ix86_expand_int_vcond (xops);
22321 gcc_assert (ok);
22322 if (target != operands[0])
22323 emit_move_insn (operands[0],
22324 gen_lowpart (GET_MODE (operands[0]), target));
22328 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
22329 true if we should do zero extension, else sign extension. HIGH_P is
22330 true if we want the N/2 high elements, else the low elements. */
22332 void
22333 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22335 machine_mode imode = GET_MODE (src);
22336 rtx tmp;
22338 if (TARGET_SSE4_1)
22340 rtx (*unpack)(rtx, rtx);
22341 rtx (*extract)(rtx, rtx) = NULL;
22342 machine_mode halfmode = BLKmode;
22344 switch (imode)
22346 case V64QImode:
22347 if (unsigned_p)
22348 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22349 else
22350 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22351 halfmode = V32QImode;
22352 extract
22353 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22354 break;
22355 case V32QImode:
22356 if (unsigned_p)
22357 unpack = gen_avx2_zero_extendv16qiv16hi2;
22358 else
22359 unpack = gen_avx2_sign_extendv16qiv16hi2;
22360 halfmode = V16QImode;
22361 extract
22362 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22363 break;
22364 case V32HImode:
22365 if (unsigned_p)
22366 unpack = gen_avx512f_zero_extendv16hiv16si2;
22367 else
22368 unpack = gen_avx512f_sign_extendv16hiv16si2;
22369 halfmode = V16HImode;
22370 extract
22371 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22372 break;
22373 case V16HImode:
22374 if (unsigned_p)
22375 unpack = gen_avx2_zero_extendv8hiv8si2;
22376 else
22377 unpack = gen_avx2_sign_extendv8hiv8si2;
22378 halfmode = V8HImode;
22379 extract
22380 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22381 break;
22382 case V16SImode:
22383 if (unsigned_p)
22384 unpack = gen_avx512f_zero_extendv8siv8di2;
22385 else
22386 unpack = gen_avx512f_sign_extendv8siv8di2;
22387 halfmode = V8SImode;
22388 extract
22389 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22390 break;
22391 case V8SImode:
22392 if (unsigned_p)
22393 unpack = gen_avx2_zero_extendv4siv4di2;
22394 else
22395 unpack = gen_avx2_sign_extendv4siv4di2;
22396 halfmode = V4SImode;
22397 extract
22398 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22399 break;
22400 case V16QImode:
22401 if (unsigned_p)
22402 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22403 else
22404 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22405 break;
22406 case V8HImode:
22407 if (unsigned_p)
22408 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22409 else
22410 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22411 break;
22412 case V4SImode:
22413 if (unsigned_p)
22414 unpack = gen_sse4_1_zero_extendv2siv2di2;
22415 else
22416 unpack = gen_sse4_1_sign_extendv2siv2di2;
22417 break;
22418 default:
22419 gcc_unreachable ();
22422 if (GET_MODE_SIZE (imode) >= 32)
22424 tmp = gen_reg_rtx (halfmode);
22425 emit_insn (extract (tmp, src));
22427 else if (high_p)
22429 /* Shift higher 8 bytes to lower 8 bytes. */
22430 tmp = gen_reg_rtx (V1TImode);
22431 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22432 GEN_INT (64)));
22433 tmp = gen_lowpart (imode, tmp);
22435 else
22436 tmp = src;
22438 emit_insn (unpack (dest, tmp));
22440 else
22442 rtx (*unpack)(rtx, rtx, rtx);
22444 switch (imode)
22446 case V16QImode:
22447 if (high_p)
22448 unpack = gen_vec_interleave_highv16qi;
22449 else
22450 unpack = gen_vec_interleave_lowv16qi;
22451 break;
22452 case V8HImode:
22453 if (high_p)
22454 unpack = gen_vec_interleave_highv8hi;
22455 else
22456 unpack = gen_vec_interleave_lowv8hi;
22457 break;
22458 case V4SImode:
22459 if (high_p)
22460 unpack = gen_vec_interleave_highv4si;
22461 else
22462 unpack = gen_vec_interleave_lowv4si;
22463 break;
22464 default:
22465 gcc_unreachable ();
22468 if (unsigned_p)
22469 tmp = force_reg (imode, CONST0_RTX (imode));
22470 else
22471 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22472 src, pc_rtx, pc_rtx);
22474 rtx tmp2 = gen_reg_rtx (imode);
22475 emit_insn (unpack (tmp2, src, tmp));
22476 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22480 /* Expand conditional increment or decrement using adc/sbb instructions.
22481 The default case using setcc followed by the conditional move can be
22482 done by generic code. */
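/* E.g. for "x = x + (a < b)" with unsigned operands this boils down to
   "cmpl b, a" followed by "adcl $0, x" (AT&T syntax, illustrative
   operand names): the carry produced by the compare feeds the
   add-with-carry directly.  */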
22483 bool
22484 ix86_expand_int_addcc (rtx operands[])
22486 enum rtx_code code = GET_CODE (operands[1]);
22487 rtx flags;
22488 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22489 rtx compare_op;
22490 rtx val = const0_rtx;
22491 bool fpcmp = false;
22492 machine_mode mode;
22493 rtx op0 = XEXP (operands[1], 0);
22494 rtx op1 = XEXP (operands[1], 1);
22496 if (operands[3] != const1_rtx
22497 && operands[3] != constm1_rtx)
22498 return false;
22499 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22500 return false;
22501 code = GET_CODE (compare_op);
22503 flags = XEXP (compare_op, 0);
22505 if (GET_MODE (flags) == CCFPmode
22506 || GET_MODE (flags) == CCFPUmode)
22508 fpcmp = true;
22509 code = ix86_fp_compare_code_to_integer (code);
22512 if (code != LTU)
22514 val = constm1_rtx;
22515 if (fpcmp)
22516 PUT_CODE (compare_op,
22517 reverse_condition_maybe_unordered
22518 (GET_CODE (compare_op)));
22519 else
22520 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22523 mode = GET_MODE (operands[0]);
22525 /* Construct either adc or sbb insn. */
22526 if ((code == LTU) == (operands[3] == constm1_rtx))
22528 switch (mode)
22530 case QImode:
22531 insn = gen_subqi3_carry;
22532 break;
22533 case HImode:
22534 insn = gen_subhi3_carry;
22535 break;
22536 case SImode:
22537 insn = gen_subsi3_carry;
22538 break;
22539 case DImode:
22540 insn = gen_subdi3_carry;
22541 break;
22542 default:
22543 gcc_unreachable ();
22546 else
22548 switch (mode)
22550 case QImode:
22551 insn = gen_addqi3_carry;
22552 break;
22553 case HImode:
22554 insn = gen_addhi3_carry;
22555 break;
22556 case SImode:
22557 insn = gen_addsi3_carry;
22558 break;
22559 case DImode:
22560 insn = gen_adddi3_carry;
22561 break;
22562 default:
22563 gcc_unreachable ();
22566 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22568 return true;
22572 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22573 but works for floating point parameters and non-offsettable memories.
22574 For pushes, it returns just stack offsets; the values will be saved
22575 in the right order. Maximally four parts are generated. */
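/* E.g. on a 32-bit target a DFmode operand is returned as two SImode
   parts and an XFmode operand as three, while on a 64-bit target a
   TFmode operand becomes two DImode parts.  */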
22577 static int
22578 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22580 int size;
22582 if (!TARGET_64BIT)
22583 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22584 else
22585 size = (GET_MODE_SIZE (mode) + 4) / 8;
22587 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22588 gcc_assert (size >= 2 && size <= 4);
22590 /* Optimize constant pool references to immediates. This is used by fp
22591 moves that force all constants to memory to allow combining. */
22592 if (MEM_P (operand) && MEM_READONLY_P (operand))
22594 rtx tmp = maybe_get_pool_constant (operand);
22595 if (tmp)
22596 operand = tmp;
22599 if (MEM_P (operand) && !offsettable_memref_p (operand))
22601 /* The only non-offsettable memories we handle are pushes. */
22602 int ok = push_operand (operand, VOIDmode);
22604 gcc_assert (ok);
22606 operand = copy_rtx (operand);
22607 PUT_MODE (operand, word_mode);
22608 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22609 return size;
22612 if (GET_CODE (operand) == CONST_VECTOR)
22614 machine_mode imode = int_mode_for_mode (mode);
22615 /* Caution: if we looked through a constant pool memory above,
22616 the operand may actually have a different mode now. That's
22617 ok, since we want to pun this all the way back to an integer. */
22618 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22619 gcc_assert (operand != NULL);
22620 mode = imode;
22623 if (!TARGET_64BIT)
22625 if (mode == DImode)
22626 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22627 else
22629 int i;
22631 if (REG_P (operand))
22633 gcc_assert (reload_completed);
22634 for (i = 0; i < size; i++)
22635 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22637 else if (offsettable_memref_p (operand))
22639 operand = adjust_address (operand, SImode, 0);
22640 parts[0] = operand;
22641 for (i = 1; i < size; i++)
22642 parts[i] = adjust_address (operand, SImode, 4 * i);
22644 else if (GET_CODE (operand) == CONST_DOUBLE)
22646 REAL_VALUE_TYPE r;
22647 long l[4];
22649 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22650 switch (mode)
22652 case TFmode:
22653 real_to_target (l, &r, mode);
22654 parts[3] = gen_int_mode (l[3], SImode);
22655 parts[2] = gen_int_mode (l[2], SImode);
22656 break;
22657 case XFmode:
22658 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22659 long double may not be 80-bit. */
22660 real_to_target (l, &r, mode);
22661 parts[2] = gen_int_mode (l[2], SImode);
22662 break;
22663 case DFmode:
22664 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22665 break;
22666 default:
22667 gcc_unreachable ();
22669 parts[1] = gen_int_mode (l[1], SImode);
22670 parts[0] = gen_int_mode (l[0], SImode);
22672 else
22673 gcc_unreachable ();
22676 else
22678 if (mode == TImode)
22679 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22680 if (mode == XFmode || mode == TFmode)
22682 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22683 if (REG_P (operand))
22685 gcc_assert (reload_completed);
22686 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22687 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22689 else if (offsettable_memref_p (operand))
22691 operand = adjust_address (operand, DImode, 0);
22692 parts[0] = operand;
22693 parts[1] = adjust_address (operand, upper_mode, 8);
22695 else if (GET_CODE (operand) == CONST_DOUBLE)
22697 REAL_VALUE_TYPE r;
22698 long l[4];
22700 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22701 real_to_target (l, &r, mode);
22703 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22704 if (HOST_BITS_PER_WIDE_INT >= 64)
22705 parts[0]
22706 = gen_int_mode
22707 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22708 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22709 DImode);
22710 else
22711 parts[0] = immed_double_const (l[0], l[1], DImode);
22713 if (upper_mode == SImode)
22714 parts[1] = gen_int_mode (l[2], SImode);
22715 else if (HOST_BITS_PER_WIDE_INT >= 64)
22716 parts[1]
22717 = gen_int_mode
22718 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22719 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22720 DImode);
22721 else
22722 parts[1] = immed_double_const (l[2], l[3], DImode);
22724 else
22725 gcc_unreachable ();
22729 return size;
22732 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22733 Return false when normal moves are needed; true when all required
22734 insns have been emitted. Operands 2-4 contain the input values
22735 in the correct order; operands 5-7 contain the output values. */
22737 void
22738 ix86_split_long_move (rtx operands[])
22740 rtx part[2][4];
22741 int nparts, i, j;
22742 int push = 0;
22743 int collisions = 0;
22744 machine_mode mode = GET_MODE (operands[0]);
22745 bool collisionparts[4];
22747 /* The DFmode expanders may ask us to move a double.
22748 For a 64-bit target this is a single move. By hiding that fact
22749 here we simplify the i386.md splitters. */
22750 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22752 /* Optimize constant pool references into immediates. This is used by
22753 fp moves, which force all constants to memory to allow combining. */
22755 if (MEM_P (operands[1])
22756 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22757 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22758 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22759 if (push_operand (operands[0], VOIDmode))
22761 operands[0] = copy_rtx (operands[0]);
22762 PUT_MODE (operands[0], word_mode);
22764 else
22765 operands[0] = gen_lowpart (DImode, operands[0]);
22766 operands[1] = gen_lowpart (DImode, operands[1]);
22767 emit_move_insn (operands[0], operands[1]);
22768 return;
22771 /* The only non-offsettable memory we handle is push. */
22772 if (push_operand (operands[0], VOIDmode))
22773 push = 1;
22774 else
22775 gcc_assert (!MEM_P (operands[0])
22776 || offsettable_memref_p (operands[0]));
22778 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22779 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22781 /* When emitting a push, take care of source operands on the stack. */
22782 if (push && MEM_P (operands[1])
22783 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22785 rtx src_base = XEXP (part[1][nparts - 1], 0);
22787 /* Compensate for the stack decrement by 4. */
22788 if (!TARGET_64BIT && nparts == 3
22789 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22790 src_base = plus_constant (Pmode, src_base, 4);
22792 /* src_base refers to the stack pointer and is
22793 automatically decreased by the emitted pushes. */
22794 for (i = 0; i < nparts; i++)
22795 part[1][i] = change_address (part[1][i],
22796 GET_MODE (part[1][i]), src_base);
22799 /* We need to do copy in the right order in case an address register
22800 of the source overlaps the destination. */
22801 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22803 rtx tmp;
22805 for (i = 0; i < nparts; i++)
22807 collisionparts[i]
22808 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22809 if (collisionparts[i])
22810 collisions++;
22813 /* Collision in the middle part can be handled by reordering. */
22814 if (collisions == 1 && nparts == 3 && collisionparts [1])
22816 std::swap (part[0][1], part[0][2]);
22817 std::swap (part[1][1], part[1][2]);
22819 else if (collisions == 1
22820 && nparts == 4
22821 && (collisionparts [1] || collisionparts [2]))
22823 if (collisionparts [1])
22825 std::swap (part[0][1], part[0][2]);
22826 std::swap (part[1][1], part[1][2]);
22828 else
22830 std::swap (part[0][2], part[0][3]);
22831 std::swap (part[1][2], part[1][3]);
22835 /* If there are more collisions, we can't handle them by reordering.
22836 Do an lea to the last part and use only one colliding move. */
22837 else if (collisions > 1)
22839 rtx base;
22841 collisions = 1;
22843 base = part[0][nparts - 1];
22845 /* Handle the case when the last part isn't valid for lea.
22846 Happens in 64-bit mode storing the 12-byte XFmode. */
22847 if (GET_MODE (base) != Pmode)
22848 base = gen_rtx_REG (Pmode, REGNO (base));
22850 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22851 part[1][0] = replace_equiv_address (part[1][0], base);
22852 for (i = 1; i < nparts; i++)
22854 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22855 part[1][i] = replace_equiv_address (part[1][i], tmp);
22860 if (push)
22862 if (!TARGET_64BIT)
22864 if (nparts == 3)
22866 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22867 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22868 stack_pointer_rtx, GEN_INT (-4)));
22869 emit_move_insn (part[0][2], part[1][2]);
22871 else if (nparts == 4)
22873 emit_move_insn (part[0][3], part[1][3]);
22874 emit_move_insn (part[0][2], part[1][2]);
22877 else
22879 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22880 a register, it is OK - we will just use the larger counterpart. We also
22881 retype memory - these come from an attempt to avoid the REX prefix on
22882 moving of the second half of a TFmode value. */
22883 if (GET_MODE (part[1][1]) == SImode)
22885 switch (GET_CODE (part[1][1]))
22887 case MEM:
22888 part[1][1] = adjust_address (part[1][1], DImode, 0);
22889 break;
22891 case REG:
22892 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22893 break;
22895 default:
22896 gcc_unreachable ();
22899 if (GET_MODE (part[1][0]) == SImode)
22900 part[1][0] = part[1][1];
22903 emit_move_insn (part[0][1], part[1][1]);
22904 emit_move_insn (part[0][0], part[1][0]);
22905 return;
22908 /* Choose correct order to not overwrite the source before it is copied. */
22909 if ((REG_P (part[0][0])
22910 && REG_P (part[1][1])
22911 && (REGNO (part[0][0]) == REGNO (part[1][1])
22912 || (nparts == 3
22913 && REGNO (part[0][0]) == REGNO (part[1][2]))
22914 || (nparts == 4
22915 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22916 || (collisions > 0
22917 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22919 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22921 operands[2 + i] = part[0][j];
22922 operands[6 + i] = part[1][j];
22925 else
22927 for (i = 0; i < nparts; i++)
22929 operands[2 + i] = part[0][i];
22930 operands[6 + i] = part[1][i];
22934 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22935 if (optimize_insn_for_size_p ())
22937 for (j = 0; j < nparts - 1; j++)
22938 if (CONST_INT_P (operands[6 + j])
22939 && operands[6 + j] != const0_rtx
22940 && REG_P (operands[2 + j]))
22941 for (i = j; i < nparts - 1; i++)
22942 if (CONST_INT_P (operands[7 + i])
22943 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22944 operands[7 + i] = operands[2 + j];
22947 for (i = 0; i < nparts; i++)
22948 emit_move_insn (operands[2 + i], operands[6 + i]);
22950 return;
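/* An illustrative sketch (not part of the original source; registers and
   addresses are assumed for the example): on a 32-bit target a DImode copy
   from memory into edx:eax is split by the code above into roughly

       (set (reg:SI eax) (mem:SI addr))
       (set (reg:SI edx) (mem:SI addr+4))

   If a destination register also serves as the address base, the parts are
   emitted in the reverse order, or an lea first rebuilds the address in the
   last destination part, so the source is not clobbered before it is read.  */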
22953 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22954 left shift by a constant, either using a single shift or
22955 a sequence of add instructions. */
22957 static void
22958 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22960 rtx (*insn)(rtx, rtx, rtx);
22962 if (count == 1
22963 || (count * ix86_cost->add <= ix86_cost->shift_const
22964 && !optimize_insn_for_size_p ()))
22966 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22967 while (count-- > 0)
22968 emit_insn (insn (operand, operand, operand));
22970 else
22972 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22973 emit_insn (insn (operand, operand, GEN_INT (count)));
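/* An illustrative cost sketch (assumed cost values, not from the original
   source): with ix86_cost->add == 1 and ix86_cost->shift_const == 3, a left
   shift by 2 is emitted as two self-additions,

       add %eax, %eax
       add %eax, %eax

   while a shift by 4 (4 * 1 > 3) falls back to a single "shl $4, %eax".
   When optimizing for size the single shift is preferred for every count
   except count == 1.  */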
22977 void
22978 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22980 rtx (*gen_ashl3)(rtx, rtx, rtx);
22981 rtx (*gen_shld)(rtx, rtx, rtx);
22982 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22984 rtx low[2], high[2];
22985 int count;
22987 if (CONST_INT_P (operands[2]))
22989 split_double_mode (mode, operands, 2, low, high);
22990 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22992 if (count >= half_width)
22994 emit_move_insn (high[0], low[1]);
22995 emit_move_insn (low[0], const0_rtx);
22997 if (count > half_width)
22998 ix86_expand_ashl_const (high[0], count - half_width, mode);
23000 else
23002 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23004 if (!rtx_equal_p (operands[0], operands[1]))
23005 emit_move_insn (operands[0], operands[1]);
23007 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23008 ix86_expand_ashl_const (low[0], count, mode);
23010 return;
23013 split_double_mode (mode, operands, 1, low, high);
23015 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23017 if (operands[1] == const1_rtx)
23019 /* Assuming we've chosen QImode-capable registers, then 1 << N
23020 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23021 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23023 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23025 ix86_expand_clear (low[0]);
23026 ix86_expand_clear (high[0]);
23027 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23029 d = gen_lowpart (QImode, low[0]);
23030 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23031 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23032 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23034 d = gen_lowpart (QImode, high[0]);
23035 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23036 s = gen_rtx_NE (QImode, flags, const0_rtx);
23037 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23040 /* Otherwise, we can get the same results by manually performing
23041 a bit extract operation on bit 5/6, and then performing the two
23042 shifts. The two methods of getting 0/1 into low/high are exactly
23043 the same size. Avoiding the shift in the bit extract case helps
23044 pentium4 a bit; no one else seems to care much either way. */
23045 else
23047 machine_mode half_mode;
23048 rtx (*gen_lshr3)(rtx, rtx, rtx);
23049 rtx (*gen_and3)(rtx, rtx, rtx);
23050 rtx (*gen_xor3)(rtx, rtx, rtx);
23051 HOST_WIDE_INT bits;
23052 rtx x;
23054 if (mode == DImode)
23056 half_mode = SImode;
23057 gen_lshr3 = gen_lshrsi3;
23058 gen_and3 = gen_andsi3;
23059 gen_xor3 = gen_xorsi3;
23060 bits = 5;
23062 else
23064 half_mode = DImode;
23065 gen_lshr3 = gen_lshrdi3;
23066 gen_and3 = gen_anddi3;
23067 gen_xor3 = gen_xordi3;
23068 bits = 6;
23071 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23072 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23073 else
23074 x = gen_lowpart (half_mode, operands[2]);
23075 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23077 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23078 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23079 emit_move_insn (low[0], high[0]);
23080 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23083 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23084 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23085 return;
23088 if (operands[1] == constm1_rtx)
23090 /* For -1 << N, we can avoid the shld instruction, because we
23091 know that we're shifting 0...31/63 ones into a -1. */
23092 emit_move_insn (low[0], constm1_rtx);
23093 if (optimize_insn_for_size_p ())
23094 emit_move_insn (high[0], low[0]);
23095 else
23096 emit_move_insn (high[0], constm1_rtx);
23098 else
23100 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23102 if (!rtx_equal_p (operands[0], operands[1]))
23103 emit_move_insn (operands[0], operands[1]);
23105 split_double_mode (mode, operands, 1, low, high);
23106 emit_insn (gen_shld (high[0], low[0], operands[2]));
23109 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23111 if (TARGET_CMOVE && scratch)
23113 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23114 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23116 ix86_expand_clear (scratch);
23117 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23119 else
23121 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23122 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23124 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
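/* An illustrative sketch (assumed operands, not part of the original
   source): for a DImode left shift by the constant 40 on a 32-bit target,
   the count (40) is at least half_width (32), so the constant branch above
   emits

       high = low_input;        (move the low word into the high word)
       low  = 0;
       high <<= 8;              (40 - 32, via ix86_expand_ashl_const)

   For a variable count the shld path plus the x86_shift*_adj fixup handles
   counts of 32 or more at run time.  */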
23128 void
23129 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23131 rtx (*gen_ashr3)(rtx, rtx, rtx)
23132 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23133 rtx (*gen_shrd)(rtx, rtx, rtx);
23134 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23136 rtx low[2], high[2];
23137 int count;
23139 if (CONST_INT_P (operands[2]))
23141 split_double_mode (mode, operands, 2, low, high);
23142 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23144 if (count == GET_MODE_BITSIZE (mode) - 1)
23146 emit_move_insn (high[0], high[1]);
23147 emit_insn (gen_ashr3 (high[0], high[0],
23148 GEN_INT (half_width - 1)));
23149 emit_move_insn (low[0], high[0]);
23152 else if (count >= half_width)
23154 emit_move_insn (low[0], high[1]);
23155 emit_move_insn (high[0], low[0]);
23156 emit_insn (gen_ashr3 (high[0], high[0],
23157 GEN_INT (half_width - 1)));
23159 if (count > half_width)
23160 emit_insn (gen_ashr3 (low[0], low[0],
23161 GEN_INT (count - half_width)));
23163 else
23165 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23167 if (!rtx_equal_p (operands[0], operands[1]))
23168 emit_move_insn (operands[0], operands[1]);
23170 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23171 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23174 else
23176 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23178 if (!rtx_equal_p (operands[0], operands[1]))
23179 emit_move_insn (operands[0], operands[1]);
23181 split_double_mode (mode, operands, 1, low, high);
23183 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23184 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23186 if (TARGET_CMOVE && scratch)
23188 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23189 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23191 emit_move_insn (scratch, high[0]);
23192 emit_insn (gen_ashr3 (scratch, scratch,
23193 GEN_INT (half_width - 1)));
23194 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23195 scratch));
23197 else
23199 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23200 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23202 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
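/* An illustrative sketch (assumed operands, not from the original source):
   an arithmetic DImode right shift by the constant 40 on a 32-bit target
   takes the count >= half_width branch above:

       low  = high_input;       (low word becomes the old high word)
       high = high_input;
       high >>= 31;             (replicate the sign bit)
       low  >>= 8;              (40 - 32)

   so the high word ends up holding only copies of the sign bit.  */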
23207 void
23208 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23210 rtx (*gen_lshr3)(rtx, rtx, rtx)
23211 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23212 rtx (*gen_shrd)(rtx, rtx, rtx);
23213 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23215 rtx low[2], high[2];
23216 int count;
23218 if (CONST_INT_P (operands[2]))
23220 split_double_mode (mode, operands, 2, low, high);
23221 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23223 if (count >= half_width)
23225 emit_move_insn (low[0], high[1]);
23226 ix86_expand_clear (high[0]);
23228 if (count > half_width)
23229 emit_insn (gen_lshr3 (low[0], low[0],
23230 GEN_INT (count - half_width)));
23232 else
23234 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23236 if (!rtx_equal_p (operands[0], operands[1]))
23237 emit_move_insn (operands[0], operands[1]);
23239 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23240 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23243 else
23245 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23247 if (!rtx_equal_p (operands[0], operands[1]))
23248 emit_move_insn (operands[0], operands[1]);
23250 split_double_mode (mode, operands, 1, low, high);
23252 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23253 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23255 if (TARGET_CMOVE && scratch)
23257 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23258 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23260 ix86_expand_clear (scratch);
23261 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23262 scratch));
23264 else
23266 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23267 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23269 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23274 /* Predict just emitted jump instruction to be taken with probability PROB. */
23275 static void
23276 predict_jump (int prob)
23278 rtx insn = get_last_insn ();
23279 gcc_assert (JUMP_P (insn));
23280 add_int_reg_note (insn, REG_BR_PROB, prob);
23283 /* Helper function for the string operations below. Test whether VARIABLE
23284 is aligned to VALUE bytes. If true, jump to the label. */
23285 static rtx_code_label *
23286 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23288 rtx_code_label *label = gen_label_rtx ();
23289 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23290 if (GET_MODE (variable) == DImode)
23291 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23292 else
23293 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23294 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23295 1, label);
23296 if (epilogue)
23297 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23298 else
23299 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23300 return label;
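/* An illustrative sketch (assumed arguments, not part of the original
   source): ix86_expand_aligntest (count, 4, true) emits roughly

       tmp = count & 4;
       if (tmp == 0) goto label;    (predicted 50% taken for epilogues)

   and returns LABEL, so a caller places its 4-byte move before
   emit_label (label) and that move executes only when bit 2 of COUNT
   is set.  */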
23303 /* Decrease COUNTREG by VALUE. */
23304 static void
23305 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23307 rtx (*gen_add)(rtx, rtx, rtx)
23308 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23310 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23313 /* Zero-extend the possibly SImode EXP to a Pmode register. */
23315 ix86_zero_extend_to_Pmode (rtx exp)
23317 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23320 /* Divide COUNTREG by SCALE. */
23321 static rtx
23322 scale_counter (rtx countreg, int scale)
23324 rtx sc;
23326 if (scale == 1)
23327 return countreg;
23328 if (CONST_INT_P (countreg))
23329 return GEN_INT (INTVAL (countreg) / scale);
23330 gcc_assert (REG_P (countreg));
23332 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23333 GEN_INT (exact_log2 (scale)),
23334 NULL, 1, OPTAB_DIRECT);
23335 return sc;
23338 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23339 DImode for constant loop counts. */
23341 static machine_mode
23342 counter_mode (rtx count_exp)
23344 if (GET_MODE (count_exp) != VOIDmode)
23345 return GET_MODE (count_exp);
23346 if (!CONST_INT_P (count_exp))
23347 return Pmode;
23348 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23349 return DImode;
23350 return SImode;
23353 /* Copy the address to a Pmode register. This is used for x32 to
23354 truncate DImode TLS address to a SImode register. */
23356 static rtx
23357 ix86_copy_addr_to_reg (rtx addr)
23359 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23360 return copy_addr_to_reg (addr);
23361 else
23363 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23364 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23368 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
23369 to DESTPTR via chunks of MODE unrolled UNROLL times, with overall size COUNT
23370 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23371 memory by VALUE (supposed to be in MODE).
23373 The size is rounded down to a whole number of the chunk size moved at once.
23374 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
23377 static void
23378 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23379 rtx destptr, rtx srcptr, rtx value,
23380 rtx count, machine_mode mode, int unroll,
23381 int expected_size, bool issetmem)
23383 rtx_code_label *out_label, *top_label;
23384 rtx iter, tmp;
23385 machine_mode iter_mode = counter_mode (count);
23386 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23387 rtx piece_size = GEN_INT (piece_size_n);
23388 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23389 rtx size;
23390 int i;
23392 top_label = gen_label_rtx ();
23393 out_label = gen_label_rtx ();
23394 iter = gen_reg_rtx (iter_mode);
23396 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23397 NULL, 1, OPTAB_DIRECT);
23398 /* Those two should combine. */
23399 if (piece_size == const1_rtx)
23401 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23402 true, out_label);
23403 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23405 emit_move_insn (iter, const0_rtx);
23407 emit_label (top_label);
23409 tmp = convert_modes (Pmode, iter_mode, iter, true);
23411 /* This assert could be relaxed - in this case we'll need to compute
23412 the smallest power of two containing PIECE_SIZE_N and pass it to
23413 offset_address. */
23414 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23415 destmem = offset_address (destmem, tmp, piece_size_n);
23416 destmem = adjust_address (destmem, mode, 0);
23418 if (!issetmem)
23420 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23421 srcmem = adjust_address (srcmem, mode, 0);
23423 /* When unrolling for chips that reorder memory reads and writes,
23424 we can save registers by using a single temporary.
23425 Also using 4 temporaries is overkill in 32-bit mode. */
23426 if (!TARGET_64BIT && 0)
23428 for (i = 0; i < unroll; i++)
23430 if (i)
23432 destmem =
23433 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23434 srcmem =
23435 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23437 emit_move_insn (destmem, srcmem);
23440 else
23442 rtx tmpreg[4];
23443 gcc_assert (unroll <= 4);
23444 for (i = 0; i < unroll; i++)
23446 tmpreg[i] = gen_reg_rtx (mode);
23447 if (i)
23449 srcmem =
23450 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23452 emit_move_insn (tmpreg[i], srcmem);
23454 for (i = 0; i < unroll; i++)
23456 if (i)
23458 destmem =
23459 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23461 emit_move_insn (destmem, tmpreg[i]);
23465 else
23466 for (i = 0; i < unroll; i++)
23468 if (i)
23469 destmem =
23470 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23471 emit_move_insn (destmem, value);
23474 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23475 true, OPTAB_LIB_WIDEN);
23476 if (tmp != iter)
23477 emit_move_insn (iter, tmp);
23479 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23480 true, top_label);
23481 if (expected_size != -1)
23483 expected_size /= GET_MODE_SIZE (mode) * unroll;
23484 if (expected_size == 0)
23485 predict_jump (0);
23486 else if (expected_size > REG_BR_PROB_BASE)
23487 predict_jump (REG_BR_PROB_BASE - 1);
23488 else
23489 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23491 else
23492 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23493 iter = ix86_zero_extend_to_Pmode (iter);
23494 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23495 true, OPTAB_LIB_WIDEN);
23496 if (tmp != destptr)
23497 emit_move_insn (destptr, tmp);
23498 if (!issetmem)
23500 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23501 true, OPTAB_LIB_WIDEN);
23502 if (tmp != srcptr)
23503 emit_move_insn (srcptr, tmp);
23505 emit_label (out_label);
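/* An illustrative sketch (assumed parameters, not from the original
   source): for a memcpy expanded with MODE == SImode and UNROLL == 4 the
   code above produces approximately

       size = count & ~15;
       iter = 0;
     top:
       load 4 SImode temporaries from src+iter .. src+iter+12;
       store them to dest+iter .. dest+iter+12;
       iter += 16;
       if (iter < size) goto top;
       dest += iter;  src += iter;

   The tail (count & 15 bytes) is left for the epilogue code.  */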
23508 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23509 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23510 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23511 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23512 ORIG_VALUE is the original value passed to memset to fill the memory with.
23513 Other arguments have same meaning as for previous function. */
23515 static void
23516 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23517 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23518 rtx count,
23519 machine_mode mode, bool issetmem)
23521 rtx destexp;
23522 rtx srcexp;
23523 rtx countreg;
23524 HOST_WIDE_INT rounded_count;
23526 /* If possible, it is shorter to use rep movs.
23527 TODO: Maybe it is better to move this logic to decide_alg. */
23528 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23529 && (!issetmem || orig_value == const0_rtx))
23530 mode = SImode;
23532 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23533 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23535 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23536 GET_MODE_SIZE (mode)));
23537 if (mode != QImode)
23539 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23540 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23541 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23543 else
23544 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23545 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23547 rounded_count = (INTVAL (count)
23548 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23549 destmem = shallow_copy_rtx (destmem);
23550 set_mem_size (destmem, rounded_count);
23552 else if (MEM_SIZE_KNOWN_P (destmem))
23553 clear_mem_size (destmem);
23555 if (issetmem)
23557 value = force_reg (mode, gen_lowpart (mode, value));
23558 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23560 else
23562 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23563 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23564 if (mode != QImode)
23566 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23567 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23568 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23570 else
23571 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23572 if (CONST_INT_P (count))
23574 rounded_count = (INTVAL (count)
23575 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23576 srcmem = shallow_copy_rtx (srcmem);
23577 set_mem_size (srcmem, rounded_count);
23579 else
23581 if (MEM_SIZE_KNOWN_P (srcmem))
23582 clear_mem_size (srcmem);
23584 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23585 destexp, srcexp));
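/* An illustrative sketch (a constant COUNT is assumed; not part of the
   original source): for a copy of 32 bytes with MODE == SImode the code
   above computes countreg = 32 / 4 = 8 and emits the equivalent of

       mov $8, %ecx
       rep movsl            (esi/edi advance by 4 bytes per iteration)

   For a memset of zero the QImode request is likewise widened to SImode
   when the length is a multiple of 4, since "rep stosl" is shorter.  */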
23589 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23590 DESTMEM.
23591 SRCMEM is passed by pointer so it can be updated on return.
23592 The return value is the updated DESTMEM. */
23593 static rtx
23594 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23595 HOST_WIDE_INT size_to_move)
23597 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23598 enum insn_code code;
23599 machine_mode move_mode;
23600 int piece_size, i;
23602 /* Find the widest mode in which we could perform moves.
23603 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23604 it until a move of that size is supported. */
23605 piece_size = 1 << floor_log2 (size_to_move);
23606 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23607 code = optab_handler (mov_optab, move_mode);
23608 while (code == CODE_FOR_nothing && piece_size > 1)
23610 piece_size >>= 1;
23611 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23612 code = optab_handler (mov_optab, move_mode);
23615 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23616 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23617 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23619 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23620 move_mode = mode_for_vector (word_mode, nunits);
23621 code = optab_handler (mov_optab, move_mode);
23622 if (code == CODE_FOR_nothing)
23624 move_mode = word_mode;
23625 piece_size = GET_MODE_SIZE (move_mode);
23626 code = optab_handler (mov_optab, move_mode);
23629 gcc_assert (code != CODE_FOR_nothing);
23631 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23632 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23634 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23635 gcc_assert (size_to_move % piece_size == 0);
23636 adjust = GEN_INT (piece_size);
23637 for (i = 0; i < size_to_move; i += piece_size)
23639 /* We move from memory to memory, so we'll need to do it via
23640 a temporary register. */
23641 tempreg = gen_reg_rtx (move_mode);
23642 emit_insn (GEN_FCN (code) (tempreg, src));
23643 emit_insn (GEN_FCN (code) (dst, tempreg));
23645 emit_move_insn (destptr,
23646 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23647 emit_move_insn (srcptr,
23648 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23650 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23651 piece_size);
23652 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23653 piece_size);
23656 /* Update DST and SRC rtx. */
23657 *srcmem = src;
23658 return dst;
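/* An illustrative sketch (assumed target features, not from the original
   source): with SIZE_TO_MOVE == 16 on an SSE target, PIECE_SIZE starts at
   16, the TImode integer move is replaced by the matching vector mode
   (e.g. V4SImode when the word mode is SImode), and a single load/store
   pair through a temporary register copies the block, after which DESTPTR
   and SRCPTR are advanced by 16.  */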
23661 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23662 static void
23663 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23664 rtx destptr, rtx srcptr, rtx count, int max_size)
23666 rtx src, dest;
23667 if (CONST_INT_P (count))
23669 HOST_WIDE_INT countval = INTVAL (count);
23670 HOST_WIDE_INT epilogue_size = countval % max_size;
23671 int i;
23673 /* For now MAX_SIZE should be a power of 2. This assert could be
23674 relaxed, but it'll require a bit more complicated epilogue
23675 expanding. */
23676 gcc_assert ((max_size & (max_size - 1)) == 0);
23677 for (i = max_size; i >= 1; i >>= 1)
23679 if (epilogue_size & i)
23680 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23682 return;
23684 if (max_size > 8)
23686 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23687 count, 1, OPTAB_DIRECT);
23688 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23689 count, QImode, 1, 4, false);
23690 return;
23693 /* When single stringops are available, we can cheaply increase dest and src pointers.
23694 Otherwise we save code size by maintaining offset (zero is readily
23695 available from the preceding rep operation) and using x86 addressing modes. */
23697 if (TARGET_SINGLE_STRINGOP)
23699 if (max_size > 4)
23701 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23702 src = change_address (srcmem, SImode, srcptr);
23703 dest = change_address (destmem, SImode, destptr);
23704 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23705 emit_label (label);
23706 LABEL_NUSES (label) = 1;
23708 if (max_size > 2)
23710 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23711 src = change_address (srcmem, HImode, srcptr);
23712 dest = change_address (destmem, HImode, destptr);
23713 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23714 emit_label (label);
23715 LABEL_NUSES (label) = 1;
23717 if (max_size > 1)
23719 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23720 src = change_address (srcmem, QImode, srcptr);
23721 dest = change_address (destmem, QImode, destptr);
23722 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23723 emit_label (label);
23724 LABEL_NUSES (label) = 1;
23727 else
23729 rtx offset = force_reg (Pmode, const0_rtx);
23730 rtx tmp;
23732 if (max_size > 4)
23734 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23735 src = change_address (srcmem, SImode, srcptr);
23736 dest = change_address (destmem, SImode, destptr);
23737 emit_move_insn (dest, src);
23738 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23739 true, OPTAB_LIB_WIDEN);
23740 if (tmp != offset)
23741 emit_move_insn (offset, tmp);
23742 emit_label (label);
23743 LABEL_NUSES (label) = 1;
23745 if (max_size > 2)
23747 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23748 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23749 src = change_address (srcmem, HImode, tmp);
23750 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23751 dest = change_address (destmem, HImode, tmp);
23752 emit_move_insn (dest, src);
23753 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23754 true, OPTAB_LIB_WIDEN);
23755 if (tmp != offset)
23756 emit_move_insn (offset, tmp);
23757 emit_label (label);
23758 LABEL_NUSES (label) = 1;
23760 if (max_size > 1)
23762 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23763 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23764 src = change_address (srcmem, QImode, tmp);
23765 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23766 dest = change_address (destmem, QImode, tmp);
23767 emit_move_insn (dest, src);
23768 emit_label (label);
23769 LABEL_NUSES (label) = 1;
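/* An illustrative sketch (a constant COUNT is assumed; not part of the
   original source): with MAX_SIZE == 8 and COUNT == 23, the epilogue size
   is 23 % 8 == 7, so the constant branch above emits one 4-byte, one
   2-byte and one 1-byte move (7 == 4 + 2 + 1), each through emit_memmov.  */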
23774 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23775 with value PROMOTED_VAL.
23776 The return value is the updated DESTMEM. */
23778 static rtx
23779 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23780 HOST_WIDE_INT size_to_move)
23782 rtx dst = destmem, adjust;
23783 enum insn_code code;
23784 machine_mode move_mode;
23785 int piece_size, i;
23787 /* Find the widest mode in which we could perform moves.
23788 Start with the biggest power of 2 not greater than SIZE_TO_MOVE and halve
23789 it until a move of that size is supported. */
23790 move_mode = GET_MODE (promoted_val);
23791 if (move_mode == VOIDmode)
23792 move_mode = QImode;
23793 if (size_to_move < GET_MODE_SIZE (move_mode))
23795 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23796 promoted_val = gen_lowpart (move_mode, promoted_val);
23798 piece_size = GET_MODE_SIZE (move_mode);
23799 code = optab_handler (mov_optab, move_mode);
23800 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23802 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23804 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23805 gcc_assert (size_to_move % piece_size == 0);
23806 adjust = GEN_INT (piece_size);
23807 for (i = 0; i < size_to_move; i += piece_size)
23809 if (piece_size <= GET_MODE_SIZE (word_mode))
23811 emit_insn (gen_strset (destptr, dst, promoted_val));
23812 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23813 piece_size);
23814 continue;
23817 emit_insn (GEN_FCN (code) (dst, promoted_val));
23819 emit_move_insn (destptr,
23820 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23822 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23823 piece_size);
23826 /* Update DST rtx. */
23827 return dst;
23829 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23830 static void
23831 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23832 rtx count, int max_size)
23834 count =
23835 expand_simple_binop (counter_mode (count), AND, count,
23836 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23837 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23838 gen_lowpart (QImode, value), count, QImode,
23839 1, max_size / 2, true);
23842 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23843 static void
23844 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23845 rtx count, int max_size)
23847 rtx dest;
23849 if (CONST_INT_P (count))
23851 HOST_WIDE_INT countval = INTVAL (count);
23852 HOST_WIDE_INT epilogue_size = countval % max_size;
23853 int i;
23855 /* For now MAX_SIZE should be a power of 2. This assert could be
23856 relaxed, but it'll require a bit more complicated epilogue
23857 expanding. */
23858 gcc_assert ((max_size & (max_size - 1)) == 0);
23859 for (i = max_size; i >= 1; i >>= 1)
23861 if (epilogue_size & i)
23863 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23864 destmem = emit_memset (destmem, destptr, vec_value, i);
23865 else
23866 destmem = emit_memset (destmem, destptr, value, i);
23869 return;
23871 if (max_size > 32)
23873 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23874 return;
23876 if (max_size > 16)
23878 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23879 if (TARGET_64BIT)
23881 dest = change_address (destmem, DImode, destptr);
23882 emit_insn (gen_strset (destptr, dest, value));
23883 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23884 emit_insn (gen_strset (destptr, dest, value));
23886 else
23888 dest = change_address (destmem, SImode, destptr);
23889 emit_insn (gen_strset (destptr, dest, value));
23890 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23891 emit_insn (gen_strset (destptr, dest, value));
23892 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23893 emit_insn (gen_strset (destptr, dest, value));
23894 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23895 emit_insn (gen_strset (destptr, dest, value));
23897 emit_label (label);
23898 LABEL_NUSES (label) = 1;
23900 if (max_size > 8)
23902 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23903 if (TARGET_64BIT)
23905 dest = change_address (destmem, DImode, destptr);
23906 emit_insn (gen_strset (destptr, dest, value));
23908 else
23910 dest = change_address (destmem, SImode, destptr);
23911 emit_insn (gen_strset (destptr, dest, value));
23912 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23913 emit_insn (gen_strset (destptr, dest, value));
23915 emit_label (label);
23916 LABEL_NUSES (label) = 1;
23918 if (max_size > 4)
23920 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23921 dest = change_address (destmem, SImode, destptr);
23922 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23923 emit_label (label);
23924 LABEL_NUSES (label) = 1;
23926 if (max_size > 2)
23928 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23929 dest = change_address (destmem, HImode, destptr);
23930 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23931 emit_label (label);
23932 LABEL_NUSES (label) = 1;
23934 if (max_size > 1)
23936 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23937 dest = change_address (destmem, QImode, destptr);
23938 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23939 emit_label (label);
23940 LABEL_NUSES (label) = 1;
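/* An illustrative sketch (assumed parameters, not from the original
   source): for a variable COUNT with MAX_SIZE == 16 on a 64-bit target,
   the non-constant path above emits a chain of alignment tests,

       if (count & 8) store 8 bytes;
       if (count & 4) store 4 bytes;
       if (count & 2) store 2 bytes;
       if (count & 1) store 1 byte;

   each guarded by a label produced by ix86_expand_aligntest.  */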
23944 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or store enough into
23945 DESTMEM to align it to DESIRED_ALIGNMENT. The original alignment is ALIGN.
23946 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23947 ignored.
23948 The return value is the updated DESTMEM. */
23949 static rtx
23950 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23951 rtx destptr, rtx srcptr, rtx value,
23952 rtx vec_value, rtx count, int align,
23953 int desired_alignment, bool issetmem)
23955 int i;
23956 for (i = 1; i < desired_alignment; i <<= 1)
23958 if (align <= i)
23960 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23961 if (issetmem)
23963 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23964 destmem = emit_memset (destmem, destptr, vec_value, i);
23965 else
23966 destmem = emit_memset (destmem, destptr, value, i);
23968 else
23969 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23970 ix86_adjust_counter (count, i);
23971 emit_label (label);
23972 LABEL_NUSES (label) = 1;
23973 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23976 return destmem;
23979 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23980 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23981 and jump to DONE_LABEL. */
23982 static void
23983 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23984 rtx destptr, rtx srcptr,
23985 rtx value, rtx vec_value,
23986 rtx count, int size,
23987 rtx done_label, bool issetmem)
23989 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23990 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23991 rtx modesize;
23992 int n;
23994 /* If we do not have vector value to copy, we must reduce size. */
23995 if (issetmem)
23997 if (!vec_value)
23999 if (GET_MODE (value) == VOIDmode && size > 8)
24000 mode = Pmode;
24001 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24002 mode = GET_MODE (value);
24004 else
24005 mode = GET_MODE (vec_value), value = vec_value;
24007 else
24009 /* Choose appropriate vector mode. */
24010 if (size >= 32)
24011 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24012 else if (size >= 16)
24013 mode = TARGET_SSE ? V16QImode : DImode;
24014 srcmem = change_address (srcmem, mode, srcptr);
24016 destmem = change_address (destmem, mode, destptr);
24017 modesize = GEN_INT (GET_MODE_SIZE (mode));
24018 gcc_assert (GET_MODE_SIZE (mode) <= size);
24019 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24021 if (issetmem)
24022 emit_move_insn (destmem, gen_lowpart (mode, value));
24023 else
24025 emit_move_insn (destmem, srcmem);
24026 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24028 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24031 destmem = offset_address (destmem, count, 1);
24032 destmem = offset_address (destmem, GEN_INT (-2 * size),
24033 GET_MODE_SIZE (mode));
24034 if (!issetmem)
24036 srcmem = offset_address (srcmem, count, 1);
24037 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24038 GET_MODE_SIZE (mode));
24040 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24042 if (issetmem)
24043 emit_move_insn (destmem, gen_lowpart (mode, value));
24044 else
24046 emit_move_insn (destmem, srcmem);
24047 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24049 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24051 emit_jump_insn (gen_jump (done_label));
24052 emit_barrier ();
24054 emit_label (label);
24055 LABEL_NUSES (label) = 1;
24058 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
24059 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24060 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
24061 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24062 DONE_LABEL is a label after the whole copying sequence. The label is created
24063 on demand if *DONE_LABEL is NULL.
24064 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
24065 bounds after the initial copies.
24067 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24068 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24069 we will dispatch to a library call for large blocks.
24071 In pseudocode we do:
24073 if (COUNT < SIZE)
24075 Assume that SIZE is 4. Bigger sizes are handled analogously
24076 if (COUNT & 4)
24078 copy 4 bytes from SRCPTR to DESTPTR
24079 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24080 goto done_label
24082 if (!COUNT)
24083 goto done_label;
24084 copy 1 byte from SRCPTR to DESTPTR
24085 if (COUNT & 2)
24087 copy 2 bytes from SRCPTR to DESTPTR
24088 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24091 else
24093 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24094 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24096 OLD_DESTPTR = DESTPTR;
24097 Align DESTPTR up to DESIRED_ALIGN
24098 SRCPTR += DESTPTR - OLD_DESTPTR
24099 COUNT -= DESTPTR - OLD_DESTPTR
24100 if (DYNAMIC_CHECK)
24101 Round COUNT down to multiple of SIZE
24102 << optional caller supplied zero size guard is here >>
24103 << optional caller supplied dynamic check is here >>
24104 << caller supplied main copy loop is here >>
24106 done_label:
24108 static void
24109 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24110 rtx *destptr, rtx *srcptr,
24111 machine_mode mode,
24112 rtx value, rtx vec_value,
24113 rtx *count,
24114 rtx_code_label **done_label,
24115 int size,
24116 int desired_align,
24117 int align,
24118 unsigned HOST_WIDE_INT *min_size,
24119 bool dynamic_check,
24120 bool issetmem)
24122 rtx_code_label *loop_label = NULL, *label;
24123 int n;
24124 rtx modesize;
24125 int prolog_size = 0;
24126 rtx mode_value;
24128 /* Choose the proper value to copy. */
24129 if (issetmem && VECTOR_MODE_P (mode))
24130 mode_value = vec_value;
24131 else
24132 mode_value = value;
24133 gcc_assert (GET_MODE_SIZE (mode) <= size);
24135 /* See if block is big or small, handle small blocks. */
24136 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24138 int size2 = size;
24139 loop_label = gen_label_rtx ();
24141 if (!*done_label)
24142 *done_label = gen_label_rtx ();
24144 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24145 1, loop_label);
24146 size2 >>= 1;
24148 /* Handle sizes > 3. */
24149 for (;size2 > 2; size2 >>= 1)
24150 expand_small_movmem_or_setmem (destmem, srcmem,
24151 *destptr, *srcptr,
24152 value, vec_value,
24153 *count,
24154 size2, *done_label, issetmem);
24155 /* Nothing to copy? Jump to DONE_LABEL if so */
24156 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24157 1, *done_label);
24159 /* Do a byte copy. */
24160 destmem = change_address (destmem, QImode, *destptr);
24161 if (issetmem)
24162 emit_move_insn (destmem, gen_lowpart (QImode, value));
24163 else
24165 srcmem = change_address (srcmem, QImode, *srcptr);
24166 emit_move_insn (destmem, srcmem);
24169 /* Handle sizes 2 and 3. */
24170 label = ix86_expand_aligntest (*count, 2, false);
24171 destmem = change_address (destmem, HImode, *destptr);
24172 destmem = offset_address (destmem, *count, 1);
24173 destmem = offset_address (destmem, GEN_INT (-2), 2);
24174 if (issetmem)
24175 emit_move_insn (destmem, gen_lowpart (HImode, value));
24176 else
24178 srcmem = change_address (srcmem, HImode, *srcptr);
24179 srcmem = offset_address (srcmem, *count, 1);
24180 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24181 emit_move_insn (destmem, srcmem);
24184 emit_label (label);
24185 LABEL_NUSES (label) = 1;
24186 emit_jump_insn (gen_jump (*done_label));
24187 emit_barrier ();
24189 else
24190 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24191 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24193 /* Start memcpy for COUNT >= SIZE. */
24194 if (loop_label)
24196 emit_label (loop_label);
24197 LABEL_NUSES (loop_label) = 1;
24200 /* Copy first desired_align bytes. */
24201 if (!issetmem)
24202 srcmem = change_address (srcmem, mode, *srcptr);
24203 destmem = change_address (destmem, mode, *destptr);
24204 modesize = GEN_INT (GET_MODE_SIZE (mode));
24205 for (n = 0; prolog_size < desired_align - align; n++)
24207 if (issetmem)
24208 emit_move_insn (destmem, mode_value);
24209 else
24211 emit_move_insn (destmem, srcmem);
24212 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24214 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24215 prolog_size += GET_MODE_SIZE (mode);
24219 /* Copy last SIZE bytes. */
24220 destmem = offset_address (destmem, *count, 1);
24221 destmem = offset_address (destmem,
24222 GEN_INT (-size - prolog_size),
24224 if (issetmem)
24225 emit_move_insn (destmem, mode_value);
24226 else
24228 srcmem = offset_address (srcmem, *count, 1);
24229 srcmem = offset_address (srcmem,
24230 GEN_INT (-size - prolog_size),
24232 emit_move_insn (destmem, srcmem);
24234 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24236 destmem = offset_address (destmem, modesize, 1);
24237 if (issetmem)
24238 emit_move_insn (destmem, mode_value);
24239 else
24241 srcmem = offset_address (srcmem, modesize, 1);
24242 emit_move_insn (destmem, srcmem);
24246 /* Align destination. */
24247 if (desired_align > 1 && desired_align > align)
24249 rtx saveddest = *destptr;
24251 gcc_assert (desired_align <= size);
24252 /* Align destptr up, place it to new register. */
24253 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24254 GEN_INT (prolog_size),
24255 NULL_RTX, 1, OPTAB_DIRECT);
24256 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24257 GEN_INT (-desired_align),
24258 *destptr, 1, OPTAB_DIRECT);
24259 /* See how many bytes we skipped. */
24260 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24261 *destptr,
24262 saveddest, 1, OPTAB_DIRECT);
24263 /* Adjust srcptr and count. */
24264 if (!issetmem)
24265 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24266 *srcptr, 1, OPTAB_DIRECT);
24267 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24268 saveddest, *count, 1, OPTAB_DIRECT);
24269 /* We copied at most size + prolog_size. */
24270 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24271 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24272 else
24273 *min_size = 0;
24275 /* Our loops always round down the block size, but for dispatch to a library
24276 call we need the precise value. */
24277 if (dynamic_check)
24278 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24279 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24281 else
24283 gcc_assert (prolog_size == 0);
24284 /* Decrease count, so we won't end up copying last word twice. */
24285 if (!CONST_INT_P (*count))
24286 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24287 constm1_rtx, *count, 1, OPTAB_DIRECT);
24288 else
24289 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24290 if (*min_size)
24291 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24296 /* This function is like the previous one, except here we know how many bytes
24297 need to be copied. That allows us to update alignment not only of DST, which
24298 is returned, but also of SRC, which is passed as a pointer for that
24299 reason. */
24300 static rtx
24301 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24302 rtx srcreg, rtx value, rtx vec_value,
24303 int desired_align, int align_bytes,
24304 bool issetmem)
24306 rtx src = NULL;
24307 rtx orig_dst = dst;
24308 rtx orig_src = NULL;
24309 int piece_size = 1;
24310 int copied_bytes = 0;
24312 if (!issetmem)
24314 gcc_assert (srcp != NULL);
24315 src = *srcp;
24316 orig_src = src;
24319 for (piece_size = 1;
24320 piece_size <= desired_align && copied_bytes < align_bytes;
24321 piece_size <<= 1)
24323 if (align_bytes & piece_size)
24325 if (issetmem)
24327 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24328 dst = emit_memset (dst, destreg, vec_value, piece_size);
24329 else
24330 dst = emit_memset (dst, destreg, value, piece_size);
24332 else
24333 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24334 copied_bytes += piece_size;
24337 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24338 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24339 if (MEM_SIZE_KNOWN_P (orig_dst))
24340 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24342 if (!issetmem)
24344 int src_align_bytes = get_mem_align_offset (src, desired_align
24345 * BITS_PER_UNIT);
24346 if (src_align_bytes >= 0)
24347 src_align_bytes = desired_align - src_align_bytes;
24348 if (src_align_bytes >= 0)
24350 unsigned int src_align;
24351 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24353 if ((src_align_bytes & (src_align - 1))
24354 == (align_bytes & (src_align - 1)))
24355 break;
24357 if (src_align > (unsigned int) desired_align)
24358 src_align = desired_align;
24359 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24360 set_mem_align (src, src_align * BITS_PER_UNIT);
24362 if (MEM_SIZE_KNOWN_P (orig_src))
24363 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24364 *srcp = src;
24367 return dst;
24370 /* Return true if ALG can be used in current context.
24371 Assume we expand memset if MEMSET is true. */
24372 static bool
24373 alg_usable_p (enum stringop_alg alg, bool memset)
24375 if (alg == no_stringop)
24376 return false;
24377 if (alg == vector_loop)
24378 return TARGET_SSE || TARGET_AVX;
24379 /* Algorithms using the rep prefix want at least edi and ecx;
24380 additionally, memset wants eax and memcpy wants esi. Don't
24381 consider such algorithms if the user has appropriated those
24382 registers for their own purposes. */
24383 if (alg == rep_prefix_1_byte
24384 || alg == rep_prefix_4_byte
24385 || alg == rep_prefix_8_byte)
24386 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24387 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24388 return true;
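/* An illustrative note (an assumed command line, not from the original
   source): compiling with -ffixed-ecx marks CX_REG as a fixed register, so
   all of rep_prefix_1_byte/4_byte/8_byte become unusable here and decide_alg
   below falls back to a loop or a library call instead.  */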
24391 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24392 static enum stringop_alg
24393 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24394 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24395 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24397 const struct stringop_algs * algs;
24398 bool optimize_for_speed;
24399 int max = 0;
24400 const struct processor_costs *cost;
24401 int i;
24402 bool any_alg_usable_p = false;
24404 *noalign = false;
24405 *dynamic_check = -1;
24407 /* Even if the string operation call is cold, we still might spend a lot
24408 of time processing large blocks. */
24409 if (optimize_function_for_size_p (cfun)
24410 || (optimize_insn_for_size_p ()
24411 && (max_size < 256
24412 || (expected_size != -1 && expected_size < 256))))
24413 optimize_for_speed = false;
24414 else
24415 optimize_for_speed = true;
24417 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24418 if (memset)
24419 algs = &cost->memset[TARGET_64BIT != 0];
24420 else
24421 algs = &cost->memcpy[TARGET_64BIT != 0];
24423 /* See maximal size for user defined algorithm. */
24424 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24426 enum stringop_alg candidate = algs->size[i].alg;
24427 bool usable = alg_usable_p (candidate, memset);
24428 any_alg_usable_p |= usable;
24430 if (candidate != libcall && candidate && usable)
24431 max = algs->size[i].max;
24434 /* If the expected size is not known but the max size is small enough
24435 that the inline version is a win, set the expected size into
24436 the range. */
24437 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24438 && expected_size == -1)
24439 expected_size = min_size / 2 + max_size / 2;
24441 /* If the user specified the algorithm, honor it if possible. */
24442 if (ix86_stringop_alg != no_stringop
24443 && alg_usable_p (ix86_stringop_alg, memset))
24444 return ix86_stringop_alg;
24445 /* rep; movq or rep; movl is the smallest variant. */
24446 else if (!optimize_for_speed)
24448 *noalign = true;
24449 if (!count || (count & 3) || (memset && !zero_memset))
24450 return alg_usable_p (rep_prefix_1_byte, memset)
24451 ? rep_prefix_1_byte : loop_1_byte;
24452 else
24453 return alg_usable_p (rep_prefix_4_byte, memset)
24454 ? rep_prefix_4_byte : loop;
24456 /* Very tiny blocks are best handled via the loop; REP is expensive to
24457 set up. */
24458 else if (expected_size != -1 && expected_size < 4)
24459 return loop_1_byte;
24460 else if (expected_size != -1)
24462 enum stringop_alg alg = libcall;
24463 bool alg_noalign = false;
24464 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24466 /* We get here if the algorithms that were not libcall-based
24467 were rep-prefix based and we are unable to use rep prefixes
24468 based on global register usage. Break out of the loop and
24469 use the heuristic below. */
24470 if (algs->size[i].max == 0)
24471 break;
24472 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24474 enum stringop_alg candidate = algs->size[i].alg;
24476 if (candidate != libcall && alg_usable_p (candidate, memset))
24478 alg = candidate;
24479 alg_noalign = algs->size[i].noalign;
24481 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24482 last non-libcall inline algorithm. */
24483 if (TARGET_INLINE_ALL_STRINGOPS)
24485 /* When the current size is best to be copied by a libcall,
24486 but we are still forced to inline, run the heuristic below
24487 that will pick code for medium sized blocks. */
24488 if (alg != libcall)
24490 *noalign = alg_noalign;
24491 return alg;
24493 else if (!any_alg_usable_p)
24494 break;
24496 else if (alg_usable_p (candidate, memset))
24498 *noalign = algs->size[i].noalign;
24499 return candidate;
24504 /* When asked to inline the call anyway, try to pick a meaningful choice.
24505 We look for the maximal size of block that is faster to copy by hand and
24506 take blocks of at most that size, guessing that the average size will
24507 be roughly half of the block.
24509 If this turns out to be bad, we might simply specify the preferred
24510 choice in ix86_costs. */
24511 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24512 && (algs->unknown_size == libcall
24513 || !alg_usable_p (algs->unknown_size, memset)))
24515 enum stringop_alg alg;
24517 /* If there aren't any usable algorithms, then recursing on
24518 smaller sizes isn't going to find anything. Just return the
24519 simple byte-at-a-time copy loop. */
24520 if (!any_alg_usable_p)
24522 /* Pick something reasonable. */
24523 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24524 *dynamic_check = 128;
24525 return loop_1_byte;
24527 if (max <= 0)
24528 max = 4096;
24529 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24530 zero_memset, dynamic_check, noalign);
24531 gcc_assert (*dynamic_check == -1);
24532 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24533 *dynamic_check = max;
24534 else
24535 gcc_assert (alg != libcall);
24536 return alg;
24538 return (alg_usable_p (algs->unknown_size, memset)
24539 ? algs->unknown_size : libcall);
24542 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24543 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24544 static int
24545 decide_alignment (int align,
24546 enum stringop_alg alg,
24547 int expected_size,
24548 machine_mode move_mode)
24550 int desired_align = 0;
24552 gcc_assert (alg != no_stringop);
24554 if (alg == libcall)
24555 return 0;
24556 if (move_mode == VOIDmode)
24557 return 0;
24559 desired_align = GET_MODE_SIZE (move_mode);
24560 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24561 copying a whole cacheline at once. */
24562 if (TARGET_PENTIUMPRO
24563 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24564 desired_align = 8;
24566 if (optimize_size)
24567 desired_align = 1;
24568 if (desired_align < align)
24569 desired_align = align;
24570 if (expected_size != -1 && expected_size < 4)
24571 desired_align = align;
24573 return desired_align;
24577 /* Helper function for memset. For QImode value 0xXY produce
24578 0xXYXYXYXY of the width specified by MODE. This is essentially
24579 a * 0x10101010, but we can do slightly better than
24580 synth_mult by unwinding the sequence by hand on CPUs with
24581 slow multiply. */
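/* For example, 0xAB promoted to SImode is 0xABABABAB and to DImode is
   0xABABABABABABABAB.  */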
24582 static rtx
24583 promote_duplicated_reg (machine_mode mode, rtx val)
24585 machine_mode valmode = GET_MODE (val);
24586 rtx tmp;
24587 int nops = mode == DImode ? 3 : 2;
24589 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24590 if (val == const0_rtx)
24591 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24592 if (CONST_INT_P (val))
24594 HOST_WIDE_INT v = INTVAL (val) & 255;
24596 v |= v << 8;
24597 v |= v << 16;
24598 if (mode == DImode)
24599 v |= (v << 16) << 16;
24600 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24603 if (valmode == VOIDmode)
24604 valmode = QImode;
24605 if (valmode != QImode)
24606 val = gen_lowpart (QImode, val);
24607 if (mode == QImode)
24608 return val;
24609 if (!TARGET_PARTIAL_REG_STALL)
24610 nops--;
24611 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24612 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24613 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24614 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24616 rtx reg = convert_modes (mode, QImode, val, true);
24617 tmp = promote_duplicated_reg (mode, const1_rtx);
24618 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24619 OPTAB_DIRECT);
24621 else
24623 rtx reg = convert_modes (mode, QImode, val, true);
24625 if (!TARGET_PARTIAL_REG_STALL)
24626 if (mode == SImode)
24627 emit_insn (gen_movsi_insv_1 (reg, reg));
24628 else
24629 emit_insn (gen_movdi_insv_1 (reg, reg));
24630 else
24632 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24633 NULL, 1, OPTAB_DIRECT);
24634 reg =
24635 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24637 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24638 NULL, 1, OPTAB_DIRECT);
24639 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24640 if (mode == SImode)
24641 return reg;
24642 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24643 NULL, 1, OPTAB_DIRECT);
24644 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24645 return reg;
24649 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24650 will be needed by the main loop copying SIZE_NEEDED chunks and by the
24651 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
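/* E.g. when the main loop copies DImode chunks on a 64-bit target, the byte
   value is broadcast to a full 64-bit pattern; smaller SIZE_NEEDED values
   get an SImode or HImode promotion instead.  */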
24652 static rtx
24653 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24654 int align)
24656 rtx promoted_val;
24658 if (TARGET_64BIT
24659 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24660 promoted_val = promote_duplicated_reg (DImode, val);
24661 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24662 promoted_val = promote_duplicated_reg (SImode, val);
24663 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24664 promoted_val = promote_duplicated_reg (HImode, val);
24665 else
24666 promoted_val = val;
24668 return promoted_val;
24671 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24672 operations when profitable. The code depends upon architecture, block size
24673 and alignment, but always has one of the following overall structures:
24675 Aligned move sequence:
24677 1) Prologue guard: Conditional that jumps up to epilogues for small
24678 blocks that can be handled by the epilogue alone. This is faster
24679 but also needed for correctness, since the prologue assumes the block
24680 is larger than the desired alignment.
24682 Optional dynamic check for size and libcall for large
24683 blocks is emitted here too, with -minline-stringops-dynamically.
24685 2) Prologue: copy first few bytes in order to get destination
24686 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24687 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24688 copied. We emit either a jump tree on power of two sized
24689 blocks, or a byte loop.
24691 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24692 with specified algorithm.
24694 4) Epilogue: code copying tail of the block that is too small to be
24695 handled by main body (or up to size guarded by prologue guard).
24697 Misaligned move sequence:
24699 1) Misaligned move prologue/epilogue containing:
24700 a) Prologue handling small memory blocks and jumping to done_label
24701 (skipped if blocks are known to be large enough)
24702 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24703 needed by single possibly misaligned move
24704 (skipped if alignment is not needed)
24705 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24707 2) Zero size guard dispatching to done_label, if needed
24709 3) Dispatch to a library call, if needed,
24711 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24712 with specified algorithm. */
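/* As a rough, illustrative sketch (not the exact code emitted), an aligned
   memcpy expansion using a rep-prefix algorithm has the shape:
       cmp $small, count ; jb .Lepilogue     (prologue guard)
       <byte moves to align the destination> (prologue)
       rep movs{b,l,q}                       (main body)
   .Lepilogue:
       <tail moves>                          (epilogue)  */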
24713 bool
24714 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24715 rtx align_exp, rtx expected_align_exp,
24716 rtx expected_size_exp, rtx min_size_exp,
24717 rtx max_size_exp, rtx probable_max_size_exp,
24718 bool issetmem)
24720 rtx destreg;
24721 rtx srcreg = NULL;
24722 rtx_code_label *label = NULL;
24723 rtx tmp;
24724 rtx_code_label *jump_around_label = NULL;
24725 HOST_WIDE_INT align = 1;
24726 unsigned HOST_WIDE_INT count = 0;
24727 HOST_WIDE_INT expected_size = -1;
24728 int size_needed = 0, epilogue_size_needed;
24729 int desired_align = 0, align_bytes = 0;
24730 enum stringop_alg alg;
24731 rtx promoted_val = NULL;
24732 rtx vec_promoted_val = NULL;
24733 bool force_loopy_epilogue = false;
24734 int dynamic_check;
24735 bool need_zero_guard = false;
24736 bool noalign;
24737 machine_mode move_mode = VOIDmode;
24738 int unroll_factor = 1;
24739 /* TODO: Once value ranges are available, fill in proper data. */
24740 unsigned HOST_WIDE_INT min_size = 0;
24741 unsigned HOST_WIDE_INT max_size = -1;
24742 unsigned HOST_WIDE_INT probable_max_size = -1;
24743 bool misaligned_prologue_used = false;
24745 if (CONST_INT_P (align_exp))
24746 align = INTVAL (align_exp);
24747 /* i386 can do misaligned access at a reasonably increased cost. */
24748 if (CONST_INT_P (expected_align_exp)
24749 && INTVAL (expected_align_exp) > align)
24750 align = INTVAL (expected_align_exp);
24751 /* ALIGN is the minimum of destination and source alignment, but we care here
24752 just about destination alignment. */
24753 else if (!issetmem
24754 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24755 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24757 if (CONST_INT_P (count_exp))
24759 min_size = max_size = probable_max_size = count = expected_size
24760 = INTVAL (count_exp);
24761 /* When COUNT is 0, there is nothing to do. */
24762 if (!count)
24763 return true;
24765 else
24767 if (min_size_exp)
24768 min_size = INTVAL (min_size_exp);
24769 if (max_size_exp)
24770 max_size = INTVAL (max_size_exp);
24771 if (probable_max_size_exp)
24772 probable_max_size = INTVAL (probable_max_size_exp);
24773 if (CONST_INT_P (expected_size_exp))
24774 expected_size = INTVAL (expected_size_exp);
24777 /* Make sure we don't need to care about overflow later on. */
24778 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24779 return false;
24781 /* Step 0: Decide on preferred algorithm, desired alignment and
24782 size of chunks to be copied by main loop. */
24783 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24784 issetmem,
24785 issetmem && val_exp == const0_rtx,
24786 &dynamic_check, &noalign);
24787 if (alg == libcall)
24788 return false;
24789 gcc_assert (alg != no_stringop);
24791 /* For now the vector version of memset is generated only for memory zeroing,
24792 as creating a promoted vector value is very cheap in this case. */
24793 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24794 alg = unrolled_loop;
24796 if (!count)
24797 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24798 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24799 if (!issetmem)
24800 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24802 unroll_factor = 1;
24803 move_mode = word_mode;
24804 switch (alg)
24806 case libcall:
24807 case no_stringop:
24808 case last_alg:
24809 gcc_unreachable ();
24810 case loop_1_byte:
24811 need_zero_guard = true;
24812 move_mode = QImode;
24813 break;
24814 case loop:
24815 need_zero_guard = true;
24816 break;
24817 case unrolled_loop:
24818 need_zero_guard = true;
24819 unroll_factor = (TARGET_64BIT ? 4 : 2);
24820 break;
24821 case vector_loop:
24822 need_zero_guard = true;
24823 unroll_factor = 4;
24824 /* Find the widest supported mode. */
24825 move_mode = word_mode;
24826 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24827 != CODE_FOR_nothing)
24828 move_mode = GET_MODE_WIDER_MODE (move_mode);
24830 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24831 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24832 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24834 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24835 move_mode = mode_for_vector (word_mode, nunits);
24836 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24837 move_mode = word_mode;
24839 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24840 break;
24841 case rep_prefix_8_byte:
24842 move_mode = DImode;
24843 break;
24844 case rep_prefix_4_byte:
24845 move_mode = SImode;
24846 break;
24847 case rep_prefix_1_byte:
24848 move_mode = QImode;
24849 break;
24851 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24852 epilogue_size_needed = size_needed;
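/* For instance, a 64-bit unrolled_loop (factor 4, DImode moves) handles
   32 bytes per iteration.  */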
24854 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24855 if (!TARGET_ALIGN_STRINGOPS || noalign)
24856 align = desired_align;
24858 /* Step 1: Prologue guard. */
24860 /* Alignment code needs count to be in register. */
24861 if (CONST_INT_P (count_exp) && desired_align > align)
24863 if (INTVAL (count_exp) > desired_align
24864 && INTVAL (count_exp) > size_needed)
24866 align_bytes
24867 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24868 if (align_bytes <= 0)
24869 align_bytes = 0;
24870 else
24871 align_bytes = desired_align - align_bytes;
24873 if (align_bytes == 0)
24874 count_exp = force_reg (counter_mode (count_exp), count_exp);
24876 gcc_assert (desired_align >= 1 && align >= 1);
24878 /* Misaligned move sequences handle both prologue and epilogue at once.
24879 Default code generation results in smaller code for large alignments
24880 and also avoids redundant work when sizes are known precisely. */
24881 misaligned_prologue_used
24882 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24883 && MAX (desired_align, epilogue_size_needed) <= 32
24884 && desired_align <= epilogue_size_needed
24885 && ((desired_align > align && !align_bytes)
24886 || (!count && epilogue_size_needed > 1)));
24888 /* Do the cheap promotion to allow better CSE across the
24889 main loop and epilogue (i.e. one load of the big constant in
24890 front of all the code).
24891 For now the misaligned move sequences do not have a fast path
24892 without broadcasting. */
24893 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24895 if (alg == vector_loop)
24897 gcc_assert (val_exp == const0_rtx);
24898 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24899 promoted_val = promote_duplicated_reg_to_size (val_exp,
24900 GET_MODE_SIZE (word_mode),
24901 desired_align, align);
24903 else
24905 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24906 desired_align, align);
24909 /* Misaligned move sequences handle both prologues and epilogues at once.
24910 Default code generation results in smaller code for large alignments and
24911 also avoids redundant work when sizes are known precisely. */
24912 if (misaligned_prologue_used)
24914 /* The misaligned move prologue handles small blocks by itself. */
24915 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24916 (dst, src, &destreg, &srcreg,
24917 move_mode, promoted_val, vec_promoted_val,
24918 &count_exp,
24919 &jump_around_label,
24920 desired_align < align
24921 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24922 desired_align, align, &min_size, dynamic_check, issetmem);
24923 if (!issetmem)
24924 src = change_address (src, BLKmode, srcreg);
24925 dst = change_address (dst, BLKmode, destreg);
24926 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24927 epilogue_size_needed = 0;
24928 if (need_zero_guard && !min_size)
24930 /* It is possible that we copied enough so the main loop will not
24931 execute. */
24932 gcc_assert (size_needed > 1);
24933 if (jump_around_label == NULL_RTX)
24934 jump_around_label = gen_label_rtx ();
24935 emit_cmp_and_jump_insns (count_exp,
24936 GEN_INT (size_needed),
24937 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24938 if (expected_size == -1
24939 || expected_size < (desired_align - align) / 2 + size_needed)
24940 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24941 else
24942 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24945 /* Ensure that alignment prologue won't copy past end of block. */
24946 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24948 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24949 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24950 Make sure it is power of 2. */
24951 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
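/* E.g. a value of 15 here (from a size_needed of 16) rounds back up to the
   power of two 16.  */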
24953 /* To improve performance of small blocks, we jump around the VAL
24954 promoting code. This means that if the promoted VAL is not constant,
24955 we might not use it in the epilogue and have to use the byte
24956 loop variant. */
24957 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24958 force_loopy_epilogue = true;
24959 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24960 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24962 /* If main algorithm works on QImode, no epilogue is needed.
24963 For small sizes just don't align anything. */
24964 if (size_needed == 1)
24965 desired_align = align;
24966 else
24967 goto epilogue;
24969 else if (!count
24970 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24972 label = gen_label_rtx ();
24973 emit_cmp_and_jump_insns (count_exp,
24974 GEN_INT (epilogue_size_needed),
24975 LTU, 0, counter_mode (count_exp), 1, label);
24976 if (expected_size == -1 || expected_size < epilogue_size_needed)
24977 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24978 else
24979 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24983 /* Emit code to decide on runtime whether library call or inline should be
24984 used. */
24985 if (dynamic_check != -1)
24987 if (!issetmem && CONST_INT_P (count_exp))
24989 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24991 emit_block_move_via_libcall (dst, src, count_exp, false);
24992 count_exp = const0_rtx;
24993 goto epilogue;
24996 else
24998 rtx_code_label *hot_label = gen_label_rtx ();
24999 if (jump_around_label == NULL_RTX)
25000 jump_around_label = gen_label_rtx ();
25001 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25002 LEU, 0, counter_mode (count_exp),
25003 1, hot_label);
25004 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25005 if (issetmem)
25006 set_storage_via_libcall (dst, count_exp, val_exp, false);
25007 else
25008 emit_block_move_via_libcall (dst, src, count_exp, false);
25009 emit_jump (jump_around_label);
25010 emit_label (hot_label);
25014 /* Step 2: Alignment prologue. */
25015 /* Do the expensive promotion once we branched off the small blocks. */
25016 if (issetmem && !promoted_val)
25017 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25018 desired_align, align);
25020 if (desired_align > align && !misaligned_prologue_used)
25022 if (align_bytes == 0)
25024 /* Except for the first move in the prologue, we no longer know
25025 the constant offset in aliasing info. It doesn't seem worth
25026 the pain to maintain it for the first move, so throw away
25027 the info early. */
25028 dst = change_address (dst, BLKmode, destreg);
25029 if (!issetmem)
25030 src = change_address (src, BLKmode, srcreg);
25031 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25032 promoted_val, vec_promoted_val,
25033 count_exp, align, desired_align,
25034 issetmem);
25035 /* At most desired_align - align bytes are copied. */
25036 if (min_size < (unsigned)(desired_align - align))
25037 min_size = 0;
25038 else
25039 min_size -= desired_align - align;
25041 else
25043 /* If we know how many bytes need to be stored before dst is
25044 sufficiently aligned, maintain aliasing info accurately. */
25045 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25046 srcreg,
25047 promoted_val,
25048 vec_promoted_val,
25049 desired_align,
25050 align_bytes,
25051 issetmem);
25053 count_exp = plus_constant (counter_mode (count_exp),
25054 count_exp, -align_bytes);
25055 count -= align_bytes;
25056 min_size -= align_bytes;
25057 max_size -= align_bytes;
25059 if (need_zero_guard
25060 && !min_size
25061 && (count < (unsigned HOST_WIDE_INT) size_needed
25062 || (align_bytes == 0
25063 && count < ((unsigned HOST_WIDE_INT) size_needed
25064 + desired_align - align))))
25066 /* It is possible that we copied enough so the main loop will not
25067 execute. */
25068 gcc_assert (size_needed > 1);
25069 if (label == NULL_RTX)
25070 label = gen_label_rtx ();
25071 emit_cmp_and_jump_insns (count_exp,
25072 GEN_INT (size_needed),
25073 LTU, 0, counter_mode (count_exp), 1, label);
25074 if (expected_size == -1
25075 || expected_size < (desired_align - align) / 2 + size_needed)
25076 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25077 else
25078 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25081 if (label && size_needed == 1)
25083 emit_label (label);
25084 LABEL_NUSES (label) = 1;
25085 label = NULL;
25086 epilogue_size_needed = 1;
25087 if (issetmem)
25088 promoted_val = val_exp;
25090 else if (label == NULL_RTX && !misaligned_prologue_used)
25091 epilogue_size_needed = size_needed;
25093 /* Step 3: Main loop. */
25095 switch (alg)
25097 case libcall:
25098 case no_stringop:
25099 case last_alg:
25100 gcc_unreachable ();
25101 case loop_1_byte:
25102 case loop:
25103 case unrolled_loop:
25104 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25105 count_exp, move_mode, unroll_factor,
25106 expected_size, issetmem);
25107 break;
25108 case vector_loop:
25109 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25110 vec_promoted_val, count_exp, move_mode,
25111 unroll_factor, expected_size, issetmem);
25112 break;
25113 case rep_prefix_8_byte:
25114 case rep_prefix_4_byte:
25115 case rep_prefix_1_byte:
25116 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25117 val_exp, count_exp, move_mode, issetmem);
25118 break;
25120 /* Adjust properly the offset of src and dest memory for aliasing. */
25121 if (CONST_INT_P (count_exp))
25123 if (!issetmem)
25124 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25125 (count / size_needed) * size_needed);
25126 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25127 (count / size_needed) * size_needed);
25129 else
25131 if (!issetmem)
25132 src = change_address (src, BLKmode, srcreg);
25133 dst = change_address (dst, BLKmode, destreg);
25136 /* Step 4: Epilogue to copy the remaining bytes. */
25137 epilogue:
25138 if (label)
25140 /* When the main loop is done, COUNT_EXP might hold original count,
25141 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25142 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25143 bytes. Compensate if needed. */
25145 if (size_needed < epilogue_size_needed)
25147 tmp =
25148 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25149 GEN_INT (size_needed - 1), count_exp, 1,
25150 OPTAB_DIRECT);
25151 if (tmp != count_exp)
25152 emit_move_insn (count_exp, tmp);
25154 emit_label (label);
25155 LABEL_NUSES (label) = 1;
25158 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25160 if (force_loopy_epilogue)
25161 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25162 epilogue_size_needed);
25163 else
25165 if (issetmem)
25166 expand_setmem_epilogue (dst, destreg, promoted_val,
25167 vec_promoted_val, count_exp,
25168 epilogue_size_needed);
25169 else
25170 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25171 epilogue_size_needed);
25174 if (jump_around_label)
25175 emit_label (jump_around_label);
25176 return true;
25180 /* Expand the appropriate insns for doing strlen if not just doing
25181 repnz; scasb
25183 out = result, initialized with the start address
25184 align_rtx = alignment of the address.
25185 scratch = scratch register, initialized with the start address when
25186 not aligned, otherwise undefined
25188 This is just the body. It needs the initializations mentioned above and
25189 some address computing at the end. These things are done in i386.md. */
25191 static void
25192 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25194 int align;
25195 rtx tmp;
25196 rtx_code_label *align_2_label = NULL;
25197 rtx_code_label *align_3_label = NULL;
25198 rtx_code_label *align_4_label = gen_label_rtx ();
25199 rtx_code_label *end_0_label = gen_label_rtx ();
25200 rtx mem;
25201 rtx tmpreg = gen_reg_rtx (SImode);
25202 rtx scratch = gen_reg_rtx (SImode);
25203 rtx cmp;
25205 align = 0;
25206 if (CONST_INT_P (align_rtx))
25207 align = INTVAL (align_rtx);
25209 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25211 /* Is there a known alignment and is it less than 4? */
25212 if (align < 4)
25214 rtx scratch1 = gen_reg_rtx (Pmode);
25215 emit_move_insn (scratch1, out);
25216 /* Is there a known alignment and is it not 2? */
25217 if (align != 2)
25219 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25220 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25222 /* Leave just the 3 lower bits. */
25223 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25224 NULL_RTX, 0, OPTAB_WIDEN);
25226 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25227 Pmode, 1, align_4_label);
25228 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25229 Pmode, 1, align_2_label);
25230 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25231 Pmode, 1, align_3_label);
25233 else
25235 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25236 check if it is aligned to 4 bytes. */
25238 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25239 NULL_RTX, 0, OPTAB_WIDEN);
25241 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25242 Pmode, 1, align_4_label);
25245 mem = change_address (src, QImode, out);
25247 /* Now compare the bytes. */
25249 /* Compare the first n unaligned byte on a byte per byte basis. */
25250 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25251 QImode, 1, end_0_label);
25253 /* Increment the address. */
25254 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25256 /* Not needed with an alignment of 2 */
25257 if (align != 2)
25259 emit_label (align_2_label);
25261 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25262 end_0_label);
25264 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25266 emit_label (align_3_label);
25269 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25270 end_0_label);
25272 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25275 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25276 align this loop; that only bloats the program and does not help
25277 speed it up. */
25278 emit_label (align_4_label);
25280 mem = change_address (src, SImode, out);
25281 emit_move_insn (scratch, mem);
25282 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25284 /* This formula yields a nonzero result iff one of the bytes is zero.
25285 This saves three branches inside the loop and many cycles. */
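/* I.e. tmpreg = (x - 0x01010101) & ~x & 0x80808080; for example
   x = 0x12003456 yields 0x00800000 (flagging the zero byte), while
   x = 0x12345678 yields 0.  */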
25287 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25288 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25289 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25290 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25291 gen_int_mode (0x80808080, SImode)));
25292 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25293 align_4_label);
25295 if (TARGET_CMOVE)
25297 rtx reg = gen_reg_rtx (SImode);
25298 rtx reg2 = gen_reg_rtx (Pmode);
25299 emit_move_insn (reg, tmpreg);
25300 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25302 /* If zero is not in the first two bytes, move two bytes forward. */
25303 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25304 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25305 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25306 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25307 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25308 reg,
25309 tmpreg)));
25310 /* Emit lea manually to avoid clobbering of flags. */
25311 emit_insn (gen_rtx_SET (SImode, reg2,
25312 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25314 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25315 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25316 emit_insn (gen_rtx_SET (VOIDmode, out,
25317 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25318 reg2,
25319 out)));
25321 else
25323 rtx_code_label *end_2_label = gen_label_rtx ();
25324 /* Is zero in the first two bytes? */
25326 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25327 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25328 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25329 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25330 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25331 pc_rtx);
25332 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25333 JUMP_LABEL (tmp) = end_2_label;
25335 /* Not in the first two. Move two bytes forward. */
25336 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25337 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25339 emit_label (end_2_label);
25343 /* Avoid branch in fixing the byte. */
25344 tmpreg = gen_lowpart (QImode, tmpreg);
25345 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25346 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25347 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25348 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25350 emit_label (end_0_label);
25353 /* Expand strlen. */
25355 bool
25356 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25358 rtx addr, scratch1, scratch2, scratch3, scratch4;
25360 /* The generic case of the strlen expander is long. Avoid expanding it
25361 unless TARGET_INLINE_ALL_STRINGOPS. */
25363 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25364 && !TARGET_INLINE_ALL_STRINGOPS
25365 && !optimize_insn_for_size_p ()
25366 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25367 return false;
25369 addr = force_reg (Pmode, XEXP (src, 0));
25370 scratch1 = gen_reg_rtx (Pmode);
25372 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25373 && !optimize_insn_for_size_p ())
25375 /* Well, it seems that some optimizer does not combine a call like
25376 foo(strlen(bar), strlen(bar));
25377 when the move and the subtraction are done here. It does calculate
25378 the length just once when these instructions are done inside
25379 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25380 often used and I use one fewer register for the lifetime of
25381 output_strlen_unroll(), this is better. */
25383 emit_move_insn (out, addr);
25385 ix86_expand_strlensi_unroll_1 (out, src, align);
25387 /* strlensi_unroll_1 returns the address of the zero at the end of
25388 the string, like memchr(), so compute the length by subtracting
25389 the start address. */
25390 emit_insn (ix86_gen_sub3 (out, out, addr));
25392 else
25394 rtx unspec;
25396 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25397 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25398 return false;
25400 scratch2 = gen_reg_rtx (Pmode);
25401 scratch3 = gen_reg_rtx (Pmode);
25402 scratch4 = force_reg (Pmode, constm1_rtx);
25404 emit_move_insn (scratch3, addr);
25405 eoschar = force_reg (QImode, eoschar);
25407 src = replace_equiv_address_nv (src, scratch3);
25409 /* If .md starts supporting :P, this can be done in .md. */
25410 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25411 scratch4), UNSPEC_SCAS);
25412 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25413 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25414 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25416 return true;
25419 /* For a given symbol (function), construct code to compute the address of its
25420 PLT entry in the large x86-64 PIC model. */
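/* I.e. load symbol@PLTOFF into a fresh register and add the GOT base held
   in the PIC register.  */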
25421 static rtx
25422 construct_plt_address (rtx symbol)
25424 rtx tmp, unspec;
25426 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25427 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25428 gcc_assert (Pmode == DImode);
25430 tmp = gen_reg_rtx (Pmode);
25431 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25433 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25434 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25435 return tmp;
25439 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25440 rtx callarg2,
25441 rtx pop, bool sibcall)
25443 rtx vec[3];
25444 rtx use = NULL, call;
25445 unsigned int vec_len = 0;
25447 if (pop == const0_rtx)
25448 pop = NULL;
25449 gcc_assert (!TARGET_64BIT || !pop);
25451 if (TARGET_MACHO && !TARGET_64BIT)
25453 #if TARGET_MACHO
25454 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25455 fnaddr = machopic_indirect_call_target (fnaddr);
25456 #endif
25458 else
25460 /* Static functions and indirect calls don't need the pic register. */
25461 if (flag_pic
25462 && (!TARGET_64BIT
25463 || (ix86_cmodel == CM_LARGE_PIC
25464 && DEFAULT_ABI != MS_ABI))
25465 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25466 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25468 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25469 if (ix86_use_pseudo_pic_reg ())
25470 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25471 pic_offset_table_rtx);
25475 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25476 parameters passed in vector registers. */
25477 if (TARGET_64BIT
25478 && (INTVAL (callarg2) > 0
25479 || (INTVAL (callarg2) == 0
25480 && (TARGET_SSE || !flag_skip_rax_setup))))
25482 rtx al = gen_rtx_REG (QImode, AX_REG);
25483 emit_move_insn (al, callarg2);
25484 use_reg (&use, al);
25487 if (ix86_cmodel == CM_LARGE_PIC
25488 && !TARGET_PECOFF
25489 && MEM_P (fnaddr)
25490 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25491 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25492 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25493 else if (sibcall
25494 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25495 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25497 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25498 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25501 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25503 if (retval)
25505 /* We should add bounds as destination register in case
25506 pointer with bounds may be returned. */
25507 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25509 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25510 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25511 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25512 chkp_put_regs_to_expr_list (retval);
25515 call = gen_rtx_SET (VOIDmode, retval, call);
25517 vec[vec_len++] = call;
25519 if (pop)
25521 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25522 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25523 vec[vec_len++] = pop;
25526 if (TARGET_64BIT_MS_ABI
25527 && (!callarg2 || INTVAL (callarg2) != -2))
25529 int const cregs_size
25530 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25531 int i;
25533 for (i = 0; i < cregs_size; i++)
25535 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25536 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25538 clobber_reg (&use, gen_rtx_REG (mode, regno));
25542 if (vec_len > 1)
25543 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25544 call = emit_call_insn (call);
25545 if (use)
25546 CALL_INSN_FUNCTION_USAGE (call) = use;
25548 return call;
25551 /* Output the assembly for a call instruction. */
25553 const char *
25554 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25556 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25557 bool seh_nop_p = false;
25558 const char *xasm;
25560 if (SIBLING_CALL_P (insn))
25562 if (direct_p)
25563 xasm = "%!jmp\t%P0";
25564 /* SEH epilogue detection requires the indirect branch case
25565 to include REX.W. */
25566 else if (TARGET_SEH)
25567 xasm = "%!rex.W jmp %A0";
25568 else
25569 xasm = "%!jmp\t%A0";
25571 output_asm_insn (xasm, &call_op);
25572 return "";
25575 /* SEH unwinding can require an extra nop to be emitted in several
25576 circumstances. Determine if we have one of those. */
25577 if (TARGET_SEH)
25579 rtx_insn *i;
25581 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25583 /* If we get to another real insn, we don't need the nop. */
25584 if (INSN_P (i))
25585 break;
25587 /* If we get to the epilogue note, prevent a catch region from
25588 being adjacent to the standard epilogue sequence. With non-call
25589 exceptions, we'll have done this during epilogue emission. */
25590 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25591 && !flag_non_call_exceptions
25592 && !can_throw_internal (insn))
25594 seh_nop_p = true;
25595 break;
25599 /* If we didn't find a real insn following the call, prevent the
25600 unwinder from looking into the next function. */
25601 if (i == NULL)
25602 seh_nop_p = true;
25605 if (direct_p)
25606 xasm = "%!call\t%P0";
25607 else
25608 xasm = "%!call\t%A0";
25610 output_asm_insn (xasm, &call_op);
25612 if (seh_nop_p)
25613 return "nop";
25615 return "";
25618 /* Clear stack slot assignments remembered from previous functions.
25619 This is called from INIT_EXPANDERS once before RTL is emitted for each
25620 function. */
25622 static struct machine_function *
25623 ix86_init_machine_status (void)
25625 struct machine_function *f;
25627 f = ggc_cleared_alloc<machine_function> ();
25628 f->use_fast_prologue_epilogue_nregs = -1;
25629 f->call_abi = ix86_abi;
25631 return f;
25634 /* Return a MEM corresponding to a stack slot with mode MODE.
25635 Allocate a new slot if necessary.
25637 The RTL for a function can have several slots available: N is
25638 which slot to use. */
25641 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25643 struct stack_local_entry *s;
25645 gcc_assert (n < MAX_386_STACK_LOCALS);
25647 for (s = ix86_stack_locals; s; s = s->next)
25648 if (s->mode == mode && s->n == n)
25649 return validize_mem (copy_rtx (s->rtl));
25651 s = ggc_alloc<stack_local_entry> ();
25652 s->n = n;
25653 s->mode = mode;
25654 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25656 s->next = ix86_stack_locals;
25657 ix86_stack_locals = s;
25658 return validize_mem (copy_rtx (s->rtl));
25661 static void
25662 ix86_instantiate_decls (void)
25664 struct stack_local_entry *s;
25666 for (s = ix86_stack_locals; s; s = s->next)
25667 if (s->rtl != NULL_RTX)
25668 instantiate_decl_rtl (s->rtl);
25671 /* Check whether x86 address PARTS is a pc-relative address. */
25673 static bool
25674 rip_relative_addr_p (struct ix86_address *parts)
25676 rtx base, index, disp;
25678 base = parts->base;
25679 index = parts->index;
25680 disp = parts->disp;
25682 if (disp && !base && !index)
25684 if (TARGET_64BIT)
25686 rtx symbol = disp;
25688 if (GET_CODE (disp) == CONST)
25689 symbol = XEXP (disp, 0);
25690 if (GET_CODE (symbol) == PLUS
25691 && CONST_INT_P (XEXP (symbol, 1)))
25692 symbol = XEXP (symbol, 0);
25694 if (GET_CODE (symbol) == LABEL_REF
25695 || (GET_CODE (symbol) == SYMBOL_REF
25696 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25697 || (GET_CODE (symbol) == UNSPEC
25698 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25699 || XINT (symbol, 1) == UNSPEC_PCREL
25700 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25701 return true;
25704 return false;
25707 /* Calculate the length of the memory address in the instruction encoding.
25708 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25709 or other prefixes. We never generate addr32 prefix for LEA insn. */
25712 memory_address_length (rtx addr, bool lea)
25714 struct ix86_address parts;
25715 rtx base, index, disp;
25716 int len;
25717 int ok;
25719 if (GET_CODE (addr) == PRE_DEC
25720 || GET_CODE (addr) == POST_INC
25721 || GET_CODE (addr) == PRE_MODIFY
25722 || GET_CODE (addr) == POST_MODIFY)
25723 return 0;
25725 ok = ix86_decompose_address (addr, &parts);
25726 gcc_assert (ok);
25728 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25730 /* If this is not LEA instruction, add the length of addr32 prefix. */
25731 if (TARGET_64BIT && !lea
25732 && (SImode_address_operand (addr, VOIDmode)
25733 || (parts.base && GET_MODE (parts.base) == SImode)
25734 || (parts.index && GET_MODE (parts.index) == SImode)))
25735 len++;
25737 base = parts.base;
25738 index = parts.index;
25739 disp = parts.disp;
25741 if (base && GET_CODE (base) == SUBREG)
25742 base = SUBREG_REG (base);
25743 if (index && GET_CODE (index) == SUBREG)
25744 index = SUBREG_REG (index);
25746 gcc_assert (base == NULL_RTX || REG_P (base));
25747 gcc_assert (index == NULL_RTX || REG_P (index));
25749 /* Rule of thumb:
25750 - esp as the base always wants an index,
25751 - ebp as the base always wants a displacement,
25752 - r12 as the base always wants an index,
25753 - r13 as the base always wants a displacement. */
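/* For example, a bare (%esp) needs a SIB byte and a bare (%ebp) needs a
   disp8, so each is counted one byte longer than, say, (%eax).  */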
25755 /* Register Indirect. */
25756 if (base && !index && !disp)
25758 /* esp (for its index) and ebp (for its displacement) need
25759 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25760 code. */
25761 if (base == arg_pointer_rtx
25762 || base == frame_pointer_rtx
25763 || REGNO (base) == SP_REG
25764 || REGNO (base) == BP_REG
25765 || REGNO (base) == R12_REG
25766 || REGNO (base) == R13_REG)
25767 len++;
25770 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25771 is not disp32, but disp32(%rip), so for disp32
25772 SIB byte is needed, unless print_operand_address
25773 optimizes it into disp32(%rip) or (%rip) is implied
25774 by UNSPEC. */
25775 else if (disp && !base && !index)
25777 len += 4;
25778 if (rip_relative_addr_p (&parts))
25779 len++;
25781 else
25783 /* Find the length of the displacement constant. */
25784 if (disp)
25786 if (base && satisfies_constraint_K (disp))
25787 len += 1;
25788 else
25789 len += 4;
25791 /* ebp always wants a displacement. Similarly r13. */
25792 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25793 len++;
25795 /* An index requires the two-byte modrm form.... */
25796 if (index
25797 /* ...like esp (or r12), which always wants an index. */
25798 || base == arg_pointer_rtx
25799 || base == frame_pointer_rtx
25800 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25801 len++;
25804 return len;
25807 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25808 is set, expect that the insn has an 8-bit immediate alternative. */
25810 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25812 int len = 0;
25813 int i;
25814 extract_insn_cached (insn);
25815 for (i = recog_data.n_operands - 1; i >= 0; --i)
25816 if (CONSTANT_P (recog_data.operand[i]))
25818 enum attr_mode mode = get_attr_mode (insn);
25820 gcc_assert (!len);
25821 if (shortform && CONST_INT_P (recog_data.operand[i]))
25823 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25824 switch (mode)
25826 case MODE_QI:
25827 len = 1;
25828 continue;
25829 case MODE_HI:
25830 ival = trunc_int_for_mode (ival, HImode);
25831 break;
25832 case MODE_SI:
25833 ival = trunc_int_for_mode (ival, SImode);
25834 break;
25835 default:
25836 break;
25838 if (IN_RANGE (ival, -128, 127))
25840 len = 1;
25841 continue;
25844 switch (mode)
25846 case MODE_QI:
25847 len = 1;
25848 break;
25849 case MODE_HI:
25850 len = 2;
25851 break;
25852 case MODE_SI:
25853 len = 4;
25854 break;
25855 /* Immediates for DImode instructions are encoded
25856 as 32-bit sign-extended values. */
25857 case MODE_DI:
25858 len = 4;
25859 break;
25860 default:
25861 fatal_insn ("unknown insn mode", insn);
25864 return len;
25867 /* Compute default value for "length_address" attribute. */
25869 ix86_attr_length_address_default (rtx_insn *insn)
25871 int i;
25873 if (get_attr_type (insn) == TYPE_LEA)
25875 rtx set = PATTERN (insn), addr;
25877 if (GET_CODE (set) == PARALLEL)
25878 set = XVECEXP (set, 0, 0);
25880 gcc_assert (GET_CODE (set) == SET);
25882 addr = SET_SRC (set);
25884 return memory_address_length (addr, true);
25887 extract_insn_cached (insn);
25888 for (i = recog_data.n_operands - 1; i >= 0; --i)
25889 if (MEM_P (recog_data.operand[i]))
25891 constrain_operands_cached (insn, reload_completed);
25892 if (which_alternative != -1)
25894 const char *constraints = recog_data.constraints[i];
25895 int alt = which_alternative;
25897 while (*constraints == '=' || *constraints == '+')
25898 constraints++;
25899 while (alt-- > 0)
25900 while (*constraints++ != ',')
25902 /* Skip ignored operands. */
25903 if (*constraints == 'X')
25904 continue;
25906 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25908 return 0;
25911 /* Compute default value for "length_vex" attribute. It includes
25912 2 or 3 byte VEX prefix and 1 opcode byte. */
25915 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25916 bool has_vex_w)
25918 int i;
25920 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
25921 requires the 3-byte VEX prefix. */
25922 if (!has_0f_opcode || has_vex_w)
25923 return 3 + 1;
25925 /* We can always use 2 byte VEX prefix in 32bit. */
25926 if (!TARGET_64BIT)
25927 return 2 + 1;
25929 extract_insn_cached (insn);
25931 for (i = recog_data.n_operands - 1; i >= 0; --i)
25932 if (REG_P (recog_data.operand[i]))
25934 /* REX.W bit uses 3 byte VEX prefix. */
25935 if (GET_MODE (recog_data.operand[i]) == DImode
25936 && GENERAL_REG_P (recog_data.operand[i]))
25937 return 3 + 1;
25939 else
25941 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25942 if (MEM_P (recog_data.operand[i])
25943 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25944 return 3 + 1;
25947 return 2 + 1;
25950 /* Return the maximum number of instructions a cpu can issue. */
25952 static int
25953 ix86_issue_rate (void)
25955 switch (ix86_tune)
25957 case PROCESSOR_PENTIUM:
25958 case PROCESSOR_BONNELL:
25959 case PROCESSOR_SILVERMONT:
25960 case PROCESSOR_KNL:
25961 case PROCESSOR_INTEL:
25962 case PROCESSOR_K6:
25963 case PROCESSOR_BTVER2:
25964 case PROCESSOR_PENTIUM4:
25965 case PROCESSOR_NOCONA:
25966 return 2;
25968 case PROCESSOR_PENTIUMPRO:
25969 case PROCESSOR_ATHLON:
25970 case PROCESSOR_K8:
25971 case PROCESSOR_AMDFAM10:
25972 case PROCESSOR_GENERIC:
25973 case PROCESSOR_BTVER1:
25974 return 3;
25976 case PROCESSOR_BDVER1:
25977 case PROCESSOR_BDVER2:
25978 case PROCESSOR_BDVER3:
25979 case PROCESSOR_BDVER4:
25980 case PROCESSOR_CORE2:
25981 case PROCESSOR_NEHALEM:
25982 case PROCESSOR_SANDYBRIDGE:
25983 case PROCESSOR_HASWELL:
25984 return 4;
25986 default:
25987 return 1;
25991 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25992 by DEP_INSN and nothing else set by DEP_INSN. */
25994 static bool
25995 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25997 rtx set, set2;
25999 /* Simplify the test for uninteresting insns. */
26000 if (insn_type != TYPE_SETCC
26001 && insn_type != TYPE_ICMOV
26002 && insn_type != TYPE_FCMOV
26003 && insn_type != TYPE_IBR)
26004 return false;
26006 if ((set = single_set (dep_insn)) != 0)
26008 set = SET_DEST (set);
26009 set2 = NULL_RTX;
26011 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26012 && XVECLEN (PATTERN (dep_insn), 0) == 2
26013 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26014 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26016 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26017 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26019 else
26020 return false;
26022 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26023 return false;
26025 /* This test is true if the dependent insn reads the flags but
26026 not any other potentially set register. */
26027 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26028 return false;
26030 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26031 return false;
26033 return true;
26036 /* Return true iff USE_INSN has a memory address with operands set by
26037 SET_INSN. */
26039 bool
26040 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26042 int i;
26043 extract_insn_cached (use_insn);
26044 for (i = recog_data.n_operands - 1; i >= 0; --i)
26045 if (MEM_P (recog_data.operand[i]))
26047 rtx addr = XEXP (recog_data.operand[i], 0);
26048 return modified_in_p (addr, set_insn) != 0;
26050 return false;
26053 /* Helper function for exact_store_load_dependency.
26054 Return true if addr is found in insn. */
26055 static bool
26056 exact_dependency_1 (rtx addr, rtx insn)
26058 enum rtx_code code;
26059 const char *format_ptr;
26060 int i, j;
26062 code = GET_CODE (insn);
26063 switch (code)
26065 case MEM:
26066 if (rtx_equal_p (addr, insn))
26067 return true;
26068 break;
26069 case REG:
26070 CASE_CONST_ANY:
26071 case SYMBOL_REF:
26072 case CODE_LABEL:
26073 case PC:
26074 case CC0:
26075 case EXPR_LIST:
26076 return false;
26077 default:
26078 break;
26081 format_ptr = GET_RTX_FORMAT (code);
26082 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26084 switch (*format_ptr++)
26086 case 'e':
26087 if (exact_dependency_1 (addr, XEXP (insn, i)))
26088 return true;
26089 break;
26090 case 'E':
26091 for (j = 0; j < XVECLEN (insn, i); j++)
26092 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26093 return true;
26094 break;
26097 return false;
26100 /* Return true if there exists exact dependency for store & load, i.e.
26101 the same memory address is used in them. */
26102 static bool
26103 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26105 rtx set1, set2;
26107 set1 = single_set (store);
26108 if (!set1)
26109 return false;
26110 if (!MEM_P (SET_DEST (set1)))
26111 return false;
26112 set2 = single_set (load);
26113 if (!set2)
26114 return false;
26115 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26116 return true;
26117 return false;
26120 static int
26121 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26123 enum attr_type insn_type, dep_insn_type;
26124 enum attr_memory memory;
26125 rtx set, set2;
26126 int dep_insn_code_number;
26128 /* Anti and output dependencies have zero cost on all CPUs. */
26129 if (REG_NOTE_KIND (link) != 0)
26130 return 0;
26132 dep_insn_code_number = recog_memoized (dep_insn);
26134 /* If we can't recognize the insns, we can't really do anything. */
26135 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26136 return cost;
26138 insn_type = get_attr_type (insn);
26139 dep_insn_type = get_attr_type (dep_insn);
26141 switch (ix86_tune)
26143 case PROCESSOR_PENTIUM:
26144 /* Address Generation Interlock adds a cycle of latency. */
26145 if (insn_type == TYPE_LEA)
26147 rtx addr = PATTERN (insn);
26149 if (GET_CODE (addr) == PARALLEL)
26150 addr = XVECEXP (addr, 0, 0);
26152 gcc_assert (GET_CODE (addr) == SET);
26154 addr = SET_SRC (addr);
26155 if (modified_in_p (addr, dep_insn))
26156 cost += 1;
26158 else if (ix86_agi_dependent (dep_insn, insn))
26159 cost += 1;
26161 /* ??? Compares pair with jump/setcc. */
26162 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26163 cost = 0;
26165 /* Floating point stores require value to be ready one cycle earlier. */
26166 if (insn_type == TYPE_FMOV
26167 && get_attr_memory (insn) == MEMORY_STORE
26168 && !ix86_agi_dependent (dep_insn, insn))
26169 cost += 1;
26170 break;
26172 case PROCESSOR_PENTIUMPRO:
26173 /* INT->FP conversion is expensive. */
26174 if (get_attr_fp_int_src (dep_insn))
26175 cost += 5;
26177 /* There is one cycle extra latency between an FP op and a store. */
26178 if (insn_type == TYPE_FMOV
26179 && (set = single_set (dep_insn)) != NULL_RTX
26180 && (set2 = single_set (insn)) != NULL_RTX
26181 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26182 && MEM_P (SET_DEST (set2)))
26183 cost += 1;
26185 memory = get_attr_memory (insn);
26187 /* Show the ability of the reorder buffer to hide the latency of a load
26188 by executing it in parallel with the previous instruction when the
26189 previous instruction is not needed to compute the address. */
26190 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26191 && !ix86_agi_dependent (dep_insn, insn))
26193 /* Claim moves to take one cycle, as the core can issue one load
26194 at a time and the next load can start a cycle later. */
26195 if (dep_insn_type == TYPE_IMOV
26196 || dep_insn_type == TYPE_FMOV)
26197 cost = 1;
26198 else if (cost > 1)
26199 cost--;
26201 break;
26203 case PROCESSOR_K6:
26204 /* The esp dependency is resolved before
26205 the instruction is really finished. */
26206 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26207 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26208 return 1;
26210 /* INT->FP conversion is expensive. */
26211 if (get_attr_fp_int_src (dep_insn))
26212 cost += 5;
26214 memory = get_attr_memory (insn);
26216 /* Show the ability of the reorder buffer to hide the latency of a load
26217 by executing it in parallel with the previous instruction when the
26218 previous instruction is not needed to compute the address. */
26219 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26220 && !ix86_agi_dependent (dep_insn, insn))
26222 /* Claim moves to take one cycle, as the core can issue one load
26223 at a time and the next load can start a cycle later. */
26224 if (dep_insn_type == TYPE_IMOV
26225 || dep_insn_type == TYPE_FMOV)
26226 cost = 1;
26227 else if (cost > 2)
26228 cost -= 2;
26229 else
26230 cost = 1;
26232 break;
26234 case PROCESSOR_AMDFAM10:
26235 case PROCESSOR_BDVER1:
26236 case PROCESSOR_BDVER2:
26237 case PROCESSOR_BDVER3:
26238 case PROCESSOR_BDVER4:
26239 case PROCESSOR_BTVER1:
26240 case PROCESSOR_BTVER2:
26241 case PROCESSOR_GENERIC:
26242 /* The stack engine allows push&pop instructions to execute in parallel. */
26243 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26244 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26245 return 0;
26246 /* FALLTHRU */
26248 case PROCESSOR_ATHLON:
26249 case PROCESSOR_K8:
26250 memory = get_attr_memory (insn);
26252 /* Show the ability of the reorder buffer to hide the latency of a load
26253 by executing it in parallel with the previous instruction when the
26254 previous instruction is not needed to compute the address. */
26255 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26256 && !ix86_agi_dependent (dep_insn, insn))
26258 enum attr_unit unit = get_attr_unit (insn);
26259 int loadcost = 3;
26261 /* Because of the difference between the length of the integer and
26262 floating unit pipeline preparation stages, the memory operands
26263 for floating point are cheaper.
26265 ??? For Athlon the difference is most probably 2. */
26266 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26267 loadcost = 3;
26268 else
26269 loadcost = TARGET_ATHLON ? 2 : 0;
26271 if (cost >= loadcost)
26272 cost -= loadcost;
26273 else
26274 cost = 0;
26276 break;
26278 case PROCESSOR_CORE2:
26279 case PROCESSOR_NEHALEM:
26280 case PROCESSOR_SANDYBRIDGE:
26281 case PROCESSOR_HASWELL:
26282 /* The stack engine allows push&pop instructions to execute in parallel. */
26283 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26284 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26285 return 0;
26287 memory = get_attr_memory (insn);
26289 /* Show the ability of the reorder buffer to hide the latency of a load
26290 by executing it in parallel with the previous instruction when the
26291 previous instruction is not needed to compute the address. */
26292 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26293 && !ix86_agi_dependent (dep_insn, insn))
26295 if (cost >= 4)
26296 cost -= 4;
26297 else
26298 cost = 0;
26300 break;
26302 case PROCESSOR_SILVERMONT:
26303 case PROCESSOR_KNL:
26304 case PROCESSOR_INTEL:
26305 if (!reload_completed)
26306 return cost;
26308 /* Increase cost of integer loads. */
26309 memory = get_attr_memory (dep_insn);
26310 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26312 enum attr_unit unit = get_attr_unit (dep_insn);
26313 if (unit == UNIT_INTEGER && cost == 1)
26315 if (memory == MEMORY_LOAD)
26316 cost = 3;
26317 else
26319 /* Increase the cost of ld/st for short int types only,
26320 because of the store-forwarding issue. */
26321 rtx set = single_set (dep_insn);
26322 if (set && (GET_MODE (SET_DEST (set)) == QImode
26323 || GET_MODE (SET_DEST (set)) == HImode))
26325 /* Increase the cost of the store/load insn if an exact
26326 dependence exists and it is a load insn. */
26327 enum attr_memory insn_memory = get_attr_memory (insn);
26328 if (insn_memory == MEMORY_LOAD
26329 && exact_store_load_dependency (dep_insn, insn))
26330 cost = 3;
26336 default:
26337 break;
26340 return cost;
26343 /* How many alternative schedules to try. This should be as wide as the
26344 scheduling freedom in the DFA, but no wider. Making this value too
26345 large results in extra work for the scheduler. */
26347 static int
26348 ia32_multipass_dfa_lookahead (void)
26350 switch (ix86_tune)
26352 case PROCESSOR_PENTIUM:
26353 return 2;
26355 case PROCESSOR_PENTIUMPRO:
26356 case PROCESSOR_K6:
26357 return 1;
26359 case PROCESSOR_BDVER1:
26360 case PROCESSOR_BDVER2:
26361 case PROCESSOR_BDVER3:
26362 case PROCESSOR_BDVER4:
26363 /* We use lookahead value 4 for BD both before and after reload
26364 schedules. Plan is to have value 8 included for O3. */
26365 return 4;
26367 case PROCESSOR_CORE2:
26368 case PROCESSOR_NEHALEM:
26369 case PROCESSOR_SANDYBRIDGE:
26370 case PROCESSOR_HASWELL:
26371 case PROCESSOR_BONNELL:
26372 case PROCESSOR_SILVERMONT:
26373 case PROCESSOR_KNL:
26374 case PROCESSOR_INTEL:
26375 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26376 as the number of instructions that can be executed in a cycle, i.e.,
26377 issue_rate. I wonder why tuning for many CPUs does not do this. */
26378 if (reload_completed)
26379 return ix86_issue_rate ();
26380 /* Don't use lookahead for pre-reload schedule to save compile time. */
26381 return 0;
26383 default:
26384 return 0;
26388 /* Return true if target platform supports macro-fusion. */
26390 static bool
26391 ix86_macro_fusion_p ()
26393 return TARGET_FUSE_CMP_AND_BRANCH;
26396 /* Check whether the current microarchitecture supports macro fusion
26397 for the insn pair "CONDGEN + CONDJMP". Refer to the
26398 "Intel Architectures Optimization Reference Manual". */
26400 static bool
26401 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26403 rtx src, dest;
26404 enum rtx_code ccode;
26405 rtx compare_set = NULL_RTX, test_if, cond;
26406 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26408 if (!any_condjump_p (condjmp))
26409 return false;
26411 if (get_attr_type (condgen) != TYPE_TEST
26412 && get_attr_type (condgen) != TYPE_ICMP
26413 && get_attr_type (condgen) != TYPE_INCDEC
26414 && get_attr_type (condgen) != TYPE_ALU)
26415 return false;
26417 compare_set = single_set (condgen);
26418 if (compare_set == NULL_RTX
26419 && !TARGET_FUSE_ALU_AND_BRANCH)
26420 return false;
26422 if (compare_set == NULL_RTX)
26424 int i;
26425 rtx pat = PATTERN (condgen);
26426 for (i = 0; i < XVECLEN (pat, 0); i++)
26427 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26429 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26430 if (GET_CODE (set_src) == COMPARE)
26431 compare_set = XVECEXP (pat, 0, i);
26432 else
26433 alu_set = XVECEXP (pat, 0, i);
26436 if (compare_set == NULL_RTX)
26437 return false;
26438 src = SET_SRC (compare_set);
26439 if (GET_CODE (src) != COMPARE)
26440 return false;
26442 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26443 supported. */
26444 if ((MEM_P (XEXP (src, 0))
26445 && CONST_INT_P (XEXP (src, 1)))
26446 || (MEM_P (XEXP (src, 1))
26447 && CONST_INT_P (XEXP (src, 0))))
26448 return false;
26450 /* No fusion for RIP-relative address. */
26451 if (MEM_P (XEXP (src, 0)))
26452 addr = XEXP (XEXP (src, 0), 0);
26453 else if (MEM_P (XEXP (src, 1)))
26454 addr = XEXP (XEXP (src, 1), 0);
26456 if (addr) {
26457 ix86_address parts;
26458 int ok = ix86_decompose_address (addr, &parts);
26459 gcc_assert (ok);
26461 if (rip_relative_addr_p (&parts))
26462 return false;
26465 test_if = SET_SRC (pc_set (condjmp));
26466 cond = XEXP (test_if, 0);
26467 ccode = GET_CODE (cond);
26468 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26469 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26470 && (ccode == GE
26471 || ccode == GT
26472 || ccode == LE
26473 || ccode == LT))
26474 return false;
26476 /* Return true for TYPE_TEST and TYPE_ICMP. */
26477 if (get_attr_type (condgen) == TYPE_TEST
26478 || get_attr_type (condgen) == TYPE_ICMP)
26479 return true;
26481 /* The following handles the macro-fusion case for alu + jmp. */
26482 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26483 return false;
26485 /* No fusion for alu op with memory destination operand. */
26486 dest = SET_DEST (alu_set);
26487 if (MEM_P (dest))
26488 return false;
26490 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26491 supported. */
26492 if (get_attr_type (condgen) == TYPE_INCDEC
26493 && (ccode == GEU
26494 || ccode == GTU
26495 || ccode == LEU
26496 || ccode == LTU))
26497 return false;
26499 return true;
26502 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26503 execution. It is applied if
26504 (1) an IMUL instruction is at the top of the list;
26505 (2) the ready list contains exactly one producer of an independent
26506 IMUL instruction.
26507 Return the index of the IMUL producer if it was found and -1 otherwise. */
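/* Editorial note: IMUL is pipelined on Bonnell, so issuing the producer of a
   second, independent IMUL just before the IMUL that is already ready lets
   the two multiplications overlap in the pipeline instead of serializing.  */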
26508 static int
26509 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26511 rtx_insn *insn;
26512 rtx set, insn1, insn2;
26513 sd_iterator_def sd_it;
26514 dep_t dep;
26515 int index = -1;
26516 int i;
26518 if (!TARGET_BONNELL)
26519 return index;
26521 /* Check that IMUL instruction is on the top of ready list. */
26522 insn = ready[n_ready - 1];
26523 set = single_set (insn);
26524 if (!set)
26525 return index;
26526 if (!(GET_CODE (SET_SRC (set)) == MULT
26527 && GET_MODE (SET_SRC (set)) == SImode))
26528 return index;
26530 /* Search for producer of independent IMUL instruction. */
26531 for (i = n_ready - 2; i >= 0; i--)
26533 insn = ready[i];
26534 if (!NONDEBUG_INSN_P (insn))
26535 continue;
26536 /* Skip IMUL instruction. */
26537 insn2 = PATTERN (insn);
26538 if (GET_CODE (insn2) == PARALLEL)
26539 insn2 = XVECEXP (insn2, 0, 0);
26540 if (GET_CODE (insn2) == SET
26541 && GET_CODE (SET_SRC (insn2)) == MULT
26542 && GET_MODE (SET_SRC (insn2)) == SImode)
26543 continue;
26545 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26547 rtx con;
26548 con = DEP_CON (dep);
26549 if (!NONDEBUG_INSN_P (con))
26550 continue;
26551 insn1 = PATTERN (con);
26552 if (GET_CODE (insn1) == PARALLEL)
26553 insn1 = XVECEXP (insn1, 0, 0);
26555 if (GET_CODE (insn1) == SET
26556 && GET_CODE (SET_SRC (insn1)) == MULT
26557 && GET_MODE (SET_SRC (insn1)) == SImode)
26559 sd_iterator_def sd_it1;
26560 dep_t dep1;
26561 /* Check that INSN is the only producer feeding this IMUL. */
26562 index = i;
26563 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26565 rtx pro;
26566 pro = DEP_PRO (dep1);
26567 if (!NONDEBUG_INSN_P (pro))
26568 continue;
26569 if (pro != insn)
26570 index = -1;
26572 if (index >= 0)
26573 break;
26576 if (index >= 0)
26577 break;
26579 return index;
26582 /* Try to find the best candidate for the top of the ready list if two insns
26583 have the same priority - a candidate is best if the insns it depends on
26584 were scheduled earlier. Applied to Silvermont only.
26585 Return true if the top 2 insns must be interchanged. */
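/* Editorial note: below, the insn whose producers were issued at an earlier
   tick is preferred for the top slot; if both candidates' producers finished
   at the same tick, a load is preferred over a non-load.  */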
26586 static bool
26587 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26589 rtx_insn *top = ready[n_ready - 1];
26590 rtx_insn *next = ready[n_ready - 2];
26591 rtx set;
26592 sd_iterator_def sd_it;
26593 dep_t dep;
26594 int clock1 = -1;
26595 int clock2 = -1;
26596 #define INSN_TICK(INSN) (HID (INSN)->tick)
26598 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26599 return false;
26601 if (!NONDEBUG_INSN_P (top))
26602 return false;
26603 if (!NONJUMP_INSN_P (top))
26604 return false;
26605 if (!NONDEBUG_INSN_P (next))
26606 return false;
26607 if (!NONJUMP_INSN_P (next))
26608 return false;
26609 set = single_set (top);
26610 if (!set)
26611 return false;
26612 set = single_set (next);
26613 if (!set)
26614 return false;
26616 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26618 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26619 return false;
26620 /* Determine the winner more precisely. */
26621 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26623 rtx pro;
26624 pro = DEP_PRO (dep);
26625 if (!NONDEBUG_INSN_P (pro))
26626 continue;
26627 if (INSN_TICK (pro) > clock1)
26628 clock1 = INSN_TICK (pro);
26630 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26632 rtx pro;
26633 pro = DEP_PRO (dep);
26634 if (!NONDEBUG_INSN_P (pro))
26635 continue;
26636 if (INSN_TICK (pro) > clock2)
26637 clock2 = INSN_TICK (pro);
26640 if (clock1 == clock2)
26642 /* Determine winner - load must win. */
26643 enum attr_memory memory1, memory2;
26644 memory1 = get_attr_memory (top);
26645 memory2 = get_attr_memory (next);
26646 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26647 return true;
26649 return (bool) (clock2 < clock1);
26651 return false;
26652 #undef INSN_TICK
26655 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26656 Return issue rate. */
26657 static int
26658 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26659 int *pn_ready, int clock_var)
26661 int issue_rate = -1;
26662 int n_ready = *pn_ready;
26663 int i;
26664 rtx_insn *insn;
26665 int index = -1;
26667 /* Set up issue rate. */
26668 issue_rate = ix86_issue_rate ();
26670 /* Do reordering for BONNELL/SILVERMONT only. */
26671 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26672 return issue_rate;
26674 /* Nothing to do if ready list contains only 1 instruction. */
26675 if (n_ready <= 1)
26676 return issue_rate;
26678 /* Do reordering for the post-reload scheduler only. */
26679 if (!reload_completed)
26680 return issue_rate;
26682 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26684 if (sched_verbose > 1)
26685 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26686 INSN_UID (ready[index]));
26688 /* Put IMUL producer (ready[index]) at the top of ready list. */
26689 insn = ready[index];
26690 for (i = index; i < n_ready - 1; i++)
26691 ready[i] = ready[i + 1];
26692 ready[n_ready - 1] = insn;
26693 return issue_rate;
26695 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26697 if (sched_verbose > 1)
26698 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26699 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26700 /* Swap 2 top elements of ready list. */
26701 insn = ready[n_ready - 1];
26702 ready[n_ready - 1] = ready[n_ready - 2];
26703 ready[n_ready - 2] = insn;
26705 return issue_rate;
26708 static bool
26709 ix86_class_likely_spilled_p (reg_class_t);
26711 /* Return true if the lhs of INSN is a HW function argument register and set
26712 is_spilled to true if it is a likely-spilled HW register. */
26713 static bool
26714 insn_is_function_arg (rtx insn, bool* is_spilled)
26716 rtx dst;
26718 if (!NONDEBUG_INSN_P (insn))
26719 return false;
26720 /* Call instructions are not movable, ignore them. */
26721 if (CALL_P (insn))
26722 return false;
26723 insn = PATTERN (insn);
26724 if (GET_CODE (insn) == PARALLEL)
26725 insn = XVECEXP (insn, 0, 0);
26726 if (GET_CODE (insn) != SET)
26727 return false;
26728 dst = SET_DEST (insn);
26729 if (REG_P (dst) && HARD_REGISTER_P (dst)
26730 && ix86_function_arg_regno_p (REGNO (dst)))
26732 /* Is it a likely-spilled HW register? */
26733 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26734 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26735 *is_spilled = true;
26736 return true;
26738 return false;
26741 /* Add output dependencies for a chain of adjacent function arguments, but
26742 only if there is a move to a likely-spilled HW register. Return the first
26743 argument if at least one dependence was added and NULL otherwise. */
26744 static rtx_insn *
26745 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26747 rtx_insn *insn;
26748 rtx_insn *last = call;
26749 rtx_insn *first_arg = NULL;
26750 bool is_spilled = false;
26752 head = PREV_INSN (head);
26754 /* Find the argument-passing instruction nearest to the call. */
26755 while (true)
26757 last = PREV_INSN (last);
26758 if (last == head)
26759 return NULL;
26760 if (!NONDEBUG_INSN_P (last))
26761 continue;
26762 if (insn_is_function_arg (last, &is_spilled))
26763 break;
26764 return NULL;
26767 first_arg = last;
26768 while (true)
26770 insn = PREV_INSN (last);
26771 if (!INSN_P (insn))
26772 break;
26773 if (insn == head)
26774 break;
26775 if (!NONDEBUG_INSN_P (insn))
26777 last = insn;
26778 continue;
26780 if (insn_is_function_arg (insn, &is_spilled))
26782 /* Add an output dependence between two function arguments if the chain
26783 of output arguments contains likely-spilled HW registers. */
26784 if (is_spilled)
26785 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26786 first_arg = last = insn;
26788 else
26789 break;
26791 if (!is_spilled)
26792 return NULL;
26793 return first_arg;
26796 /* Add output or anti dependency from insn to first_arg to restrict its code
26797 motion. */
26798 static void
26799 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26801 rtx set;
26802 rtx tmp;
26804 set = single_set (insn);
26805 if (!set)
26806 return;
26807 tmp = SET_DEST (set);
26808 if (REG_P (tmp))
26810 /* Add output dependency to the first function argument. */
26811 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26812 return;
26814 /* Add anti dependency. */
26815 add_dependence (first_arg, insn, REG_DEP_ANTI);
26818 /* Avoid cross-block motion of a function argument by adding a dependency
26819 from the first non-jump instruction in bb. */
26820 static void
26821 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26823 rtx_insn *insn = BB_END (bb);
26825 while (insn)
26827 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26829 rtx set = single_set (insn);
26830 if (set)
26832 avoid_func_arg_motion (arg, insn);
26833 return;
26836 if (insn == BB_HEAD (bb))
26837 return;
26838 insn = PREV_INSN (insn);
26842 /* Hook for pre-reload schedule - avoid motion of function arguments
26843 passed in likely spilled HW registers. */
26844 static void
26845 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26847 rtx_insn *insn;
26848 rtx_insn *first_arg = NULL;
26849 if (reload_completed)
26850 return;
26851 while (head != tail && DEBUG_INSN_P (head))
26852 head = NEXT_INSN (head);
26853 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26854 if (INSN_P (insn) && CALL_P (insn))
26856 first_arg = add_parameter_dependencies (insn, head);
26857 if (first_arg)
26859 /* Add a dependee for the first argument to predecessors, but only
26860 if the region contains more than one block. */
26861 basic_block bb = BLOCK_FOR_INSN (insn);
26862 int rgn = CONTAINING_RGN (bb->index);
26863 int nr_blks = RGN_NR_BLOCKS (rgn);
26864 /* Skip trivial regions and region head blocks that can have
26865 predecessors outside of region. */
26866 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26868 edge e;
26869 edge_iterator ei;
26871 /* Regions are SCCs with the exception of selective
26872 scheduling with pipelining of outer blocks enabled.
26873 So also check that immediate predecessors of a non-head
26874 block are in the same region. */
26875 FOR_EACH_EDGE (e, ei, bb->preds)
26877 /* Avoid creating loop-carried dependencies by using the
26878 topological ordering in the region. */
26879 if (rgn == CONTAINING_RGN (e->src->index)
26880 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26881 add_dependee_for_func_arg (first_arg, e->src);
26884 insn = first_arg;
26885 if (insn == head)
26886 break;
26889 else if (first_arg)
26890 avoid_func_arg_motion (first_arg, insn);
26893 /* Hook for the pre-reload schedule - set the priority of moves from likely-
26894 spilled HW registers to the maximum, to schedule them as soon as possible.
26895 These are moves from function argument registers at the top of the function
26896 entry and moves from function return value registers after a call. */
26897 static int
26898 ix86_adjust_priority (rtx_insn *insn, int priority)
26900 rtx set;
26902 if (reload_completed)
26903 return priority;
26905 if (!NONDEBUG_INSN_P (insn))
26906 return priority;
26908 set = single_set (insn);
26909 if (set)
26911 rtx tmp = SET_SRC (set);
26912 if (REG_P (tmp)
26913 && HARD_REGISTER_P (tmp)
26914 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26915 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26916 return current_sched_info->sched_max_insns_priority;
26919 return priority;
26922 /* Model decoder of Core 2/i7.
26923 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26924 track the instruction fetch block boundaries and make sure that long
26925 (9+ bytes) instructions are assigned to D0. */
26927 /* Maximum length of an insn that can be handled by
26928 a secondary decoder unit. '8' for Core 2/i7. */
26929 static int core2i7_secondary_decoder_max_insn_size;
26931 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26932 '16' for Core 2/i7. */
26933 static int core2i7_ifetch_block_size;
26935 /* Maximum number of instructions decoder can handle per cycle.
26936 '6' for Core 2/i7. */
26937 static int core2i7_ifetch_block_max_insns;
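/* Editorial note: with the Core 2/i7 values assigned in
   ix86_sched_init_global (8, 16 and 6 respectively), the model admits at
   most 6 insns totalling at most 16 bytes per cycle, and an insn longer than
   8 bytes is only accepted as the first insn issued in its cycle; see
   core2i7_first_cycle_multipass_filter_ready_try.  */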
26939 typedef struct ix86_first_cycle_multipass_data_ *
26940 ix86_first_cycle_multipass_data_t;
26941 typedef const struct ix86_first_cycle_multipass_data_ *
26942 const_ix86_first_cycle_multipass_data_t;
26944 /* A variable to store target state across calls to max_issue within
26945 one cycle. */
26946 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26947 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26949 /* Initialize DATA. */
26950 static void
26951 core2i7_first_cycle_multipass_init (void *_data)
26953 ix86_first_cycle_multipass_data_t data
26954 = (ix86_first_cycle_multipass_data_t) _data;
26956 data->ifetch_block_len = 0;
26957 data->ifetch_block_n_insns = 0;
26958 data->ready_try_change = NULL;
26959 data->ready_try_change_size = 0;
26962 /* Advancing the cycle; reset ifetch block counts. */
26963 static void
26964 core2i7_dfa_post_advance_cycle (void)
26966 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26968 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26970 data->ifetch_block_len = 0;
26971 data->ifetch_block_n_insns = 0;
26974 static int min_insn_size (rtx_insn *);
26976 /* Filter out insns from ready_try that the core will not be able to issue
26977 on current cycle due to decoder. */
26978 static void
26979 core2i7_first_cycle_multipass_filter_ready_try
26980 (const_ix86_first_cycle_multipass_data_t data,
26981 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26983 while (n_ready--)
26985 rtx_insn *insn;
26986 int insn_size;
26988 if (ready_try[n_ready])
26989 continue;
26991 insn = get_ready_element (n_ready);
26992 insn_size = min_insn_size (insn);
26994 if (/* If this insn is too long for a secondary decoder ... */
26995 (!first_cycle_insn_p
26996 && insn_size > core2i7_secondary_decoder_max_insn_size)
26997 /* ... or it would not fit into the ifetch block ... */
26998 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26999 /* ... or the decoder is full already ... */
27000 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27001 /* ... mask the insn out. */
27003 ready_try[n_ready] = 1;
27005 if (data->ready_try_change)
27006 bitmap_set_bit (data->ready_try_change, n_ready);
27011 /* Prepare for a new round of multipass lookahead scheduling. */
27012 static void
27013 core2i7_first_cycle_multipass_begin (void *_data,
27014 signed char *ready_try, int n_ready,
27015 bool first_cycle_insn_p)
27017 ix86_first_cycle_multipass_data_t data
27018 = (ix86_first_cycle_multipass_data_t) _data;
27019 const_ix86_first_cycle_multipass_data_t prev_data
27020 = ix86_first_cycle_multipass_data;
27022 /* Restore the state from the end of the previous round. */
27023 data->ifetch_block_len = prev_data->ifetch_block_len;
27024 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27026 /* Filter instructions that cannot be issued on current cycle due to
27027 decoder restrictions. */
27028 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27029 first_cycle_insn_p);
27032 /* INSN is being issued in current solution. Account for its impact on
27033 the decoder model. */
27034 static void
27035 core2i7_first_cycle_multipass_issue (void *_data,
27036 signed char *ready_try, int n_ready,
27037 rtx_insn *insn, const void *_prev_data)
27039 ix86_first_cycle_multipass_data_t data
27040 = (ix86_first_cycle_multipass_data_t) _data;
27041 const_ix86_first_cycle_multipass_data_t prev_data
27042 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27044 int insn_size = min_insn_size (insn);
27046 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27047 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27048 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27049 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27051 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27052 if (!data->ready_try_change)
27054 data->ready_try_change = sbitmap_alloc (n_ready);
27055 data->ready_try_change_size = n_ready;
27057 else if (data->ready_try_change_size < n_ready)
27059 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27060 n_ready, 0);
27061 data->ready_try_change_size = n_ready;
27063 bitmap_clear (data->ready_try_change);
27065 /* Filter out insns from ready_try that the core will not be able to issue
27066 on current cycle due to decoder. */
27067 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27068 false);
27071 /* Revert the effect on ready_try. */
27072 static void
27073 core2i7_first_cycle_multipass_backtrack (const void *_data,
27074 signed char *ready_try,
27075 int n_ready ATTRIBUTE_UNUSED)
27077 const_ix86_first_cycle_multipass_data_t data
27078 = (const_ix86_first_cycle_multipass_data_t) _data;
27079 unsigned int i = 0;
27080 sbitmap_iterator sbi;
27082 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27083 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27085 ready_try[i] = 0;
27089 /* Save the result of multipass lookahead scheduling for the next round. */
27090 static void
27091 core2i7_first_cycle_multipass_end (const void *_data)
27093 const_ix86_first_cycle_multipass_data_t data
27094 = (const_ix86_first_cycle_multipass_data_t) _data;
27095 ix86_first_cycle_multipass_data_t next_data
27096 = ix86_first_cycle_multipass_data;
27098 if (data != NULL)
27100 next_data->ifetch_block_len = data->ifetch_block_len;
27101 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27105 /* Deallocate target data. */
27106 static void
27107 core2i7_first_cycle_multipass_fini (void *_data)
27109 ix86_first_cycle_multipass_data_t data
27110 = (ix86_first_cycle_multipass_data_t) _data;
27112 if (data->ready_try_change)
27114 sbitmap_free (data->ready_try_change);
27115 data->ready_try_change = NULL;
27116 data->ready_try_change_size = 0;
27120 /* Prepare for scheduling pass. */
27121 static void
27122 ix86_sched_init_global (FILE *, int, int)
27124 /* Install scheduling hooks for current CPU. Some of these hooks are used
27125 in time-critical parts of the scheduler, so we only set them up when
27126 they are actually used. */
27127 switch (ix86_tune)
27129 case PROCESSOR_CORE2:
27130 case PROCESSOR_NEHALEM:
27131 case PROCESSOR_SANDYBRIDGE:
27132 case PROCESSOR_HASWELL:
27133 /* Do not perform multipass scheduling for pre-reload schedule
27134 to save compile time. */
27135 if (reload_completed)
27137 targetm.sched.dfa_post_advance_cycle
27138 = core2i7_dfa_post_advance_cycle;
27139 targetm.sched.first_cycle_multipass_init
27140 = core2i7_first_cycle_multipass_init;
27141 targetm.sched.first_cycle_multipass_begin
27142 = core2i7_first_cycle_multipass_begin;
27143 targetm.sched.first_cycle_multipass_issue
27144 = core2i7_first_cycle_multipass_issue;
27145 targetm.sched.first_cycle_multipass_backtrack
27146 = core2i7_first_cycle_multipass_backtrack;
27147 targetm.sched.first_cycle_multipass_end
27148 = core2i7_first_cycle_multipass_end;
27149 targetm.sched.first_cycle_multipass_fini
27150 = core2i7_first_cycle_multipass_fini;
27152 /* Set decoder parameters. */
27153 core2i7_secondary_decoder_max_insn_size = 8;
27154 core2i7_ifetch_block_size = 16;
27155 core2i7_ifetch_block_max_insns = 6;
27156 break;
27158 /* ... Fall through ... */
27159 default:
27160 targetm.sched.dfa_post_advance_cycle = NULL;
27161 targetm.sched.first_cycle_multipass_init = NULL;
27162 targetm.sched.first_cycle_multipass_begin = NULL;
27163 targetm.sched.first_cycle_multipass_issue = NULL;
27164 targetm.sched.first_cycle_multipass_backtrack = NULL;
27165 targetm.sched.first_cycle_multipass_end = NULL;
27166 targetm.sched.first_cycle_multipass_fini = NULL;
27167 break;
27172 /* Compute the alignment given to a constant that is being placed in memory.
27173 EXP is the constant and ALIGN is the alignment that the object would
27174 ordinarily have.
27175 The value of this function is used instead of that alignment to align
27176 the object. */
27179 ix86_constant_alignment (tree exp, int align)
27181 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27182 || TREE_CODE (exp) == INTEGER_CST)
27184 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27185 return 64;
27186 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27187 return 128;
27189 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27190 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27191 return BITS_PER_WORD;
27193 return align;
27196 /* Compute the alignment for a static variable.
27197 TYPE is the data type, and ALIGN is the alignment that
27198 the object would ordinarily have. The value of this function is used
27199 instead of that alignment to align the object. */
27202 ix86_data_alignment (tree type, int align, bool opt)
27204 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27205 for symbols from other compilation units or symbols that don't need
27206 to bind locally. In order to preserve some ABI compatibility with
27207 those compilers, ensure we don't decrease alignment from what we
27208 used to assume. */
27210 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27212 /* A data structure whose size is equal to or greater than a cache line
27213 (64 bytes on the Pentium 4 and other recent Intel processors, including
27214 processors based on the Intel Core microarchitecture) should be aligned
27215 so that its base address is a multiple of the cache line size. */
27217 int max_align
27218 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27220 if (max_align < BITS_PER_WORD)
27221 max_align = BITS_PER_WORD;
27223 switch (ix86_align_data_type)
27225 case ix86_align_data_type_abi: opt = false; break;
27226 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27227 case ix86_align_data_type_cacheline: break;
27230 if (opt
27231 && AGGREGATE_TYPE_P (type)
27232 && TYPE_SIZE (type)
27233 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27235 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27236 && align < max_align_compat)
27237 align = max_align_compat;
27238 if (wi::geu_p (TYPE_SIZE (type), max_align)
27239 && align < max_align)
27240 align = max_align;
27243 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27244 to a 16-byte boundary. */
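/* Editorial example: under the check below, a file-scope "char buf[16]"
   (or any larger array) is given at least 16-byte alignment in 64-bit mode,
   while a smaller array keeps whatever alignment it would ordinarily have.  */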
27245 if (TARGET_64BIT)
27247 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27248 && TYPE_SIZE (type)
27249 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27250 && wi::geu_p (TYPE_SIZE (type), 128)
27251 && align < 128)
27252 return 128;
27255 if (!opt)
27256 return align;
27258 if (TREE_CODE (type) == ARRAY_TYPE)
27260 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27261 return 64;
27262 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27263 return 128;
27265 else if (TREE_CODE (type) == COMPLEX_TYPE)
27268 if (TYPE_MODE (type) == DCmode && align < 64)
27269 return 64;
27270 if ((TYPE_MODE (type) == XCmode
27271 || TYPE_MODE (type) == TCmode) && align < 128)
27272 return 128;
27274 else if ((TREE_CODE (type) == RECORD_TYPE
27275 || TREE_CODE (type) == UNION_TYPE
27276 || TREE_CODE (type) == QUAL_UNION_TYPE)
27277 && TYPE_FIELDS (type))
27279 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27280 return 64;
27281 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27282 return 128;
27284 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27285 || TREE_CODE (type) == INTEGER_TYPE)
27287 if (TYPE_MODE (type) == DFmode && align < 64)
27288 return 64;
27289 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27290 return 128;
27293 return align;
27296 /* Compute the alignment for a local variable or a stack slot. EXP is
27297 the data type or decl itself, MODE is the widest mode available and
27298 ALIGN is the alignment that the object would ordinarily have. The
27299 value of this macro is used instead of that alignment to align the
27300 object. */
27302 unsigned int
27303 ix86_local_alignment (tree exp, machine_mode mode,
27304 unsigned int align)
27306 tree type, decl;
27308 if (exp && DECL_P (exp))
27310 type = TREE_TYPE (exp);
27311 decl = exp;
27313 else
27315 type = exp;
27316 decl = NULL;
27319 /* Don't do dynamic stack realignment for long long objects with
27320 -mpreferred-stack-boundary=2. */
27321 if (!TARGET_64BIT
27322 && align == 64
27323 && ix86_preferred_stack_boundary < 64
27324 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27325 && (!type || !TYPE_USER_ALIGN (type))
27326 && (!decl || !DECL_USER_ALIGN (decl)))
27327 align = 32;
27329 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27330 register in MODE. We will return the largest alignment of XF
27331 and DF. */
27332 if (!type)
27334 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27335 align = GET_MODE_ALIGNMENT (DFmode);
27336 return align;
27339 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
27340 to a 16-byte boundary. Exact wording is:
27342 An array uses the same alignment as its elements, except that a local or
27343 global array variable of length at least 16 bytes or
27344 a C99 variable-length array variable always has alignment of at least 16 bytes.
27346 This was added to allow use of aligned SSE instructions on arrays. The
27347 rule is meant for static storage (where the compiler cannot do the
27348 analysis by itself). We follow it for automatic variables only when
27349 convenient. We fully control everything in the compiled function, and
27350 functions from other units cannot rely on the alignment.
27352 Exclude the va_list type. It is the common case of a local array where
27353 we cannot benefit from the alignment.
27355 TODO: Probably one should optimize for size only when the variable does not escape. */
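/* Editorial example: with the check below, a local "char buf[32]" in a
   64-bit function compiled for speed with SSE enabled is given 16-byte
   alignment so it can be accessed with aligned SSE loads/stores; va_list
   variables are deliberately excluded.  */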
27356 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27357 && TARGET_SSE)
27359 if (AGGREGATE_TYPE_P (type)
27360 && (va_list_type_node == NULL_TREE
27361 || (TYPE_MAIN_VARIANT (type)
27362 != TYPE_MAIN_VARIANT (va_list_type_node)))
27363 && TYPE_SIZE (type)
27364 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27365 && wi::geu_p (TYPE_SIZE (type), 16)
27366 && align < 128)
27367 return 128;
27369 if (TREE_CODE (type) == ARRAY_TYPE)
27371 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27372 return 64;
27373 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27374 return 128;
27376 else if (TREE_CODE (type) == COMPLEX_TYPE)
27378 if (TYPE_MODE (type) == DCmode && align < 64)
27379 return 64;
27380 if ((TYPE_MODE (type) == XCmode
27381 || TYPE_MODE (type) == TCmode) && align < 128)
27382 return 128;
27384 else if ((TREE_CODE (type) == RECORD_TYPE
27385 || TREE_CODE (type) == UNION_TYPE
27386 || TREE_CODE (type) == QUAL_UNION_TYPE)
27387 && TYPE_FIELDS (type))
27389 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27390 return 64;
27391 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27392 return 128;
27394 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27395 || TREE_CODE (type) == INTEGER_TYPE)
27398 if (TYPE_MODE (type) == DFmode && align < 64)
27399 return 64;
27400 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27401 return 128;
27403 return align;
27406 /* Compute the minimum required alignment for dynamic stack realignment
27407 purposes for a local variable, parameter or a stack slot. EXP is
27408 the data type or decl itself, MODE is its mode and ALIGN is the
27409 alignment that the object would ordinarily have. */
27411 unsigned int
27412 ix86_minimum_alignment (tree exp, machine_mode mode,
27413 unsigned int align)
27415 tree type, decl;
27417 if (exp && DECL_P (exp))
27419 type = TREE_TYPE (exp);
27420 decl = exp;
27422 else
27424 type = exp;
27425 decl = NULL;
27428 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27429 return align;
27431 /* Don't do dynamic stack realignment for long long objects with
27432 -mpreferred-stack-boundary=2. */
27433 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27434 && (!type || !TYPE_USER_ALIGN (type))
27435 && (!decl || !DECL_USER_ALIGN (decl)))
27436 return 32;
27438 return align;
27441 /* Find a location for the static chain incoming to a nested function.
27442 This is a register, unless all free registers are used by arguments. */
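/* Editorial summary of the choices below: 64-bit code always uses %r10;
   32-bit code defaults to %ecx, uses %eax for fastcall and thiscall
   functions, and for regparm(3) functions passes the chain in %esi on the
   caller side while the callee reads it from a stack slot filled either by
   the trampoline or by an alternate entry point that pushes %esi.  */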
27444 static rtx
27445 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27447 unsigned regno;
27449 /* While this function won't be called by the middle-end when a static
27450 chain isn't needed, it's also used throughout the backend so it's
27451 easiest to keep this check centralized. */
27452 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27453 return NULL;
27455 if (TARGET_64BIT)
27457 /* We always use R10 in 64-bit mode. */
27458 regno = R10_REG;
27460 else
27462 const_tree fntype, fndecl;
27463 unsigned int ccvt;
27465 /* By default in 32-bit mode we use ECX to pass the static chain. */
27466 regno = CX_REG;
27468 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27470 fntype = TREE_TYPE (fndecl_or_type);
27471 fndecl = fndecl_or_type;
27473 else
27475 fntype = fndecl_or_type;
27476 fndecl = NULL;
27479 ccvt = ix86_get_callcvt (fntype);
27480 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27482 /* Fastcall functions use ecx/edx for arguments, which leaves
27483 us with EAX for the static chain.
27484 Thiscall functions use ecx for arguments, which also
27485 leaves us with EAX for the static chain. */
27486 regno = AX_REG;
27488 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27490 /* Thiscall functions use ecx for arguments, which leaves
27491 us with EAX and EDX for the static chain.
27492 For ABI compatibility we use EAX. */
27493 regno = AX_REG;
27495 else if (ix86_function_regparm (fntype, fndecl) == 3)
27497 /* For regparm 3, we have no free call-clobbered registers in
27498 which to store the static chain. In order to implement this,
27499 we have the trampoline push the static chain to the stack.
27500 However, we can't push a value below the return address when
27501 we call the nested function directly, so we have to use an
27502 alternate entry point. For this we use ESI, and have the
27503 alternate entry point push ESI, so that things appear the
27504 same once we're executing the nested function. */
27505 if (incoming_p)
27507 if (fndecl == current_function_decl)
27508 ix86_static_chain_on_stack = true;
27509 return gen_frame_mem (SImode,
27510 plus_constant (Pmode,
27511 arg_pointer_rtx, -8));
27513 regno = SI_REG;
27517 return gen_rtx_REG (Pmode, regno);
27520 /* Emit RTL insns to initialize the variable parts of a trampoline.
27521 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27522 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27523 to be passed to the target function. */
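/* Editorial sketch (not from the original sources) of the bytes emitted
   below.  64-bit, when a 32-bit immediate suffices (the movabs variants use
   the 0x49 REX.W+B prefix and 64-bit immediates instead):

       41 bb <imm32>    movl  $fnaddr, %r11d
       41 ba <imm32>    movl  $chain,  %r10d
       49 ff e3         jmp   *%r11
       90               nop

   32-bit:

       b8/b9 <imm32>    movl  $chain, %eax / %ecx   (or 68 <imm32>: pushl $chain)
       e9 <rel32>       jmp   <function>  */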
27525 static void
27526 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27528 rtx mem, fnaddr;
27529 int opcode;
27530 int offset = 0;
27532 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27534 if (TARGET_64BIT)
27536 int size;
27538 /* Load the function address into r11. Try to load the address using
27539 the shorter movl instead of movabs. We may want to support
27540 movq for kernel mode, but the kernel does not use trampolines at
27541 the moment. FNADDR is a 32-bit address and may not be in
27542 DImode when ptr_mode == SImode. Always use movl in this
27543 case. */
27544 if (ptr_mode == SImode
27545 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27547 fnaddr = copy_addr_to_reg (fnaddr);
27549 mem = adjust_address (m_tramp, HImode, offset);
27550 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27552 mem = adjust_address (m_tramp, SImode, offset + 2);
27553 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27554 offset += 6;
27556 else
27558 mem = adjust_address (m_tramp, HImode, offset);
27559 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27561 mem = adjust_address (m_tramp, DImode, offset + 2);
27562 emit_move_insn (mem, fnaddr);
27563 offset += 10;
27566 /* Load static chain using movabs to r10. Use the shorter movl
27567 instead of movabs when ptr_mode == SImode. */
27568 if (ptr_mode == SImode)
27570 opcode = 0xba41;
27571 size = 6;
27573 else
27575 opcode = 0xba49;
27576 size = 10;
27579 mem = adjust_address (m_tramp, HImode, offset);
27580 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27582 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27583 emit_move_insn (mem, chain_value);
27584 offset += size;
27586 /* Jump to r11; the last (unused) byte is a nop, only there to
27587 pad the write out to a single 32-bit store. */
27588 mem = adjust_address (m_tramp, SImode, offset);
27589 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27590 offset += 4;
27592 else
27594 rtx disp, chain;
27596 /* Depending on the static chain location, either load a register
27597 with a constant, or push the constant to the stack. All of the
27598 instructions are the same size. */
27599 chain = ix86_static_chain (fndecl, true);
27600 if (REG_P (chain))
27602 switch (REGNO (chain))
27604 case AX_REG:
27605 opcode = 0xb8; break;
27606 case CX_REG:
27607 opcode = 0xb9; break;
27608 default:
27609 gcc_unreachable ();
27612 else
27613 opcode = 0x68;
27615 mem = adjust_address (m_tramp, QImode, offset);
27616 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27618 mem = adjust_address (m_tramp, SImode, offset + 1);
27619 emit_move_insn (mem, chain_value);
27620 offset += 5;
27622 mem = adjust_address (m_tramp, QImode, offset);
27623 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27625 mem = adjust_address (m_tramp, SImode, offset + 1);
27627 /* Compute the offset from the end of the jmp to the target function.
27628 In the case where the trampoline stores the static chain on
27629 the stack, we need to skip the first insn of the function, which
27630 pushes the (call-saved) static chain register; this push is 1 byte. */
27631 offset += 5;
27632 disp = expand_binop (SImode, sub_optab, fnaddr,
27633 plus_constant (Pmode, XEXP (m_tramp, 0),
27634 offset - (MEM_P (chain) ? 1 : 0)),
27635 NULL_RTX, 1, OPTAB_DIRECT);
27636 emit_move_insn (mem, disp);
27639 gcc_assert (offset <= TRAMPOLINE_SIZE);
27641 #ifdef HAVE_ENABLE_EXECUTE_STACK
27642 #ifdef CHECK_EXECUTE_STACK_ENABLED
27643 if (CHECK_EXECUTE_STACK_ENABLED)
27644 #endif
27645 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27646 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27647 #endif
27650 /* The following file contains several enumerations and data structures
27651 built from the definitions in i386-builtin-types.def. */
27653 #include "i386-builtin-types.inc"
27655 /* Table for the ix86 builtin non-function types. */
27656 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27658 /* Retrieve an element from the above table, building some of
27659 the types lazily. */
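/* Editorial note: the generated type codes are laid out as primitives, then
   vector types, then pointer types, then const-pointer types; this ordering
   is what lets the function below recover the element/base type by simple
   index arithmetic relative to IX86_BT_LAST_PRIM, _VECT and _PTR.  */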
27661 static tree
27662 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27664 unsigned int index;
27665 tree type, itype;
27667 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27669 type = ix86_builtin_type_tab[(int) tcode];
27670 if (type != NULL)
27671 return type;
27673 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27674 if (tcode <= IX86_BT_LAST_VECT)
27676 machine_mode mode;
27678 index = tcode - IX86_BT_LAST_PRIM - 1;
27679 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27680 mode = ix86_builtin_type_vect_mode[index];
27682 type = build_vector_type_for_mode (itype, mode);
27684 else
27686 int quals;
27688 index = tcode - IX86_BT_LAST_VECT - 1;
27689 if (tcode <= IX86_BT_LAST_PTR)
27690 quals = TYPE_UNQUALIFIED;
27691 else
27692 quals = TYPE_QUAL_CONST;
27694 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27695 if (quals != TYPE_UNQUALIFIED)
27696 itype = build_qualified_type (itype, quals);
27698 type = build_pointer_type (itype);
27701 ix86_builtin_type_tab[(int) tcode] = type;
27702 return type;
27705 /* Table for the ix86 builtin function types. */
27706 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27708 /* Retrieve an element from the above table, building some of
27709 the types lazily. */
27711 static tree
27712 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27714 tree type;
27716 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27718 type = ix86_builtin_func_type_tab[(int) tcode];
27719 if (type != NULL)
27720 return type;
27722 if (tcode <= IX86_BT_LAST_FUNC)
27724 unsigned start = ix86_builtin_func_start[(int) tcode];
27725 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27726 tree rtype, atype, args = void_list_node;
27727 unsigned i;
27729 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27730 for (i = after - 1; i > start; --i)
27732 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27733 args = tree_cons (NULL, atype, args);
27736 type = build_function_type (rtype, args);
27738 else
27740 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27741 enum ix86_builtin_func_type icode;
27743 icode = ix86_builtin_func_alias_base[index];
27744 type = ix86_get_builtin_func_type (icode);
27747 ix86_builtin_func_type_tab[(int) tcode] = type;
27748 return type;
27752 /* Codes for all the SSE/MMX builtins. */
27753 enum ix86_builtins
27755 IX86_BUILTIN_ADDPS,
27756 IX86_BUILTIN_ADDSS,
27757 IX86_BUILTIN_DIVPS,
27758 IX86_BUILTIN_DIVSS,
27759 IX86_BUILTIN_MULPS,
27760 IX86_BUILTIN_MULSS,
27761 IX86_BUILTIN_SUBPS,
27762 IX86_BUILTIN_SUBSS,
27764 IX86_BUILTIN_CMPEQPS,
27765 IX86_BUILTIN_CMPLTPS,
27766 IX86_BUILTIN_CMPLEPS,
27767 IX86_BUILTIN_CMPGTPS,
27768 IX86_BUILTIN_CMPGEPS,
27769 IX86_BUILTIN_CMPNEQPS,
27770 IX86_BUILTIN_CMPNLTPS,
27771 IX86_BUILTIN_CMPNLEPS,
27772 IX86_BUILTIN_CMPNGTPS,
27773 IX86_BUILTIN_CMPNGEPS,
27774 IX86_BUILTIN_CMPORDPS,
27775 IX86_BUILTIN_CMPUNORDPS,
27776 IX86_BUILTIN_CMPEQSS,
27777 IX86_BUILTIN_CMPLTSS,
27778 IX86_BUILTIN_CMPLESS,
27779 IX86_BUILTIN_CMPNEQSS,
27780 IX86_BUILTIN_CMPNLTSS,
27781 IX86_BUILTIN_CMPNLESS,
27782 IX86_BUILTIN_CMPORDSS,
27783 IX86_BUILTIN_CMPUNORDSS,
27785 IX86_BUILTIN_COMIEQSS,
27786 IX86_BUILTIN_COMILTSS,
27787 IX86_BUILTIN_COMILESS,
27788 IX86_BUILTIN_COMIGTSS,
27789 IX86_BUILTIN_COMIGESS,
27790 IX86_BUILTIN_COMINEQSS,
27791 IX86_BUILTIN_UCOMIEQSS,
27792 IX86_BUILTIN_UCOMILTSS,
27793 IX86_BUILTIN_UCOMILESS,
27794 IX86_BUILTIN_UCOMIGTSS,
27795 IX86_BUILTIN_UCOMIGESS,
27796 IX86_BUILTIN_UCOMINEQSS,
27798 IX86_BUILTIN_CVTPI2PS,
27799 IX86_BUILTIN_CVTPS2PI,
27800 IX86_BUILTIN_CVTSI2SS,
27801 IX86_BUILTIN_CVTSI642SS,
27802 IX86_BUILTIN_CVTSS2SI,
27803 IX86_BUILTIN_CVTSS2SI64,
27804 IX86_BUILTIN_CVTTPS2PI,
27805 IX86_BUILTIN_CVTTSS2SI,
27806 IX86_BUILTIN_CVTTSS2SI64,
27808 IX86_BUILTIN_MAXPS,
27809 IX86_BUILTIN_MAXSS,
27810 IX86_BUILTIN_MINPS,
27811 IX86_BUILTIN_MINSS,
27813 IX86_BUILTIN_LOADUPS,
27814 IX86_BUILTIN_STOREUPS,
27815 IX86_BUILTIN_MOVSS,
27817 IX86_BUILTIN_MOVHLPS,
27818 IX86_BUILTIN_MOVLHPS,
27819 IX86_BUILTIN_LOADHPS,
27820 IX86_BUILTIN_LOADLPS,
27821 IX86_BUILTIN_STOREHPS,
27822 IX86_BUILTIN_STORELPS,
27824 IX86_BUILTIN_MASKMOVQ,
27825 IX86_BUILTIN_MOVMSKPS,
27826 IX86_BUILTIN_PMOVMSKB,
27828 IX86_BUILTIN_MOVNTPS,
27829 IX86_BUILTIN_MOVNTQ,
27831 IX86_BUILTIN_LOADDQU,
27832 IX86_BUILTIN_STOREDQU,
27834 IX86_BUILTIN_PACKSSWB,
27835 IX86_BUILTIN_PACKSSDW,
27836 IX86_BUILTIN_PACKUSWB,
27838 IX86_BUILTIN_PADDB,
27839 IX86_BUILTIN_PADDW,
27840 IX86_BUILTIN_PADDD,
27841 IX86_BUILTIN_PADDQ,
27842 IX86_BUILTIN_PADDSB,
27843 IX86_BUILTIN_PADDSW,
27844 IX86_BUILTIN_PADDUSB,
27845 IX86_BUILTIN_PADDUSW,
27846 IX86_BUILTIN_PSUBB,
27847 IX86_BUILTIN_PSUBW,
27848 IX86_BUILTIN_PSUBD,
27849 IX86_BUILTIN_PSUBQ,
27850 IX86_BUILTIN_PSUBSB,
27851 IX86_BUILTIN_PSUBSW,
27852 IX86_BUILTIN_PSUBUSB,
27853 IX86_BUILTIN_PSUBUSW,
27855 IX86_BUILTIN_PAND,
27856 IX86_BUILTIN_PANDN,
27857 IX86_BUILTIN_POR,
27858 IX86_BUILTIN_PXOR,
27860 IX86_BUILTIN_PAVGB,
27861 IX86_BUILTIN_PAVGW,
27863 IX86_BUILTIN_PCMPEQB,
27864 IX86_BUILTIN_PCMPEQW,
27865 IX86_BUILTIN_PCMPEQD,
27866 IX86_BUILTIN_PCMPGTB,
27867 IX86_BUILTIN_PCMPGTW,
27868 IX86_BUILTIN_PCMPGTD,
27870 IX86_BUILTIN_PMADDWD,
27872 IX86_BUILTIN_PMAXSW,
27873 IX86_BUILTIN_PMAXUB,
27874 IX86_BUILTIN_PMINSW,
27875 IX86_BUILTIN_PMINUB,
27877 IX86_BUILTIN_PMULHUW,
27878 IX86_BUILTIN_PMULHW,
27879 IX86_BUILTIN_PMULLW,
27881 IX86_BUILTIN_PSADBW,
27882 IX86_BUILTIN_PSHUFW,
27884 IX86_BUILTIN_PSLLW,
27885 IX86_BUILTIN_PSLLD,
27886 IX86_BUILTIN_PSLLQ,
27887 IX86_BUILTIN_PSRAW,
27888 IX86_BUILTIN_PSRAD,
27889 IX86_BUILTIN_PSRLW,
27890 IX86_BUILTIN_PSRLD,
27891 IX86_BUILTIN_PSRLQ,
27892 IX86_BUILTIN_PSLLWI,
27893 IX86_BUILTIN_PSLLDI,
27894 IX86_BUILTIN_PSLLQI,
27895 IX86_BUILTIN_PSRAWI,
27896 IX86_BUILTIN_PSRADI,
27897 IX86_BUILTIN_PSRLWI,
27898 IX86_BUILTIN_PSRLDI,
27899 IX86_BUILTIN_PSRLQI,
27901 IX86_BUILTIN_PUNPCKHBW,
27902 IX86_BUILTIN_PUNPCKHWD,
27903 IX86_BUILTIN_PUNPCKHDQ,
27904 IX86_BUILTIN_PUNPCKLBW,
27905 IX86_BUILTIN_PUNPCKLWD,
27906 IX86_BUILTIN_PUNPCKLDQ,
27908 IX86_BUILTIN_SHUFPS,
27910 IX86_BUILTIN_RCPPS,
27911 IX86_BUILTIN_RCPSS,
27912 IX86_BUILTIN_RSQRTPS,
27913 IX86_BUILTIN_RSQRTPS_NR,
27914 IX86_BUILTIN_RSQRTSS,
27915 IX86_BUILTIN_RSQRTF,
27916 IX86_BUILTIN_SQRTPS,
27917 IX86_BUILTIN_SQRTPS_NR,
27918 IX86_BUILTIN_SQRTSS,
27920 IX86_BUILTIN_UNPCKHPS,
27921 IX86_BUILTIN_UNPCKLPS,
27923 IX86_BUILTIN_ANDPS,
27924 IX86_BUILTIN_ANDNPS,
27925 IX86_BUILTIN_ORPS,
27926 IX86_BUILTIN_XORPS,
27928 IX86_BUILTIN_EMMS,
27929 IX86_BUILTIN_LDMXCSR,
27930 IX86_BUILTIN_STMXCSR,
27931 IX86_BUILTIN_SFENCE,
27933 IX86_BUILTIN_FXSAVE,
27934 IX86_BUILTIN_FXRSTOR,
27935 IX86_BUILTIN_FXSAVE64,
27936 IX86_BUILTIN_FXRSTOR64,
27938 IX86_BUILTIN_XSAVE,
27939 IX86_BUILTIN_XRSTOR,
27940 IX86_BUILTIN_XSAVE64,
27941 IX86_BUILTIN_XRSTOR64,
27943 IX86_BUILTIN_XSAVEOPT,
27944 IX86_BUILTIN_XSAVEOPT64,
27946 IX86_BUILTIN_XSAVEC,
27947 IX86_BUILTIN_XSAVEC64,
27949 IX86_BUILTIN_XSAVES,
27950 IX86_BUILTIN_XRSTORS,
27951 IX86_BUILTIN_XSAVES64,
27952 IX86_BUILTIN_XRSTORS64,
27954 /* 3DNow! Original */
27955 IX86_BUILTIN_FEMMS,
27956 IX86_BUILTIN_PAVGUSB,
27957 IX86_BUILTIN_PF2ID,
27958 IX86_BUILTIN_PFACC,
27959 IX86_BUILTIN_PFADD,
27960 IX86_BUILTIN_PFCMPEQ,
27961 IX86_BUILTIN_PFCMPGE,
27962 IX86_BUILTIN_PFCMPGT,
27963 IX86_BUILTIN_PFMAX,
27964 IX86_BUILTIN_PFMIN,
27965 IX86_BUILTIN_PFMUL,
27966 IX86_BUILTIN_PFRCP,
27967 IX86_BUILTIN_PFRCPIT1,
27968 IX86_BUILTIN_PFRCPIT2,
27969 IX86_BUILTIN_PFRSQIT1,
27970 IX86_BUILTIN_PFRSQRT,
27971 IX86_BUILTIN_PFSUB,
27972 IX86_BUILTIN_PFSUBR,
27973 IX86_BUILTIN_PI2FD,
27974 IX86_BUILTIN_PMULHRW,
27976 /* 3DNow! Athlon Extensions */
27977 IX86_BUILTIN_PF2IW,
27978 IX86_BUILTIN_PFNACC,
27979 IX86_BUILTIN_PFPNACC,
27980 IX86_BUILTIN_PI2FW,
27981 IX86_BUILTIN_PSWAPDSI,
27982 IX86_BUILTIN_PSWAPDSF,
27984 /* SSE2 */
27985 IX86_BUILTIN_ADDPD,
27986 IX86_BUILTIN_ADDSD,
27987 IX86_BUILTIN_DIVPD,
27988 IX86_BUILTIN_DIVSD,
27989 IX86_BUILTIN_MULPD,
27990 IX86_BUILTIN_MULSD,
27991 IX86_BUILTIN_SUBPD,
27992 IX86_BUILTIN_SUBSD,
27994 IX86_BUILTIN_CMPEQPD,
27995 IX86_BUILTIN_CMPLTPD,
27996 IX86_BUILTIN_CMPLEPD,
27997 IX86_BUILTIN_CMPGTPD,
27998 IX86_BUILTIN_CMPGEPD,
27999 IX86_BUILTIN_CMPNEQPD,
28000 IX86_BUILTIN_CMPNLTPD,
28001 IX86_BUILTIN_CMPNLEPD,
28002 IX86_BUILTIN_CMPNGTPD,
28003 IX86_BUILTIN_CMPNGEPD,
28004 IX86_BUILTIN_CMPORDPD,
28005 IX86_BUILTIN_CMPUNORDPD,
28006 IX86_BUILTIN_CMPEQSD,
28007 IX86_BUILTIN_CMPLTSD,
28008 IX86_BUILTIN_CMPLESD,
28009 IX86_BUILTIN_CMPNEQSD,
28010 IX86_BUILTIN_CMPNLTSD,
28011 IX86_BUILTIN_CMPNLESD,
28012 IX86_BUILTIN_CMPORDSD,
28013 IX86_BUILTIN_CMPUNORDSD,
28015 IX86_BUILTIN_COMIEQSD,
28016 IX86_BUILTIN_COMILTSD,
28017 IX86_BUILTIN_COMILESD,
28018 IX86_BUILTIN_COMIGTSD,
28019 IX86_BUILTIN_COMIGESD,
28020 IX86_BUILTIN_COMINEQSD,
28021 IX86_BUILTIN_UCOMIEQSD,
28022 IX86_BUILTIN_UCOMILTSD,
28023 IX86_BUILTIN_UCOMILESD,
28024 IX86_BUILTIN_UCOMIGTSD,
28025 IX86_BUILTIN_UCOMIGESD,
28026 IX86_BUILTIN_UCOMINEQSD,
28028 IX86_BUILTIN_MAXPD,
28029 IX86_BUILTIN_MAXSD,
28030 IX86_BUILTIN_MINPD,
28031 IX86_BUILTIN_MINSD,
28033 IX86_BUILTIN_ANDPD,
28034 IX86_BUILTIN_ANDNPD,
28035 IX86_BUILTIN_ORPD,
28036 IX86_BUILTIN_XORPD,
28038 IX86_BUILTIN_SQRTPD,
28039 IX86_BUILTIN_SQRTSD,
28041 IX86_BUILTIN_UNPCKHPD,
28042 IX86_BUILTIN_UNPCKLPD,
28044 IX86_BUILTIN_SHUFPD,
28046 IX86_BUILTIN_LOADUPD,
28047 IX86_BUILTIN_STOREUPD,
28048 IX86_BUILTIN_MOVSD,
28050 IX86_BUILTIN_LOADHPD,
28051 IX86_BUILTIN_LOADLPD,
28053 IX86_BUILTIN_CVTDQ2PD,
28054 IX86_BUILTIN_CVTDQ2PS,
28056 IX86_BUILTIN_CVTPD2DQ,
28057 IX86_BUILTIN_CVTPD2PI,
28058 IX86_BUILTIN_CVTPD2PS,
28059 IX86_BUILTIN_CVTTPD2DQ,
28060 IX86_BUILTIN_CVTTPD2PI,
28062 IX86_BUILTIN_CVTPI2PD,
28063 IX86_BUILTIN_CVTSI2SD,
28064 IX86_BUILTIN_CVTSI642SD,
28066 IX86_BUILTIN_CVTSD2SI,
28067 IX86_BUILTIN_CVTSD2SI64,
28068 IX86_BUILTIN_CVTSD2SS,
28069 IX86_BUILTIN_CVTSS2SD,
28070 IX86_BUILTIN_CVTTSD2SI,
28071 IX86_BUILTIN_CVTTSD2SI64,
28073 IX86_BUILTIN_CVTPS2DQ,
28074 IX86_BUILTIN_CVTPS2PD,
28075 IX86_BUILTIN_CVTTPS2DQ,
28077 IX86_BUILTIN_MOVNTI,
28078 IX86_BUILTIN_MOVNTI64,
28079 IX86_BUILTIN_MOVNTPD,
28080 IX86_BUILTIN_MOVNTDQ,
28082 IX86_BUILTIN_MOVQ128,
28084 /* SSE2 MMX */
28085 IX86_BUILTIN_MASKMOVDQU,
28086 IX86_BUILTIN_MOVMSKPD,
28087 IX86_BUILTIN_PMOVMSKB128,
28089 IX86_BUILTIN_PACKSSWB128,
28090 IX86_BUILTIN_PACKSSDW128,
28091 IX86_BUILTIN_PACKUSWB128,
28093 IX86_BUILTIN_PADDB128,
28094 IX86_BUILTIN_PADDW128,
28095 IX86_BUILTIN_PADDD128,
28096 IX86_BUILTIN_PADDQ128,
28097 IX86_BUILTIN_PADDSB128,
28098 IX86_BUILTIN_PADDSW128,
28099 IX86_BUILTIN_PADDUSB128,
28100 IX86_BUILTIN_PADDUSW128,
28101 IX86_BUILTIN_PSUBB128,
28102 IX86_BUILTIN_PSUBW128,
28103 IX86_BUILTIN_PSUBD128,
28104 IX86_BUILTIN_PSUBQ128,
28105 IX86_BUILTIN_PSUBSB128,
28106 IX86_BUILTIN_PSUBSW128,
28107 IX86_BUILTIN_PSUBUSB128,
28108 IX86_BUILTIN_PSUBUSW128,
28110 IX86_BUILTIN_PAND128,
28111 IX86_BUILTIN_PANDN128,
28112 IX86_BUILTIN_POR128,
28113 IX86_BUILTIN_PXOR128,
28115 IX86_BUILTIN_PAVGB128,
28116 IX86_BUILTIN_PAVGW128,
28118 IX86_BUILTIN_PCMPEQB128,
28119 IX86_BUILTIN_PCMPEQW128,
28120 IX86_BUILTIN_PCMPEQD128,
28121 IX86_BUILTIN_PCMPGTB128,
28122 IX86_BUILTIN_PCMPGTW128,
28123 IX86_BUILTIN_PCMPGTD128,
28125 IX86_BUILTIN_PMADDWD128,
28127 IX86_BUILTIN_PMAXSW128,
28128 IX86_BUILTIN_PMAXUB128,
28129 IX86_BUILTIN_PMINSW128,
28130 IX86_BUILTIN_PMINUB128,
28132 IX86_BUILTIN_PMULUDQ,
28133 IX86_BUILTIN_PMULUDQ128,
28134 IX86_BUILTIN_PMULHUW128,
28135 IX86_BUILTIN_PMULHW128,
28136 IX86_BUILTIN_PMULLW128,
28138 IX86_BUILTIN_PSADBW128,
28139 IX86_BUILTIN_PSHUFHW,
28140 IX86_BUILTIN_PSHUFLW,
28141 IX86_BUILTIN_PSHUFD,
28143 IX86_BUILTIN_PSLLDQI128,
28144 IX86_BUILTIN_PSLLWI128,
28145 IX86_BUILTIN_PSLLDI128,
28146 IX86_BUILTIN_PSLLQI128,
28147 IX86_BUILTIN_PSRAWI128,
28148 IX86_BUILTIN_PSRADI128,
28149 IX86_BUILTIN_PSRLDQI128,
28150 IX86_BUILTIN_PSRLWI128,
28151 IX86_BUILTIN_PSRLDI128,
28152 IX86_BUILTIN_PSRLQI128,
28154 IX86_BUILTIN_PSLLDQ128,
28155 IX86_BUILTIN_PSLLW128,
28156 IX86_BUILTIN_PSLLD128,
28157 IX86_BUILTIN_PSLLQ128,
28158 IX86_BUILTIN_PSRAW128,
28159 IX86_BUILTIN_PSRAD128,
28160 IX86_BUILTIN_PSRLW128,
28161 IX86_BUILTIN_PSRLD128,
28162 IX86_BUILTIN_PSRLQ128,
28164 IX86_BUILTIN_PUNPCKHBW128,
28165 IX86_BUILTIN_PUNPCKHWD128,
28166 IX86_BUILTIN_PUNPCKHDQ128,
28167 IX86_BUILTIN_PUNPCKHQDQ128,
28168 IX86_BUILTIN_PUNPCKLBW128,
28169 IX86_BUILTIN_PUNPCKLWD128,
28170 IX86_BUILTIN_PUNPCKLDQ128,
28171 IX86_BUILTIN_PUNPCKLQDQ128,
28173 IX86_BUILTIN_CLFLUSH,
28174 IX86_BUILTIN_MFENCE,
28175 IX86_BUILTIN_LFENCE,
28176 IX86_BUILTIN_PAUSE,
28178 IX86_BUILTIN_FNSTENV,
28179 IX86_BUILTIN_FLDENV,
28180 IX86_BUILTIN_FNSTSW,
28181 IX86_BUILTIN_FNCLEX,
28183 IX86_BUILTIN_BSRSI,
28184 IX86_BUILTIN_BSRDI,
28185 IX86_BUILTIN_RDPMC,
28186 IX86_BUILTIN_RDTSC,
28187 IX86_BUILTIN_RDTSCP,
28188 IX86_BUILTIN_ROLQI,
28189 IX86_BUILTIN_ROLHI,
28190 IX86_BUILTIN_RORQI,
28191 IX86_BUILTIN_RORHI,
28193 /* SSE3. */
28194 IX86_BUILTIN_ADDSUBPS,
28195 IX86_BUILTIN_HADDPS,
28196 IX86_BUILTIN_HSUBPS,
28197 IX86_BUILTIN_MOVSHDUP,
28198 IX86_BUILTIN_MOVSLDUP,
28199 IX86_BUILTIN_ADDSUBPD,
28200 IX86_BUILTIN_HADDPD,
28201 IX86_BUILTIN_HSUBPD,
28202 IX86_BUILTIN_LDDQU,
28204 IX86_BUILTIN_MONITOR,
28205 IX86_BUILTIN_MWAIT,
28207 /* SSSE3. */
28208 IX86_BUILTIN_PHADDW,
28209 IX86_BUILTIN_PHADDD,
28210 IX86_BUILTIN_PHADDSW,
28211 IX86_BUILTIN_PHSUBW,
28212 IX86_BUILTIN_PHSUBD,
28213 IX86_BUILTIN_PHSUBSW,
28214 IX86_BUILTIN_PMADDUBSW,
28215 IX86_BUILTIN_PMULHRSW,
28216 IX86_BUILTIN_PSHUFB,
28217 IX86_BUILTIN_PSIGNB,
28218 IX86_BUILTIN_PSIGNW,
28219 IX86_BUILTIN_PSIGND,
28220 IX86_BUILTIN_PALIGNR,
28221 IX86_BUILTIN_PABSB,
28222 IX86_BUILTIN_PABSW,
28223 IX86_BUILTIN_PABSD,
28225 IX86_BUILTIN_PHADDW128,
28226 IX86_BUILTIN_PHADDD128,
28227 IX86_BUILTIN_PHADDSW128,
28228 IX86_BUILTIN_PHSUBW128,
28229 IX86_BUILTIN_PHSUBD128,
28230 IX86_BUILTIN_PHSUBSW128,
28231 IX86_BUILTIN_PMADDUBSW128,
28232 IX86_BUILTIN_PMULHRSW128,
28233 IX86_BUILTIN_PSHUFB128,
28234 IX86_BUILTIN_PSIGNB128,
28235 IX86_BUILTIN_PSIGNW128,
28236 IX86_BUILTIN_PSIGND128,
28237 IX86_BUILTIN_PALIGNR128,
28238 IX86_BUILTIN_PABSB128,
28239 IX86_BUILTIN_PABSW128,
28240 IX86_BUILTIN_PABSD128,
28242 /* AMDFAM10 - SSE4A New Instructions. */
28243 IX86_BUILTIN_MOVNTSD,
28244 IX86_BUILTIN_MOVNTSS,
28245 IX86_BUILTIN_EXTRQI,
28246 IX86_BUILTIN_EXTRQ,
28247 IX86_BUILTIN_INSERTQI,
28248 IX86_BUILTIN_INSERTQ,
28250 /* SSE4.1. */
28251 IX86_BUILTIN_BLENDPD,
28252 IX86_BUILTIN_BLENDPS,
28253 IX86_BUILTIN_BLENDVPD,
28254 IX86_BUILTIN_BLENDVPS,
28255 IX86_BUILTIN_PBLENDVB128,
28256 IX86_BUILTIN_PBLENDW128,
28258 IX86_BUILTIN_DPPD,
28259 IX86_BUILTIN_DPPS,
28261 IX86_BUILTIN_INSERTPS128,
28263 IX86_BUILTIN_MOVNTDQA,
28264 IX86_BUILTIN_MPSADBW128,
28265 IX86_BUILTIN_PACKUSDW128,
28266 IX86_BUILTIN_PCMPEQQ,
28267 IX86_BUILTIN_PHMINPOSUW128,
28269 IX86_BUILTIN_PMAXSB128,
28270 IX86_BUILTIN_PMAXSD128,
28271 IX86_BUILTIN_PMAXUD128,
28272 IX86_BUILTIN_PMAXUW128,
28274 IX86_BUILTIN_PMINSB128,
28275 IX86_BUILTIN_PMINSD128,
28276 IX86_BUILTIN_PMINUD128,
28277 IX86_BUILTIN_PMINUW128,
28279 IX86_BUILTIN_PMOVSXBW128,
28280 IX86_BUILTIN_PMOVSXBD128,
28281 IX86_BUILTIN_PMOVSXBQ128,
28282 IX86_BUILTIN_PMOVSXWD128,
28283 IX86_BUILTIN_PMOVSXWQ128,
28284 IX86_BUILTIN_PMOVSXDQ128,
28286 IX86_BUILTIN_PMOVZXBW128,
28287 IX86_BUILTIN_PMOVZXBD128,
28288 IX86_BUILTIN_PMOVZXBQ128,
28289 IX86_BUILTIN_PMOVZXWD128,
28290 IX86_BUILTIN_PMOVZXWQ128,
28291 IX86_BUILTIN_PMOVZXDQ128,
28293 IX86_BUILTIN_PMULDQ128,
28294 IX86_BUILTIN_PMULLD128,
28296 IX86_BUILTIN_ROUNDSD,
28297 IX86_BUILTIN_ROUNDSS,
28299 IX86_BUILTIN_ROUNDPD,
28300 IX86_BUILTIN_ROUNDPS,
28302 IX86_BUILTIN_FLOORPD,
28303 IX86_BUILTIN_CEILPD,
28304 IX86_BUILTIN_TRUNCPD,
28305 IX86_BUILTIN_RINTPD,
28306 IX86_BUILTIN_ROUNDPD_AZ,
28308 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28309 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28310 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28312 IX86_BUILTIN_FLOORPS,
28313 IX86_BUILTIN_CEILPS,
28314 IX86_BUILTIN_TRUNCPS,
28315 IX86_BUILTIN_RINTPS,
28316 IX86_BUILTIN_ROUNDPS_AZ,
28318 IX86_BUILTIN_FLOORPS_SFIX,
28319 IX86_BUILTIN_CEILPS_SFIX,
28320 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28322 IX86_BUILTIN_PTESTZ,
28323 IX86_BUILTIN_PTESTC,
28324 IX86_BUILTIN_PTESTNZC,
28326 IX86_BUILTIN_VEC_INIT_V2SI,
28327 IX86_BUILTIN_VEC_INIT_V4HI,
28328 IX86_BUILTIN_VEC_INIT_V8QI,
28329 IX86_BUILTIN_VEC_EXT_V2DF,
28330 IX86_BUILTIN_VEC_EXT_V2DI,
28331 IX86_BUILTIN_VEC_EXT_V4SF,
28332 IX86_BUILTIN_VEC_EXT_V4SI,
28333 IX86_BUILTIN_VEC_EXT_V8HI,
28334 IX86_BUILTIN_VEC_EXT_V2SI,
28335 IX86_BUILTIN_VEC_EXT_V4HI,
28336 IX86_BUILTIN_VEC_EXT_V16QI,
28337 IX86_BUILTIN_VEC_SET_V2DI,
28338 IX86_BUILTIN_VEC_SET_V4SF,
28339 IX86_BUILTIN_VEC_SET_V4SI,
28340 IX86_BUILTIN_VEC_SET_V8HI,
28341 IX86_BUILTIN_VEC_SET_V4HI,
28342 IX86_BUILTIN_VEC_SET_V16QI,
28344 IX86_BUILTIN_VEC_PACK_SFIX,
28345 IX86_BUILTIN_VEC_PACK_SFIX256,
28347 /* SSE4.2. */
28348 IX86_BUILTIN_CRC32QI,
28349 IX86_BUILTIN_CRC32HI,
28350 IX86_BUILTIN_CRC32SI,
28351 IX86_BUILTIN_CRC32DI,
28353 IX86_BUILTIN_PCMPESTRI128,
28354 IX86_BUILTIN_PCMPESTRM128,
28355 IX86_BUILTIN_PCMPESTRA128,
28356 IX86_BUILTIN_PCMPESTRC128,
28357 IX86_BUILTIN_PCMPESTRO128,
28358 IX86_BUILTIN_PCMPESTRS128,
28359 IX86_BUILTIN_PCMPESTRZ128,
28360 IX86_BUILTIN_PCMPISTRI128,
28361 IX86_BUILTIN_PCMPISTRM128,
28362 IX86_BUILTIN_PCMPISTRA128,
28363 IX86_BUILTIN_PCMPISTRC128,
28364 IX86_BUILTIN_PCMPISTRO128,
28365 IX86_BUILTIN_PCMPISTRS128,
28366 IX86_BUILTIN_PCMPISTRZ128,
28368 IX86_BUILTIN_PCMPGTQ,
28370 /* AES instructions */
28371 IX86_BUILTIN_AESENC128,
28372 IX86_BUILTIN_AESENCLAST128,
28373 IX86_BUILTIN_AESDEC128,
28374 IX86_BUILTIN_AESDECLAST128,
28375 IX86_BUILTIN_AESIMC128,
28376 IX86_BUILTIN_AESKEYGENASSIST128,
28378 /* PCLMUL instruction */
28379 IX86_BUILTIN_PCLMULQDQ128,
28381 /* AVX */
28382 IX86_BUILTIN_ADDPD256,
28383 IX86_BUILTIN_ADDPS256,
28384 IX86_BUILTIN_ADDSUBPD256,
28385 IX86_BUILTIN_ADDSUBPS256,
28386 IX86_BUILTIN_ANDPD256,
28387 IX86_BUILTIN_ANDPS256,
28388 IX86_BUILTIN_ANDNPD256,
28389 IX86_BUILTIN_ANDNPS256,
28390 IX86_BUILTIN_BLENDPD256,
28391 IX86_BUILTIN_BLENDPS256,
28392 IX86_BUILTIN_BLENDVPD256,
28393 IX86_BUILTIN_BLENDVPS256,
28394 IX86_BUILTIN_DIVPD256,
28395 IX86_BUILTIN_DIVPS256,
28396 IX86_BUILTIN_DPPS256,
28397 IX86_BUILTIN_HADDPD256,
28398 IX86_BUILTIN_HADDPS256,
28399 IX86_BUILTIN_HSUBPD256,
28400 IX86_BUILTIN_HSUBPS256,
28401 IX86_BUILTIN_MAXPD256,
28402 IX86_BUILTIN_MAXPS256,
28403 IX86_BUILTIN_MINPD256,
28404 IX86_BUILTIN_MINPS256,
28405 IX86_BUILTIN_MULPD256,
28406 IX86_BUILTIN_MULPS256,
28407 IX86_BUILTIN_ORPD256,
28408 IX86_BUILTIN_ORPS256,
28409 IX86_BUILTIN_SHUFPD256,
28410 IX86_BUILTIN_SHUFPS256,
28411 IX86_BUILTIN_SUBPD256,
28412 IX86_BUILTIN_SUBPS256,
28413 IX86_BUILTIN_XORPD256,
28414 IX86_BUILTIN_XORPS256,
28415 IX86_BUILTIN_CMPSD,
28416 IX86_BUILTIN_CMPSS,
28417 IX86_BUILTIN_CMPPD,
28418 IX86_BUILTIN_CMPPS,
28419 IX86_BUILTIN_CMPPD256,
28420 IX86_BUILTIN_CMPPS256,
28421 IX86_BUILTIN_CVTDQ2PD256,
28422 IX86_BUILTIN_CVTDQ2PS256,
28423 IX86_BUILTIN_CVTPD2PS256,
28424 IX86_BUILTIN_CVTPS2DQ256,
28425 IX86_BUILTIN_CVTPS2PD256,
28426 IX86_BUILTIN_CVTTPD2DQ256,
28427 IX86_BUILTIN_CVTPD2DQ256,
28428 IX86_BUILTIN_CVTTPS2DQ256,
28429 IX86_BUILTIN_EXTRACTF128PD256,
28430 IX86_BUILTIN_EXTRACTF128PS256,
28431 IX86_BUILTIN_EXTRACTF128SI256,
28432 IX86_BUILTIN_VZEROALL,
28433 IX86_BUILTIN_VZEROUPPER,
28434 IX86_BUILTIN_VPERMILVARPD,
28435 IX86_BUILTIN_VPERMILVARPS,
28436 IX86_BUILTIN_VPERMILVARPD256,
28437 IX86_BUILTIN_VPERMILVARPS256,
28438 IX86_BUILTIN_VPERMILPD,
28439 IX86_BUILTIN_VPERMILPS,
28440 IX86_BUILTIN_VPERMILPD256,
28441 IX86_BUILTIN_VPERMILPS256,
28442 IX86_BUILTIN_VPERMIL2PD,
28443 IX86_BUILTIN_VPERMIL2PS,
28444 IX86_BUILTIN_VPERMIL2PD256,
28445 IX86_BUILTIN_VPERMIL2PS256,
28446 IX86_BUILTIN_VPERM2F128PD256,
28447 IX86_BUILTIN_VPERM2F128PS256,
28448 IX86_BUILTIN_VPERM2F128SI256,
28449 IX86_BUILTIN_VBROADCASTSS,
28450 IX86_BUILTIN_VBROADCASTSD256,
28451 IX86_BUILTIN_VBROADCASTSS256,
28452 IX86_BUILTIN_VBROADCASTPD256,
28453 IX86_BUILTIN_VBROADCASTPS256,
28454 IX86_BUILTIN_VINSERTF128PD256,
28455 IX86_BUILTIN_VINSERTF128PS256,
28456 IX86_BUILTIN_VINSERTF128SI256,
28457 IX86_BUILTIN_LOADUPD256,
28458 IX86_BUILTIN_LOADUPS256,
28459 IX86_BUILTIN_STOREUPD256,
28460 IX86_BUILTIN_STOREUPS256,
28461 IX86_BUILTIN_LDDQU256,
28462 IX86_BUILTIN_MOVNTDQ256,
28463 IX86_BUILTIN_MOVNTPD256,
28464 IX86_BUILTIN_MOVNTPS256,
28465 IX86_BUILTIN_LOADDQU256,
28466 IX86_BUILTIN_STOREDQU256,
28467 IX86_BUILTIN_MASKLOADPD,
28468 IX86_BUILTIN_MASKLOADPS,
28469 IX86_BUILTIN_MASKSTOREPD,
28470 IX86_BUILTIN_MASKSTOREPS,
28471 IX86_BUILTIN_MASKLOADPD256,
28472 IX86_BUILTIN_MASKLOADPS256,
28473 IX86_BUILTIN_MASKSTOREPD256,
28474 IX86_BUILTIN_MASKSTOREPS256,
28475 IX86_BUILTIN_MOVSHDUP256,
28476 IX86_BUILTIN_MOVSLDUP256,
28477 IX86_BUILTIN_MOVDDUP256,
28479 IX86_BUILTIN_SQRTPD256,
28480 IX86_BUILTIN_SQRTPS256,
28481 IX86_BUILTIN_SQRTPS_NR256,
28482 IX86_BUILTIN_RSQRTPS256,
28483 IX86_BUILTIN_RSQRTPS_NR256,
28485 IX86_BUILTIN_RCPPS256,
28487 IX86_BUILTIN_ROUNDPD256,
28488 IX86_BUILTIN_ROUNDPS256,
28490 IX86_BUILTIN_FLOORPD256,
28491 IX86_BUILTIN_CEILPD256,
28492 IX86_BUILTIN_TRUNCPD256,
28493 IX86_BUILTIN_RINTPD256,
28494 IX86_BUILTIN_ROUNDPD_AZ256,
28496 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28497 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28498 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28500 IX86_BUILTIN_FLOORPS256,
28501 IX86_BUILTIN_CEILPS256,
28502 IX86_BUILTIN_TRUNCPS256,
28503 IX86_BUILTIN_RINTPS256,
28504 IX86_BUILTIN_ROUNDPS_AZ256,
28506 IX86_BUILTIN_FLOORPS_SFIX256,
28507 IX86_BUILTIN_CEILPS_SFIX256,
28508 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28510 IX86_BUILTIN_UNPCKHPD256,
28511 IX86_BUILTIN_UNPCKLPD256,
28512 IX86_BUILTIN_UNPCKHPS256,
28513 IX86_BUILTIN_UNPCKLPS256,
28515 IX86_BUILTIN_SI256_SI,
28516 IX86_BUILTIN_PS256_PS,
28517 IX86_BUILTIN_PD256_PD,
28518 IX86_BUILTIN_SI_SI256,
28519 IX86_BUILTIN_PS_PS256,
28520 IX86_BUILTIN_PD_PD256,
28522 IX86_BUILTIN_VTESTZPD,
28523 IX86_BUILTIN_VTESTCPD,
28524 IX86_BUILTIN_VTESTNZCPD,
28525 IX86_BUILTIN_VTESTZPS,
28526 IX86_BUILTIN_VTESTCPS,
28527 IX86_BUILTIN_VTESTNZCPS,
28528 IX86_BUILTIN_VTESTZPD256,
28529 IX86_BUILTIN_VTESTCPD256,
28530 IX86_BUILTIN_VTESTNZCPD256,
28531 IX86_BUILTIN_VTESTZPS256,
28532 IX86_BUILTIN_VTESTCPS256,
28533 IX86_BUILTIN_VTESTNZCPS256,
28534 IX86_BUILTIN_PTESTZ256,
28535 IX86_BUILTIN_PTESTC256,
28536 IX86_BUILTIN_PTESTNZC256,
28538 IX86_BUILTIN_MOVMSKPD256,
28539 IX86_BUILTIN_MOVMSKPS256,
28541 /* AVX2 */
28542 IX86_BUILTIN_MPSADBW256,
28543 IX86_BUILTIN_PABSB256,
28544 IX86_BUILTIN_PABSW256,
28545 IX86_BUILTIN_PABSD256,
28546 IX86_BUILTIN_PACKSSDW256,
28547 IX86_BUILTIN_PACKSSWB256,
28548 IX86_BUILTIN_PACKUSDW256,
28549 IX86_BUILTIN_PACKUSWB256,
28550 IX86_BUILTIN_PADDB256,
28551 IX86_BUILTIN_PADDW256,
28552 IX86_BUILTIN_PADDD256,
28553 IX86_BUILTIN_PADDQ256,
28554 IX86_BUILTIN_PADDSB256,
28555 IX86_BUILTIN_PADDSW256,
28556 IX86_BUILTIN_PADDUSB256,
28557 IX86_BUILTIN_PADDUSW256,
28558 IX86_BUILTIN_PALIGNR256,
28559 IX86_BUILTIN_AND256I,
28560 IX86_BUILTIN_ANDNOT256I,
28561 IX86_BUILTIN_PAVGB256,
28562 IX86_BUILTIN_PAVGW256,
28563 IX86_BUILTIN_PBLENDVB256,
28564 IX86_BUILTIN_PBLENDVW256,
28565 IX86_BUILTIN_PCMPEQB256,
28566 IX86_BUILTIN_PCMPEQW256,
28567 IX86_BUILTIN_PCMPEQD256,
28568 IX86_BUILTIN_PCMPEQQ256,
28569 IX86_BUILTIN_PCMPGTB256,
28570 IX86_BUILTIN_PCMPGTW256,
28571 IX86_BUILTIN_PCMPGTD256,
28572 IX86_BUILTIN_PCMPGTQ256,
28573 IX86_BUILTIN_PHADDW256,
28574 IX86_BUILTIN_PHADDD256,
28575 IX86_BUILTIN_PHADDSW256,
28576 IX86_BUILTIN_PHSUBW256,
28577 IX86_BUILTIN_PHSUBD256,
28578 IX86_BUILTIN_PHSUBSW256,
28579 IX86_BUILTIN_PMADDUBSW256,
28580 IX86_BUILTIN_PMADDWD256,
28581 IX86_BUILTIN_PMAXSB256,
28582 IX86_BUILTIN_PMAXSW256,
28583 IX86_BUILTIN_PMAXSD256,
28584 IX86_BUILTIN_PMAXUB256,
28585 IX86_BUILTIN_PMAXUW256,
28586 IX86_BUILTIN_PMAXUD256,
28587 IX86_BUILTIN_PMINSB256,
28588 IX86_BUILTIN_PMINSW256,
28589 IX86_BUILTIN_PMINSD256,
28590 IX86_BUILTIN_PMINUB256,
28591 IX86_BUILTIN_PMINUW256,
28592 IX86_BUILTIN_PMINUD256,
28593 IX86_BUILTIN_PMOVMSKB256,
28594 IX86_BUILTIN_PMOVSXBW256,
28595 IX86_BUILTIN_PMOVSXBD256,
28596 IX86_BUILTIN_PMOVSXBQ256,
28597 IX86_BUILTIN_PMOVSXWD256,
28598 IX86_BUILTIN_PMOVSXWQ256,
28599 IX86_BUILTIN_PMOVSXDQ256,
28600 IX86_BUILTIN_PMOVZXBW256,
28601 IX86_BUILTIN_PMOVZXBD256,
28602 IX86_BUILTIN_PMOVZXBQ256,
28603 IX86_BUILTIN_PMOVZXWD256,
28604 IX86_BUILTIN_PMOVZXWQ256,
28605 IX86_BUILTIN_PMOVZXDQ256,
28606 IX86_BUILTIN_PMULDQ256,
28607 IX86_BUILTIN_PMULHRSW256,
28608 IX86_BUILTIN_PMULHUW256,
28609 IX86_BUILTIN_PMULHW256,
28610 IX86_BUILTIN_PMULLW256,
28611 IX86_BUILTIN_PMULLD256,
28612 IX86_BUILTIN_PMULUDQ256,
28613 IX86_BUILTIN_POR256,
28614 IX86_BUILTIN_PSADBW256,
28615 IX86_BUILTIN_PSHUFB256,
28616 IX86_BUILTIN_PSHUFD256,
28617 IX86_BUILTIN_PSHUFHW256,
28618 IX86_BUILTIN_PSHUFLW256,
28619 IX86_BUILTIN_PSIGNB256,
28620 IX86_BUILTIN_PSIGNW256,
28621 IX86_BUILTIN_PSIGND256,
28622 IX86_BUILTIN_PSLLDQI256,
28623 IX86_BUILTIN_PSLLWI256,
28624 IX86_BUILTIN_PSLLW256,
28625 IX86_BUILTIN_PSLLDI256,
28626 IX86_BUILTIN_PSLLD256,
28627 IX86_BUILTIN_PSLLQI256,
28628 IX86_BUILTIN_PSLLQ256,
28629 IX86_BUILTIN_PSRAWI256,
28630 IX86_BUILTIN_PSRAW256,
28631 IX86_BUILTIN_PSRADI256,
28632 IX86_BUILTIN_PSRAD256,
28633 IX86_BUILTIN_PSRLDQI256,
28634 IX86_BUILTIN_PSRLWI256,
28635 IX86_BUILTIN_PSRLW256,
28636 IX86_BUILTIN_PSRLDI256,
28637 IX86_BUILTIN_PSRLD256,
28638 IX86_BUILTIN_PSRLQI256,
28639 IX86_BUILTIN_PSRLQ256,
28640 IX86_BUILTIN_PSUBB256,
28641 IX86_BUILTIN_PSUBW256,
28642 IX86_BUILTIN_PSUBD256,
28643 IX86_BUILTIN_PSUBQ256,
28644 IX86_BUILTIN_PSUBSB256,
28645 IX86_BUILTIN_PSUBSW256,
28646 IX86_BUILTIN_PSUBUSB256,
28647 IX86_BUILTIN_PSUBUSW256,
28648 IX86_BUILTIN_PUNPCKHBW256,
28649 IX86_BUILTIN_PUNPCKHWD256,
28650 IX86_BUILTIN_PUNPCKHDQ256,
28651 IX86_BUILTIN_PUNPCKHQDQ256,
28652 IX86_BUILTIN_PUNPCKLBW256,
28653 IX86_BUILTIN_PUNPCKLWD256,
28654 IX86_BUILTIN_PUNPCKLDQ256,
28655 IX86_BUILTIN_PUNPCKLQDQ256,
28656 IX86_BUILTIN_PXOR256,
28657 IX86_BUILTIN_MOVNTDQA256,
28658 IX86_BUILTIN_VBROADCASTSS_PS,
28659 IX86_BUILTIN_VBROADCASTSS_PS256,
28660 IX86_BUILTIN_VBROADCASTSD_PD256,
28661 IX86_BUILTIN_VBROADCASTSI256,
28662 IX86_BUILTIN_PBLENDD256,
28663 IX86_BUILTIN_PBLENDD128,
28664 IX86_BUILTIN_PBROADCASTB256,
28665 IX86_BUILTIN_PBROADCASTW256,
28666 IX86_BUILTIN_PBROADCASTD256,
28667 IX86_BUILTIN_PBROADCASTQ256,
28668 IX86_BUILTIN_PBROADCASTB128,
28669 IX86_BUILTIN_PBROADCASTW128,
28670 IX86_BUILTIN_PBROADCASTD128,
28671 IX86_BUILTIN_PBROADCASTQ128,
28672 IX86_BUILTIN_VPERMVARSI256,
28673 IX86_BUILTIN_VPERMDF256,
28674 IX86_BUILTIN_VPERMVARSF256,
28675 IX86_BUILTIN_VPERMDI256,
28676 IX86_BUILTIN_VPERMTI256,
28677 IX86_BUILTIN_VEXTRACT128I256,
28678 IX86_BUILTIN_VINSERT128I256,
28679 IX86_BUILTIN_MASKLOADD,
28680 IX86_BUILTIN_MASKLOADQ,
28681 IX86_BUILTIN_MASKLOADD256,
28682 IX86_BUILTIN_MASKLOADQ256,
28683 IX86_BUILTIN_MASKSTORED,
28684 IX86_BUILTIN_MASKSTOREQ,
28685 IX86_BUILTIN_MASKSTORED256,
28686 IX86_BUILTIN_MASKSTOREQ256,
28687 IX86_BUILTIN_PSLLVV4DI,
28688 IX86_BUILTIN_PSLLVV2DI,
28689 IX86_BUILTIN_PSLLVV8SI,
28690 IX86_BUILTIN_PSLLVV4SI,
28691 IX86_BUILTIN_PSRAVV8SI,
28692 IX86_BUILTIN_PSRAVV4SI,
28693 IX86_BUILTIN_PSRLVV4DI,
28694 IX86_BUILTIN_PSRLVV2DI,
28695 IX86_BUILTIN_PSRLVV8SI,
28696 IX86_BUILTIN_PSRLVV4SI,
28698 IX86_BUILTIN_GATHERSIV2DF,
28699 IX86_BUILTIN_GATHERSIV4DF,
28700 IX86_BUILTIN_GATHERDIV2DF,
28701 IX86_BUILTIN_GATHERDIV4DF,
28702 IX86_BUILTIN_GATHERSIV4SF,
28703 IX86_BUILTIN_GATHERSIV8SF,
28704 IX86_BUILTIN_GATHERDIV4SF,
28705 IX86_BUILTIN_GATHERDIV8SF,
28706 IX86_BUILTIN_GATHERSIV2DI,
28707 IX86_BUILTIN_GATHERSIV4DI,
28708 IX86_BUILTIN_GATHERDIV2DI,
28709 IX86_BUILTIN_GATHERDIV4DI,
28710 IX86_BUILTIN_GATHERSIV4SI,
28711 IX86_BUILTIN_GATHERSIV8SI,
28712 IX86_BUILTIN_GATHERDIV4SI,
28713 IX86_BUILTIN_GATHERDIV8SI,
28715 /* AVX512F */
28716 IX86_BUILTIN_SI512_SI256,
28717 IX86_BUILTIN_PD512_PD256,
28718 IX86_BUILTIN_PS512_PS256,
28719 IX86_BUILTIN_SI512_SI,
28720 IX86_BUILTIN_PD512_PD,
28721 IX86_BUILTIN_PS512_PS,
28722 IX86_BUILTIN_ADDPD512,
28723 IX86_BUILTIN_ADDPS512,
28724 IX86_BUILTIN_ADDSD_ROUND,
28725 IX86_BUILTIN_ADDSS_ROUND,
28726 IX86_BUILTIN_ALIGND512,
28727 IX86_BUILTIN_ALIGNQ512,
28728 IX86_BUILTIN_BLENDMD512,
28729 IX86_BUILTIN_BLENDMPD512,
28730 IX86_BUILTIN_BLENDMPS512,
28731 IX86_BUILTIN_BLENDMQ512,
28732 IX86_BUILTIN_BROADCASTF32X4_512,
28733 IX86_BUILTIN_BROADCASTF64X4_512,
28734 IX86_BUILTIN_BROADCASTI32X4_512,
28735 IX86_BUILTIN_BROADCASTI64X4_512,
28736 IX86_BUILTIN_BROADCASTSD512,
28737 IX86_BUILTIN_BROADCASTSS512,
28738 IX86_BUILTIN_CMPD512,
28739 IX86_BUILTIN_CMPPD512,
28740 IX86_BUILTIN_CMPPS512,
28741 IX86_BUILTIN_CMPQ512,
28742 IX86_BUILTIN_CMPSD_MASK,
28743 IX86_BUILTIN_CMPSS_MASK,
28744 IX86_BUILTIN_COMIDF,
28745 IX86_BUILTIN_COMISF,
28746 IX86_BUILTIN_COMPRESSPD512,
28747 IX86_BUILTIN_COMPRESSPDSTORE512,
28748 IX86_BUILTIN_COMPRESSPS512,
28749 IX86_BUILTIN_COMPRESSPSSTORE512,
28750 IX86_BUILTIN_CVTDQ2PD512,
28751 IX86_BUILTIN_CVTDQ2PS512,
28752 IX86_BUILTIN_CVTPD2DQ512,
28753 IX86_BUILTIN_CVTPD2PS512,
28754 IX86_BUILTIN_CVTPD2UDQ512,
28755 IX86_BUILTIN_CVTPH2PS512,
28756 IX86_BUILTIN_CVTPS2DQ512,
28757 IX86_BUILTIN_CVTPS2PD512,
28758 IX86_BUILTIN_CVTPS2PH512,
28759 IX86_BUILTIN_CVTPS2UDQ512,
28760 IX86_BUILTIN_CVTSD2SS_ROUND,
28761 IX86_BUILTIN_CVTSI2SD64,
28762 IX86_BUILTIN_CVTSI2SS32,
28763 IX86_BUILTIN_CVTSI2SS64,
28764 IX86_BUILTIN_CVTSS2SD_ROUND,
28765 IX86_BUILTIN_CVTTPD2DQ512,
28766 IX86_BUILTIN_CVTTPD2UDQ512,
28767 IX86_BUILTIN_CVTTPS2DQ512,
28768 IX86_BUILTIN_CVTTPS2UDQ512,
28769 IX86_BUILTIN_CVTUDQ2PD512,
28770 IX86_BUILTIN_CVTUDQ2PS512,
28771 IX86_BUILTIN_CVTUSI2SD32,
28772 IX86_BUILTIN_CVTUSI2SD64,
28773 IX86_BUILTIN_CVTUSI2SS32,
28774 IX86_BUILTIN_CVTUSI2SS64,
28775 IX86_BUILTIN_DIVPD512,
28776 IX86_BUILTIN_DIVPS512,
28777 IX86_BUILTIN_DIVSD_ROUND,
28778 IX86_BUILTIN_DIVSS_ROUND,
28779 IX86_BUILTIN_EXPANDPD512,
28780 IX86_BUILTIN_EXPANDPD512Z,
28781 IX86_BUILTIN_EXPANDPDLOAD512,
28782 IX86_BUILTIN_EXPANDPDLOAD512Z,
28783 IX86_BUILTIN_EXPANDPS512,
28784 IX86_BUILTIN_EXPANDPS512Z,
28785 IX86_BUILTIN_EXPANDPSLOAD512,
28786 IX86_BUILTIN_EXPANDPSLOAD512Z,
28787 IX86_BUILTIN_EXTRACTF32X4,
28788 IX86_BUILTIN_EXTRACTF64X4,
28789 IX86_BUILTIN_EXTRACTI32X4,
28790 IX86_BUILTIN_EXTRACTI64X4,
28791 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28792 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28793 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28794 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28795 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28796 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28797 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28798 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28799 IX86_BUILTIN_GETEXPPD512,
28800 IX86_BUILTIN_GETEXPPS512,
28801 IX86_BUILTIN_GETEXPSD128,
28802 IX86_BUILTIN_GETEXPSS128,
28803 IX86_BUILTIN_GETMANTPD512,
28804 IX86_BUILTIN_GETMANTPS512,
28805 IX86_BUILTIN_GETMANTSD128,
28806 IX86_BUILTIN_GETMANTSS128,
28807 IX86_BUILTIN_INSERTF32X4,
28808 IX86_BUILTIN_INSERTF64X4,
28809 IX86_BUILTIN_INSERTI32X4,
28810 IX86_BUILTIN_INSERTI64X4,
28811 IX86_BUILTIN_LOADAPD512,
28812 IX86_BUILTIN_LOADAPS512,
28813 IX86_BUILTIN_LOADDQUDI512,
28814 IX86_BUILTIN_LOADDQUSI512,
28815 IX86_BUILTIN_LOADUPD512,
28816 IX86_BUILTIN_LOADUPS512,
28817 IX86_BUILTIN_MAXPD512,
28818 IX86_BUILTIN_MAXPS512,
28819 IX86_BUILTIN_MAXSD_ROUND,
28820 IX86_BUILTIN_MAXSS_ROUND,
28821 IX86_BUILTIN_MINPD512,
28822 IX86_BUILTIN_MINPS512,
28823 IX86_BUILTIN_MINSD_ROUND,
28824 IX86_BUILTIN_MINSS_ROUND,
28825 IX86_BUILTIN_MOVAPD512,
28826 IX86_BUILTIN_MOVAPS512,
28827 IX86_BUILTIN_MOVDDUP512,
28828 IX86_BUILTIN_MOVDQA32LOAD512,
28829 IX86_BUILTIN_MOVDQA32STORE512,
28830 IX86_BUILTIN_MOVDQA32_512,
28831 IX86_BUILTIN_MOVDQA64LOAD512,
28832 IX86_BUILTIN_MOVDQA64STORE512,
28833 IX86_BUILTIN_MOVDQA64_512,
28834 IX86_BUILTIN_MOVNTDQ512,
28835 IX86_BUILTIN_MOVNTDQA512,
28836 IX86_BUILTIN_MOVNTPD512,
28837 IX86_BUILTIN_MOVNTPS512,
28838 IX86_BUILTIN_MOVSHDUP512,
28839 IX86_BUILTIN_MOVSLDUP512,
28840 IX86_BUILTIN_MULPD512,
28841 IX86_BUILTIN_MULPS512,
28842 IX86_BUILTIN_MULSD_ROUND,
28843 IX86_BUILTIN_MULSS_ROUND,
28844 IX86_BUILTIN_PABSD512,
28845 IX86_BUILTIN_PABSQ512,
28846 IX86_BUILTIN_PADDD512,
28847 IX86_BUILTIN_PADDQ512,
28848 IX86_BUILTIN_PANDD512,
28849 IX86_BUILTIN_PANDND512,
28850 IX86_BUILTIN_PANDNQ512,
28851 IX86_BUILTIN_PANDQ512,
28852 IX86_BUILTIN_PBROADCASTD512,
28853 IX86_BUILTIN_PBROADCASTD512_GPR,
28854 IX86_BUILTIN_PBROADCASTMB512,
28855 IX86_BUILTIN_PBROADCASTMW512,
28856 IX86_BUILTIN_PBROADCASTQ512,
28857 IX86_BUILTIN_PBROADCASTQ512_GPR,
28858 IX86_BUILTIN_PCMPEQD512_MASK,
28859 IX86_BUILTIN_PCMPEQQ512_MASK,
28860 IX86_BUILTIN_PCMPGTD512_MASK,
28861 IX86_BUILTIN_PCMPGTQ512_MASK,
28862 IX86_BUILTIN_PCOMPRESSD512,
28863 IX86_BUILTIN_PCOMPRESSDSTORE512,
28864 IX86_BUILTIN_PCOMPRESSQ512,
28865 IX86_BUILTIN_PCOMPRESSQSTORE512,
28866 IX86_BUILTIN_PEXPANDD512,
28867 IX86_BUILTIN_PEXPANDD512Z,
28868 IX86_BUILTIN_PEXPANDDLOAD512,
28869 IX86_BUILTIN_PEXPANDDLOAD512Z,
28870 IX86_BUILTIN_PEXPANDQ512,
28871 IX86_BUILTIN_PEXPANDQ512Z,
28872 IX86_BUILTIN_PEXPANDQLOAD512,
28873 IX86_BUILTIN_PEXPANDQLOAD512Z,
28874 IX86_BUILTIN_PMAXSD512,
28875 IX86_BUILTIN_PMAXSQ512,
28876 IX86_BUILTIN_PMAXUD512,
28877 IX86_BUILTIN_PMAXUQ512,
28878 IX86_BUILTIN_PMINSD512,
28879 IX86_BUILTIN_PMINSQ512,
28880 IX86_BUILTIN_PMINUD512,
28881 IX86_BUILTIN_PMINUQ512,
28882 IX86_BUILTIN_PMOVDB512,
28883 IX86_BUILTIN_PMOVDB512_MEM,
28884 IX86_BUILTIN_PMOVDW512,
28885 IX86_BUILTIN_PMOVDW512_MEM,
28886 IX86_BUILTIN_PMOVQB512,
28887 IX86_BUILTIN_PMOVQB512_MEM,
28888 IX86_BUILTIN_PMOVQD512,
28889 IX86_BUILTIN_PMOVQD512_MEM,
28890 IX86_BUILTIN_PMOVQW512,
28891 IX86_BUILTIN_PMOVQW512_MEM,
28892 IX86_BUILTIN_PMOVSDB512,
28893 IX86_BUILTIN_PMOVSDB512_MEM,
28894 IX86_BUILTIN_PMOVSDW512,
28895 IX86_BUILTIN_PMOVSDW512_MEM,
28896 IX86_BUILTIN_PMOVSQB512,
28897 IX86_BUILTIN_PMOVSQB512_MEM,
28898 IX86_BUILTIN_PMOVSQD512,
28899 IX86_BUILTIN_PMOVSQD512_MEM,
28900 IX86_BUILTIN_PMOVSQW512,
28901 IX86_BUILTIN_PMOVSQW512_MEM,
28902 IX86_BUILTIN_PMOVSXBD512,
28903 IX86_BUILTIN_PMOVSXBQ512,
28904 IX86_BUILTIN_PMOVSXDQ512,
28905 IX86_BUILTIN_PMOVSXWD512,
28906 IX86_BUILTIN_PMOVSXWQ512,
28907 IX86_BUILTIN_PMOVUSDB512,
28908 IX86_BUILTIN_PMOVUSDB512_MEM,
28909 IX86_BUILTIN_PMOVUSDW512,
28910 IX86_BUILTIN_PMOVUSDW512_MEM,
28911 IX86_BUILTIN_PMOVUSQB512,
28912 IX86_BUILTIN_PMOVUSQB512_MEM,
28913 IX86_BUILTIN_PMOVUSQD512,
28914 IX86_BUILTIN_PMOVUSQD512_MEM,
28915 IX86_BUILTIN_PMOVUSQW512,
28916 IX86_BUILTIN_PMOVUSQW512_MEM,
28917 IX86_BUILTIN_PMOVZXBD512,
28918 IX86_BUILTIN_PMOVZXBQ512,
28919 IX86_BUILTIN_PMOVZXDQ512,
28920 IX86_BUILTIN_PMOVZXWD512,
28921 IX86_BUILTIN_PMOVZXWQ512,
28922 IX86_BUILTIN_PMULDQ512,
28923 IX86_BUILTIN_PMULLD512,
28924 IX86_BUILTIN_PMULUDQ512,
28925 IX86_BUILTIN_PORD512,
28926 IX86_BUILTIN_PORQ512,
28927 IX86_BUILTIN_PROLD512,
28928 IX86_BUILTIN_PROLQ512,
28929 IX86_BUILTIN_PROLVD512,
28930 IX86_BUILTIN_PROLVQ512,
28931 IX86_BUILTIN_PRORD512,
28932 IX86_BUILTIN_PRORQ512,
28933 IX86_BUILTIN_PRORVD512,
28934 IX86_BUILTIN_PRORVQ512,
28935 IX86_BUILTIN_PSHUFD512,
28936 IX86_BUILTIN_PSLLD512,
28937 IX86_BUILTIN_PSLLDI512,
28938 IX86_BUILTIN_PSLLQ512,
28939 IX86_BUILTIN_PSLLQI512,
28940 IX86_BUILTIN_PSLLVV16SI,
28941 IX86_BUILTIN_PSLLVV8DI,
28942 IX86_BUILTIN_PSRAD512,
28943 IX86_BUILTIN_PSRADI512,
28944 IX86_BUILTIN_PSRAQ512,
28945 IX86_BUILTIN_PSRAQI512,
28946 IX86_BUILTIN_PSRAVV16SI,
28947 IX86_BUILTIN_PSRAVV8DI,
28948 IX86_BUILTIN_PSRLD512,
28949 IX86_BUILTIN_PSRLDI512,
28950 IX86_BUILTIN_PSRLQ512,
28951 IX86_BUILTIN_PSRLQI512,
28952 IX86_BUILTIN_PSRLVV16SI,
28953 IX86_BUILTIN_PSRLVV8DI,
28954 IX86_BUILTIN_PSUBD512,
28955 IX86_BUILTIN_PSUBQ512,
28956 IX86_BUILTIN_PTESTMD512,
28957 IX86_BUILTIN_PTESTMQ512,
28958 IX86_BUILTIN_PTESTNMD512,
28959 IX86_BUILTIN_PTESTNMQ512,
28960 IX86_BUILTIN_PUNPCKHDQ512,
28961 IX86_BUILTIN_PUNPCKHQDQ512,
28962 IX86_BUILTIN_PUNPCKLDQ512,
28963 IX86_BUILTIN_PUNPCKLQDQ512,
28964 IX86_BUILTIN_PXORD512,
28965 IX86_BUILTIN_PXORQ512,
28966 IX86_BUILTIN_RCP14PD512,
28967 IX86_BUILTIN_RCP14PS512,
28968 IX86_BUILTIN_RCP14SD,
28969 IX86_BUILTIN_RCP14SS,
28970 IX86_BUILTIN_RNDSCALEPD,
28971 IX86_BUILTIN_RNDSCALEPS,
28972 IX86_BUILTIN_RNDSCALESD,
28973 IX86_BUILTIN_RNDSCALESS,
28974 IX86_BUILTIN_RSQRT14PD512,
28975 IX86_BUILTIN_RSQRT14PS512,
28976 IX86_BUILTIN_RSQRT14SD,
28977 IX86_BUILTIN_RSQRT14SS,
28978 IX86_BUILTIN_SCALEFPD512,
28979 IX86_BUILTIN_SCALEFPS512,
28980 IX86_BUILTIN_SCALEFSD,
28981 IX86_BUILTIN_SCALEFSS,
28982 IX86_BUILTIN_SHUFPD512,
28983 IX86_BUILTIN_SHUFPS512,
28984 IX86_BUILTIN_SHUF_F32x4,
28985 IX86_BUILTIN_SHUF_F64x2,
28986 IX86_BUILTIN_SHUF_I32x4,
28987 IX86_BUILTIN_SHUF_I64x2,
28988 IX86_BUILTIN_SQRTPD512,
28989 IX86_BUILTIN_SQRTPD512_MASK,
28990 IX86_BUILTIN_SQRTPS512_MASK,
28991 IX86_BUILTIN_SQRTPS_NR512,
28992 IX86_BUILTIN_SQRTSD_ROUND,
28993 IX86_BUILTIN_SQRTSS_ROUND,
28994 IX86_BUILTIN_STOREAPD512,
28995 IX86_BUILTIN_STOREAPS512,
28996 IX86_BUILTIN_STOREDQUDI512,
28997 IX86_BUILTIN_STOREDQUSI512,
28998 IX86_BUILTIN_STOREUPD512,
28999 IX86_BUILTIN_STOREUPS512,
29000 IX86_BUILTIN_SUBPD512,
29001 IX86_BUILTIN_SUBPS512,
29002 IX86_BUILTIN_SUBSD_ROUND,
29003 IX86_BUILTIN_SUBSS_ROUND,
29004 IX86_BUILTIN_UCMPD512,
29005 IX86_BUILTIN_UCMPQ512,
29006 IX86_BUILTIN_UNPCKHPD512,
29007 IX86_BUILTIN_UNPCKHPS512,
29008 IX86_BUILTIN_UNPCKLPD512,
29009 IX86_BUILTIN_UNPCKLPS512,
29010 IX86_BUILTIN_VCVTSD2SI32,
29011 IX86_BUILTIN_VCVTSD2SI64,
29012 IX86_BUILTIN_VCVTSD2USI32,
29013 IX86_BUILTIN_VCVTSD2USI64,
29014 IX86_BUILTIN_VCVTSS2SI32,
29015 IX86_BUILTIN_VCVTSS2SI64,
29016 IX86_BUILTIN_VCVTSS2USI32,
29017 IX86_BUILTIN_VCVTSS2USI64,
29018 IX86_BUILTIN_VCVTTSD2SI32,
29019 IX86_BUILTIN_VCVTTSD2SI64,
29020 IX86_BUILTIN_VCVTTSD2USI32,
29021 IX86_BUILTIN_VCVTTSD2USI64,
29022 IX86_BUILTIN_VCVTTSS2SI32,
29023 IX86_BUILTIN_VCVTTSS2SI64,
29024 IX86_BUILTIN_VCVTTSS2USI32,
29025 IX86_BUILTIN_VCVTTSS2USI64,
29026 IX86_BUILTIN_VFMADDPD512_MASK,
29027 IX86_BUILTIN_VFMADDPD512_MASK3,
29028 IX86_BUILTIN_VFMADDPD512_MASKZ,
29029 IX86_BUILTIN_VFMADDPS512_MASK,
29030 IX86_BUILTIN_VFMADDPS512_MASK3,
29031 IX86_BUILTIN_VFMADDPS512_MASKZ,
29032 IX86_BUILTIN_VFMADDSD3_ROUND,
29033 IX86_BUILTIN_VFMADDSS3_ROUND,
29034 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29035 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29036 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29037 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29038 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29039 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29040 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29041 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29042 IX86_BUILTIN_VFMSUBPD512_MASK3,
29043 IX86_BUILTIN_VFMSUBPS512_MASK3,
29044 IX86_BUILTIN_VFMSUBSD3_MASK3,
29045 IX86_BUILTIN_VFMSUBSS3_MASK3,
29046 IX86_BUILTIN_VFNMADDPD512_MASK,
29047 IX86_BUILTIN_VFNMADDPS512_MASK,
29048 IX86_BUILTIN_VFNMSUBPD512_MASK,
29049 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29050 IX86_BUILTIN_VFNMSUBPS512_MASK,
29051 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29052 IX86_BUILTIN_VPCLZCNTD512,
29053 IX86_BUILTIN_VPCLZCNTQ512,
29054 IX86_BUILTIN_VPCONFLICTD512,
29055 IX86_BUILTIN_VPCONFLICTQ512,
29056 IX86_BUILTIN_VPERMDF512,
29057 IX86_BUILTIN_VPERMDI512,
29058 IX86_BUILTIN_VPERMI2VARD512,
29059 IX86_BUILTIN_VPERMI2VARPD512,
29060 IX86_BUILTIN_VPERMI2VARPS512,
29061 IX86_BUILTIN_VPERMI2VARQ512,
29062 IX86_BUILTIN_VPERMILPD512,
29063 IX86_BUILTIN_VPERMILPS512,
29064 IX86_BUILTIN_VPERMILVARPD512,
29065 IX86_BUILTIN_VPERMILVARPS512,
29066 IX86_BUILTIN_VPERMT2VARD512,
29067 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29068 IX86_BUILTIN_VPERMT2VARPD512,
29069 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29070 IX86_BUILTIN_VPERMT2VARPS512,
29071 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29072 IX86_BUILTIN_VPERMT2VARQ512,
29073 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29074 IX86_BUILTIN_VPERMVARDF512,
29075 IX86_BUILTIN_VPERMVARDI512,
29076 IX86_BUILTIN_VPERMVARSF512,
29077 IX86_BUILTIN_VPERMVARSI512,
29078 IX86_BUILTIN_VTERNLOGD512_MASK,
29079 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29080 IX86_BUILTIN_VTERNLOGQ512_MASK,
29081 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29083 /* Mask arithmetic operations */
29084 IX86_BUILTIN_KAND16,
29085 IX86_BUILTIN_KANDN16,
29086 IX86_BUILTIN_KNOT16,
29087 IX86_BUILTIN_KOR16,
29088 IX86_BUILTIN_KORTESTC16,
29089 IX86_BUILTIN_KORTESTZ16,
29090 IX86_BUILTIN_KUNPCKBW,
29091 IX86_BUILTIN_KXNOR16,
29092 IX86_BUILTIN_KXOR16,
29093 IX86_BUILTIN_KMOV16,
29095 /* AVX512VL. */
29096 IX86_BUILTIN_PMOVUSQD256_MEM,
29097 IX86_BUILTIN_PMOVUSQD128_MEM,
29098 IX86_BUILTIN_PMOVSQD256_MEM,
29099 IX86_BUILTIN_PMOVSQD128_MEM,
29100 IX86_BUILTIN_PMOVQD256_MEM,
29101 IX86_BUILTIN_PMOVQD128_MEM,
29102 IX86_BUILTIN_PMOVUSQW256_MEM,
29103 IX86_BUILTIN_PMOVUSQW128_MEM,
29104 IX86_BUILTIN_PMOVSQW256_MEM,
29105 IX86_BUILTIN_PMOVSQW128_MEM,
29106 IX86_BUILTIN_PMOVQW256_MEM,
29107 IX86_BUILTIN_PMOVQW128_MEM,
29108 IX86_BUILTIN_PMOVUSQB256_MEM,
29109 IX86_BUILTIN_PMOVUSQB128_MEM,
29110 IX86_BUILTIN_PMOVSQB256_MEM,
29111 IX86_BUILTIN_PMOVSQB128_MEM,
29112 IX86_BUILTIN_PMOVQB256_MEM,
29113 IX86_BUILTIN_PMOVQB128_MEM,
29114 IX86_BUILTIN_PMOVUSDW256_MEM,
29115 IX86_BUILTIN_PMOVUSDW128_MEM,
29116 IX86_BUILTIN_PMOVSDW256_MEM,
29117 IX86_BUILTIN_PMOVSDW128_MEM,
29118 IX86_BUILTIN_PMOVDW256_MEM,
29119 IX86_BUILTIN_PMOVDW128_MEM,
29120 IX86_BUILTIN_PMOVUSDB256_MEM,
29121 IX86_BUILTIN_PMOVUSDB128_MEM,
29122 IX86_BUILTIN_PMOVSDB256_MEM,
29123 IX86_BUILTIN_PMOVSDB128_MEM,
29124 IX86_BUILTIN_PMOVDB256_MEM,
29125 IX86_BUILTIN_PMOVDB128_MEM,
29126 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29127 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29128 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29129 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29130 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29131 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29132 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29133 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29134 IX86_BUILTIN_LOADAPD256_MASK,
29135 IX86_BUILTIN_LOADAPD128_MASK,
29136 IX86_BUILTIN_LOADAPS256_MASK,
29137 IX86_BUILTIN_LOADAPS128_MASK,
29138 IX86_BUILTIN_STOREAPD256_MASK,
29139 IX86_BUILTIN_STOREAPD128_MASK,
29140 IX86_BUILTIN_STOREAPS256_MASK,
29141 IX86_BUILTIN_STOREAPS128_MASK,
29142 IX86_BUILTIN_LOADUPD256_MASK,
29143 IX86_BUILTIN_LOADUPD128_MASK,
29144 IX86_BUILTIN_LOADUPS256_MASK,
29145 IX86_BUILTIN_LOADUPS128_MASK,
29146 IX86_BUILTIN_STOREUPD256_MASK,
29147 IX86_BUILTIN_STOREUPD128_MASK,
29148 IX86_BUILTIN_STOREUPS256_MASK,
29149 IX86_BUILTIN_STOREUPS128_MASK,
29150 IX86_BUILTIN_LOADDQUDI256_MASK,
29151 IX86_BUILTIN_LOADDQUDI128_MASK,
29152 IX86_BUILTIN_LOADDQUSI256_MASK,
29153 IX86_BUILTIN_LOADDQUSI128_MASK,
29154 IX86_BUILTIN_LOADDQUHI256_MASK,
29155 IX86_BUILTIN_LOADDQUHI128_MASK,
29156 IX86_BUILTIN_LOADDQUQI256_MASK,
29157 IX86_BUILTIN_LOADDQUQI128_MASK,
29158 IX86_BUILTIN_STOREDQUDI256_MASK,
29159 IX86_BUILTIN_STOREDQUDI128_MASK,
29160 IX86_BUILTIN_STOREDQUSI256_MASK,
29161 IX86_BUILTIN_STOREDQUSI128_MASK,
29162 IX86_BUILTIN_STOREDQUHI256_MASK,
29163 IX86_BUILTIN_STOREDQUHI128_MASK,
29164 IX86_BUILTIN_STOREDQUQI256_MASK,
29165 IX86_BUILTIN_STOREDQUQI128_MASK,
29166 IX86_BUILTIN_COMPRESSPDSTORE256,
29167 IX86_BUILTIN_COMPRESSPDSTORE128,
29168 IX86_BUILTIN_COMPRESSPSSTORE256,
29169 IX86_BUILTIN_COMPRESSPSSTORE128,
29170 IX86_BUILTIN_PCOMPRESSQSTORE256,
29171 IX86_BUILTIN_PCOMPRESSQSTORE128,
29172 IX86_BUILTIN_PCOMPRESSDSTORE256,
29173 IX86_BUILTIN_PCOMPRESSDSTORE128,
29174 IX86_BUILTIN_EXPANDPDLOAD256,
29175 IX86_BUILTIN_EXPANDPDLOAD128,
29176 IX86_BUILTIN_EXPANDPSLOAD256,
29177 IX86_BUILTIN_EXPANDPSLOAD128,
29178 IX86_BUILTIN_PEXPANDQLOAD256,
29179 IX86_BUILTIN_PEXPANDQLOAD128,
29180 IX86_BUILTIN_PEXPANDDLOAD256,
29181 IX86_BUILTIN_PEXPANDDLOAD128,
29182 IX86_BUILTIN_EXPANDPDLOAD256Z,
29183 IX86_BUILTIN_EXPANDPDLOAD128Z,
29184 IX86_BUILTIN_EXPANDPSLOAD256Z,
29185 IX86_BUILTIN_EXPANDPSLOAD128Z,
29186 IX86_BUILTIN_PEXPANDQLOAD256Z,
29187 IX86_BUILTIN_PEXPANDQLOAD128Z,
29188 IX86_BUILTIN_PEXPANDDLOAD256Z,
29189 IX86_BUILTIN_PEXPANDDLOAD128Z,
29190 IX86_BUILTIN_PALIGNR256_MASK,
29191 IX86_BUILTIN_PALIGNR128_MASK,
29192 IX86_BUILTIN_MOVDQA64_256_MASK,
29193 IX86_BUILTIN_MOVDQA64_128_MASK,
29194 IX86_BUILTIN_MOVDQA32_256_MASK,
29195 IX86_BUILTIN_MOVDQA32_128_MASK,
29196 IX86_BUILTIN_MOVAPD256_MASK,
29197 IX86_BUILTIN_MOVAPD128_MASK,
29198 IX86_BUILTIN_MOVAPS256_MASK,
29199 IX86_BUILTIN_MOVAPS128_MASK,
29200 IX86_BUILTIN_MOVDQUHI256_MASK,
29201 IX86_BUILTIN_MOVDQUHI128_MASK,
29202 IX86_BUILTIN_MOVDQUQI256_MASK,
29203 IX86_BUILTIN_MOVDQUQI128_MASK,
29204 IX86_BUILTIN_MINPS128_MASK,
29205 IX86_BUILTIN_MAXPS128_MASK,
29206 IX86_BUILTIN_MINPD128_MASK,
29207 IX86_BUILTIN_MAXPD128_MASK,
29208 IX86_BUILTIN_MAXPD256_MASK,
29209 IX86_BUILTIN_MAXPS256_MASK,
29210 IX86_BUILTIN_MINPD256_MASK,
29211 IX86_BUILTIN_MINPS256_MASK,
29212 IX86_BUILTIN_MULPS128_MASK,
29213 IX86_BUILTIN_DIVPS128_MASK,
29214 IX86_BUILTIN_MULPD128_MASK,
29215 IX86_BUILTIN_DIVPD128_MASK,
29216 IX86_BUILTIN_DIVPD256_MASK,
29217 IX86_BUILTIN_DIVPS256_MASK,
29218 IX86_BUILTIN_MULPD256_MASK,
29219 IX86_BUILTIN_MULPS256_MASK,
29220 IX86_BUILTIN_ADDPD128_MASK,
29221 IX86_BUILTIN_ADDPD256_MASK,
29222 IX86_BUILTIN_ADDPS128_MASK,
29223 IX86_BUILTIN_ADDPS256_MASK,
29224 IX86_BUILTIN_SUBPD128_MASK,
29225 IX86_BUILTIN_SUBPD256_MASK,
29226 IX86_BUILTIN_SUBPS128_MASK,
29227 IX86_BUILTIN_SUBPS256_MASK,
29228 IX86_BUILTIN_XORPD256_MASK,
29229 IX86_BUILTIN_XORPD128_MASK,
29230 IX86_BUILTIN_XORPS256_MASK,
29231 IX86_BUILTIN_XORPS128_MASK,
29232 IX86_BUILTIN_ORPD256_MASK,
29233 IX86_BUILTIN_ORPD128_MASK,
29234 IX86_BUILTIN_ORPS256_MASK,
29235 IX86_BUILTIN_ORPS128_MASK,
29236 IX86_BUILTIN_BROADCASTF32x2_256,
29237 IX86_BUILTIN_BROADCASTI32x2_256,
29238 IX86_BUILTIN_BROADCASTI32x2_128,
29239 IX86_BUILTIN_BROADCASTF64X2_256,
29240 IX86_BUILTIN_BROADCASTI64X2_256,
29241 IX86_BUILTIN_BROADCASTF32X4_256,
29242 IX86_BUILTIN_BROADCASTI32X4_256,
29243 IX86_BUILTIN_EXTRACTF32X4_256,
29244 IX86_BUILTIN_EXTRACTI32X4_256,
29245 IX86_BUILTIN_DBPSADBW256,
29246 IX86_BUILTIN_DBPSADBW128,
29247 IX86_BUILTIN_CVTTPD2QQ256,
29248 IX86_BUILTIN_CVTTPD2QQ128,
29249 IX86_BUILTIN_CVTTPD2UQQ256,
29250 IX86_BUILTIN_CVTTPD2UQQ128,
29251 IX86_BUILTIN_CVTPD2QQ256,
29252 IX86_BUILTIN_CVTPD2QQ128,
29253 IX86_BUILTIN_CVTPD2UQQ256,
29254 IX86_BUILTIN_CVTPD2UQQ128,
29255 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29256 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29257 IX86_BUILTIN_CVTTPS2QQ256,
29258 IX86_BUILTIN_CVTTPS2QQ128,
29259 IX86_BUILTIN_CVTTPS2UQQ256,
29260 IX86_BUILTIN_CVTTPS2UQQ128,
29261 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29262 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29263 IX86_BUILTIN_CVTTPS2UDQ256,
29264 IX86_BUILTIN_CVTTPS2UDQ128,
29265 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29266 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29267 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29268 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29269 IX86_BUILTIN_CVTPD2DQ256_MASK,
29270 IX86_BUILTIN_CVTPD2DQ128_MASK,
29271 IX86_BUILTIN_CVTDQ2PD256_MASK,
29272 IX86_BUILTIN_CVTDQ2PD128_MASK,
29273 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29274 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29275 IX86_BUILTIN_CVTDQ2PS256_MASK,
29276 IX86_BUILTIN_CVTDQ2PS128_MASK,
29277 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29278 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29279 IX86_BUILTIN_CVTPS2PD256_MASK,
29280 IX86_BUILTIN_CVTPS2PD128_MASK,
29281 IX86_BUILTIN_PBROADCASTB256_MASK,
29282 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29283 IX86_BUILTIN_PBROADCASTB128_MASK,
29284 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29285 IX86_BUILTIN_PBROADCASTW256_MASK,
29286 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29287 IX86_BUILTIN_PBROADCASTW128_MASK,
29288 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29289 IX86_BUILTIN_PBROADCASTD256_MASK,
29290 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29291 IX86_BUILTIN_PBROADCASTD128_MASK,
29292 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29293 IX86_BUILTIN_PBROADCASTQ256_MASK,
29294 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29295 IX86_BUILTIN_PBROADCASTQ128_MASK,
29296 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29297 IX86_BUILTIN_BROADCASTSS256,
29298 IX86_BUILTIN_BROADCASTSS128,
29299 IX86_BUILTIN_BROADCASTSD256,
29300 IX86_BUILTIN_EXTRACTF64X2_256,
29301 IX86_BUILTIN_EXTRACTI64X2_256,
29302 IX86_BUILTIN_INSERTF32X4_256,
29303 IX86_BUILTIN_INSERTI32X4_256,
29304 IX86_BUILTIN_PMOVSXBW256_MASK,
29305 IX86_BUILTIN_PMOVSXBW128_MASK,
29306 IX86_BUILTIN_PMOVSXBD256_MASK,
29307 IX86_BUILTIN_PMOVSXBD128_MASK,
29308 IX86_BUILTIN_PMOVSXBQ256_MASK,
29309 IX86_BUILTIN_PMOVSXBQ128_MASK,
29310 IX86_BUILTIN_PMOVSXWD256_MASK,
29311 IX86_BUILTIN_PMOVSXWD128_MASK,
29312 IX86_BUILTIN_PMOVSXWQ256_MASK,
29313 IX86_BUILTIN_PMOVSXWQ128_MASK,
29314 IX86_BUILTIN_PMOVSXDQ256_MASK,
29315 IX86_BUILTIN_PMOVSXDQ128_MASK,
29316 IX86_BUILTIN_PMOVZXBW256_MASK,
29317 IX86_BUILTIN_PMOVZXBW128_MASK,
29318 IX86_BUILTIN_PMOVZXBD256_MASK,
29319 IX86_BUILTIN_PMOVZXBD128_MASK,
29320 IX86_BUILTIN_PMOVZXBQ256_MASK,
29321 IX86_BUILTIN_PMOVZXBQ128_MASK,
29322 IX86_BUILTIN_PMOVZXWD256_MASK,
29323 IX86_BUILTIN_PMOVZXWD128_MASK,
29324 IX86_BUILTIN_PMOVZXWQ256_MASK,
29325 IX86_BUILTIN_PMOVZXWQ128_MASK,
29326 IX86_BUILTIN_PMOVZXDQ256_MASK,
29327 IX86_BUILTIN_PMOVZXDQ128_MASK,
29328 IX86_BUILTIN_REDUCEPD256_MASK,
29329 IX86_BUILTIN_REDUCEPD128_MASK,
29330 IX86_BUILTIN_REDUCEPS256_MASK,
29331 IX86_BUILTIN_REDUCEPS128_MASK,
29332 IX86_BUILTIN_REDUCESD_MASK,
29333 IX86_BUILTIN_REDUCESS_MASK,
29334 IX86_BUILTIN_VPERMVARHI256_MASK,
29335 IX86_BUILTIN_VPERMVARHI128_MASK,
29336 IX86_BUILTIN_VPERMT2VARHI256,
29337 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29338 IX86_BUILTIN_VPERMT2VARHI128,
29339 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29340 IX86_BUILTIN_VPERMI2VARHI256,
29341 IX86_BUILTIN_VPERMI2VARHI128,
29342 IX86_BUILTIN_RCP14PD256,
29343 IX86_BUILTIN_RCP14PD128,
29344 IX86_BUILTIN_RCP14PS256,
29345 IX86_BUILTIN_RCP14PS128,
29346 IX86_BUILTIN_RSQRT14PD256_MASK,
29347 IX86_BUILTIN_RSQRT14PD128_MASK,
29348 IX86_BUILTIN_RSQRT14PS256_MASK,
29349 IX86_BUILTIN_RSQRT14PS128_MASK,
29350 IX86_BUILTIN_SQRTPD256_MASK,
29351 IX86_BUILTIN_SQRTPD128_MASK,
29352 IX86_BUILTIN_SQRTPS256_MASK,
29353 IX86_BUILTIN_SQRTPS128_MASK,
29354 IX86_BUILTIN_PADDB128_MASK,
29355 IX86_BUILTIN_PADDW128_MASK,
29356 IX86_BUILTIN_PADDD128_MASK,
29357 IX86_BUILTIN_PADDQ128_MASK,
29358 IX86_BUILTIN_PSUBB128_MASK,
29359 IX86_BUILTIN_PSUBW128_MASK,
29360 IX86_BUILTIN_PSUBD128_MASK,
29361 IX86_BUILTIN_PSUBQ128_MASK,
29362 IX86_BUILTIN_PADDSB128_MASK,
29363 IX86_BUILTIN_PADDSW128_MASK,
29364 IX86_BUILTIN_PSUBSB128_MASK,
29365 IX86_BUILTIN_PSUBSW128_MASK,
29366 IX86_BUILTIN_PADDUSB128_MASK,
29367 IX86_BUILTIN_PADDUSW128_MASK,
29368 IX86_BUILTIN_PSUBUSB128_MASK,
29369 IX86_BUILTIN_PSUBUSW128_MASK,
29370 IX86_BUILTIN_PADDB256_MASK,
29371 IX86_BUILTIN_PADDW256_MASK,
29372 IX86_BUILTIN_PADDD256_MASK,
29373 IX86_BUILTIN_PADDQ256_MASK,
29374 IX86_BUILTIN_PADDSB256_MASK,
29375 IX86_BUILTIN_PADDSW256_MASK,
29376 IX86_BUILTIN_PADDUSB256_MASK,
29377 IX86_BUILTIN_PADDUSW256_MASK,
29378 IX86_BUILTIN_PSUBB256_MASK,
29379 IX86_BUILTIN_PSUBW256_MASK,
29380 IX86_BUILTIN_PSUBD256_MASK,
29381 IX86_BUILTIN_PSUBQ256_MASK,
29382 IX86_BUILTIN_PSUBSB256_MASK,
29383 IX86_BUILTIN_PSUBSW256_MASK,
29384 IX86_BUILTIN_PSUBUSB256_MASK,
29385 IX86_BUILTIN_PSUBUSW256_MASK,
29386 IX86_BUILTIN_SHUF_F64x2_256,
29387 IX86_BUILTIN_SHUF_I64x2_256,
29388 IX86_BUILTIN_SHUF_I32x4_256,
29389 IX86_BUILTIN_SHUF_F32x4_256,
29390 IX86_BUILTIN_PMOVWB128,
29391 IX86_BUILTIN_PMOVWB256,
29392 IX86_BUILTIN_PMOVSWB128,
29393 IX86_BUILTIN_PMOVSWB256,
29394 IX86_BUILTIN_PMOVUSWB128,
29395 IX86_BUILTIN_PMOVUSWB256,
29396 IX86_BUILTIN_PMOVDB128,
29397 IX86_BUILTIN_PMOVDB256,
29398 IX86_BUILTIN_PMOVSDB128,
29399 IX86_BUILTIN_PMOVSDB256,
29400 IX86_BUILTIN_PMOVUSDB128,
29401 IX86_BUILTIN_PMOVUSDB256,
29402 IX86_BUILTIN_PMOVDW128,
29403 IX86_BUILTIN_PMOVDW256,
29404 IX86_BUILTIN_PMOVSDW128,
29405 IX86_BUILTIN_PMOVSDW256,
29406 IX86_BUILTIN_PMOVUSDW128,
29407 IX86_BUILTIN_PMOVUSDW256,
29408 IX86_BUILTIN_PMOVQB128,
29409 IX86_BUILTIN_PMOVQB256,
29410 IX86_BUILTIN_PMOVSQB128,
29411 IX86_BUILTIN_PMOVSQB256,
29412 IX86_BUILTIN_PMOVUSQB128,
29413 IX86_BUILTIN_PMOVUSQB256,
29414 IX86_BUILTIN_PMOVQW128,
29415 IX86_BUILTIN_PMOVQW256,
29416 IX86_BUILTIN_PMOVSQW128,
29417 IX86_BUILTIN_PMOVSQW256,
29418 IX86_BUILTIN_PMOVUSQW128,
29419 IX86_BUILTIN_PMOVUSQW256,
29420 IX86_BUILTIN_PMOVQD128,
29421 IX86_BUILTIN_PMOVQD256,
29422 IX86_BUILTIN_PMOVSQD128,
29423 IX86_BUILTIN_PMOVSQD256,
29424 IX86_BUILTIN_PMOVUSQD128,
29425 IX86_BUILTIN_PMOVUSQD256,
29426 IX86_BUILTIN_RANGEPD256,
29427 IX86_BUILTIN_RANGEPD128,
29428 IX86_BUILTIN_RANGEPS256,
29429 IX86_BUILTIN_RANGEPS128,
29430 IX86_BUILTIN_GETEXPPS256,
29431 IX86_BUILTIN_GETEXPPD256,
29432 IX86_BUILTIN_GETEXPPS128,
29433 IX86_BUILTIN_GETEXPPD128,
29434 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29435 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29436 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29437 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29438 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29439 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29440 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29441 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29442 IX86_BUILTIN_PABSQ256,
29443 IX86_BUILTIN_PABSQ128,
29444 IX86_BUILTIN_PABSD256_MASK,
29445 IX86_BUILTIN_PABSD128_MASK,
29446 IX86_BUILTIN_PMULHRSW256_MASK,
29447 IX86_BUILTIN_PMULHRSW128_MASK,
29448 IX86_BUILTIN_PMULHUW128_MASK,
29449 IX86_BUILTIN_PMULHUW256_MASK,
29450 IX86_BUILTIN_PMULHW256_MASK,
29451 IX86_BUILTIN_PMULHW128_MASK,
29452 IX86_BUILTIN_PMULLW256_MASK,
29453 IX86_BUILTIN_PMULLW128_MASK,
29454 IX86_BUILTIN_PMULLQ256,
29455 IX86_BUILTIN_PMULLQ128,
29456 IX86_BUILTIN_ANDPD256_MASK,
29457 IX86_BUILTIN_ANDPD128_MASK,
29458 IX86_BUILTIN_ANDPS256_MASK,
29459 IX86_BUILTIN_ANDPS128_MASK,
29460 IX86_BUILTIN_ANDNPD256_MASK,
29461 IX86_BUILTIN_ANDNPD128_MASK,
29462 IX86_BUILTIN_ANDNPS256_MASK,
29463 IX86_BUILTIN_ANDNPS128_MASK,
29464 IX86_BUILTIN_PSLLWI128_MASK,
29465 IX86_BUILTIN_PSLLDI128_MASK,
29466 IX86_BUILTIN_PSLLQI128_MASK,
29467 IX86_BUILTIN_PSLLW128_MASK,
29468 IX86_BUILTIN_PSLLD128_MASK,
29469 IX86_BUILTIN_PSLLQ128_MASK,
29470 IX86_BUILTIN_PSLLWI256_MASK,
29471 IX86_BUILTIN_PSLLW256_MASK,
29472 IX86_BUILTIN_PSLLDI256_MASK,
29473 IX86_BUILTIN_PSLLD256_MASK,
29474 IX86_BUILTIN_PSLLQI256_MASK,
29475 IX86_BUILTIN_PSLLQ256_MASK,
29476 IX86_BUILTIN_PSRADI128_MASK,
29477 IX86_BUILTIN_PSRAD128_MASK,
29478 IX86_BUILTIN_PSRADI256_MASK,
29479 IX86_BUILTIN_PSRAD256_MASK,
29480 IX86_BUILTIN_PSRAQI128_MASK,
29481 IX86_BUILTIN_PSRAQ128_MASK,
29482 IX86_BUILTIN_PSRAQI256_MASK,
29483 IX86_BUILTIN_PSRAQ256_MASK,
29484 IX86_BUILTIN_PANDD256,
29485 IX86_BUILTIN_PANDD128,
29486 IX86_BUILTIN_PSRLDI128_MASK,
29487 IX86_BUILTIN_PSRLD128_MASK,
29488 IX86_BUILTIN_PSRLDI256_MASK,
29489 IX86_BUILTIN_PSRLD256_MASK,
29490 IX86_BUILTIN_PSRLQI128_MASK,
29491 IX86_BUILTIN_PSRLQ128_MASK,
29492 IX86_BUILTIN_PSRLQI256_MASK,
29493 IX86_BUILTIN_PSRLQ256_MASK,
29494 IX86_BUILTIN_PANDQ256,
29495 IX86_BUILTIN_PANDQ128,
29496 IX86_BUILTIN_PANDND256,
29497 IX86_BUILTIN_PANDND128,
29498 IX86_BUILTIN_PANDNQ256,
29499 IX86_BUILTIN_PANDNQ128,
29500 IX86_BUILTIN_PORD256,
29501 IX86_BUILTIN_PORD128,
29502 IX86_BUILTIN_PORQ256,
29503 IX86_BUILTIN_PORQ128,
29504 IX86_BUILTIN_PXORD256,
29505 IX86_BUILTIN_PXORD128,
29506 IX86_BUILTIN_PXORQ256,
29507 IX86_BUILTIN_PXORQ128,
29508 IX86_BUILTIN_PACKSSWB256_MASK,
29509 IX86_BUILTIN_PACKSSWB128_MASK,
29510 IX86_BUILTIN_PACKUSWB256_MASK,
29511 IX86_BUILTIN_PACKUSWB128_MASK,
29512 IX86_BUILTIN_RNDSCALEPS256,
29513 IX86_BUILTIN_RNDSCALEPD256,
29514 IX86_BUILTIN_RNDSCALEPS128,
29515 IX86_BUILTIN_RNDSCALEPD128,
29516 IX86_BUILTIN_VTERNLOGQ256_MASK,
29517 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29518 IX86_BUILTIN_VTERNLOGD256_MASK,
29519 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29520 IX86_BUILTIN_VTERNLOGQ128_MASK,
29521 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29522 IX86_BUILTIN_VTERNLOGD128_MASK,
29523 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29524 IX86_BUILTIN_SCALEFPD256,
29525 IX86_BUILTIN_SCALEFPS256,
29526 IX86_BUILTIN_SCALEFPD128,
29527 IX86_BUILTIN_SCALEFPS128,
29528 IX86_BUILTIN_VFMADDPD256_MASK,
29529 IX86_BUILTIN_VFMADDPD256_MASK3,
29530 IX86_BUILTIN_VFMADDPD256_MASKZ,
29531 IX86_BUILTIN_VFMADDPD128_MASK,
29532 IX86_BUILTIN_VFMADDPD128_MASK3,
29533 IX86_BUILTIN_VFMADDPD128_MASKZ,
29534 IX86_BUILTIN_VFMADDPS256_MASK,
29535 IX86_BUILTIN_VFMADDPS256_MASK3,
29536 IX86_BUILTIN_VFMADDPS256_MASKZ,
29537 IX86_BUILTIN_VFMADDPS128_MASK,
29538 IX86_BUILTIN_VFMADDPS128_MASK3,
29539 IX86_BUILTIN_VFMADDPS128_MASKZ,
29540 IX86_BUILTIN_VFMSUBPD256_MASK3,
29541 IX86_BUILTIN_VFMSUBPD128_MASK3,
29542 IX86_BUILTIN_VFMSUBPS256_MASK3,
29543 IX86_BUILTIN_VFMSUBPS128_MASK3,
29544 IX86_BUILTIN_VFNMADDPD256_MASK,
29545 IX86_BUILTIN_VFNMADDPD128_MASK,
29546 IX86_BUILTIN_VFNMADDPS256_MASK,
29547 IX86_BUILTIN_VFNMADDPS128_MASK,
29548 IX86_BUILTIN_VFNMSUBPD256_MASK,
29549 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29550 IX86_BUILTIN_VFNMSUBPD128_MASK,
29551 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29552 IX86_BUILTIN_VFNMSUBPS256_MASK,
29553 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29554 IX86_BUILTIN_VFNMSUBPS128_MASK,
29555 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29556 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29557 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29558 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29559 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29560 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29561 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29562 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29563 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29564 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29565 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29566 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29567 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29568 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29569 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29570 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29571 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29572 IX86_BUILTIN_INSERTF64X2_256,
29573 IX86_BUILTIN_INSERTI64X2_256,
29574 IX86_BUILTIN_PSRAVV16HI,
29575 IX86_BUILTIN_PSRAVV8HI,
29576 IX86_BUILTIN_PMADDUBSW256_MASK,
29577 IX86_BUILTIN_PMADDUBSW128_MASK,
29578 IX86_BUILTIN_PMADDWD256_MASK,
29579 IX86_BUILTIN_PMADDWD128_MASK,
29580 IX86_BUILTIN_PSRLVV16HI,
29581 IX86_BUILTIN_PSRLVV8HI,
29582 IX86_BUILTIN_CVTPS2DQ256_MASK,
29583 IX86_BUILTIN_CVTPS2DQ128_MASK,
29584 IX86_BUILTIN_CVTPS2UDQ256,
29585 IX86_BUILTIN_CVTPS2UDQ128,
29586 IX86_BUILTIN_CVTPS2QQ256,
29587 IX86_BUILTIN_CVTPS2QQ128,
29588 IX86_BUILTIN_CVTPS2UQQ256,
29589 IX86_BUILTIN_CVTPS2UQQ128,
29590 IX86_BUILTIN_GETMANTPS256,
29591 IX86_BUILTIN_GETMANTPS128,
29592 IX86_BUILTIN_GETMANTPD256,
29593 IX86_BUILTIN_GETMANTPD128,
29594 IX86_BUILTIN_MOVDDUP256_MASK,
29595 IX86_BUILTIN_MOVDDUP128_MASK,
29596 IX86_BUILTIN_MOVSHDUP256_MASK,
29597 IX86_BUILTIN_MOVSHDUP128_MASK,
29598 IX86_BUILTIN_MOVSLDUP256_MASK,
29599 IX86_BUILTIN_MOVSLDUP128_MASK,
29600 IX86_BUILTIN_CVTQQ2PS256,
29601 IX86_BUILTIN_CVTQQ2PS128,
29602 IX86_BUILTIN_CVTUQQ2PS256,
29603 IX86_BUILTIN_CVTUQQ2PS128,
29604 IX86_BUILTIN_CVTQQ2PD256,
29605 IX86_BUILTIN_CVTQQ2PD128,
29606 IX86_BUILTIN_CVTUQQ2PD256,
29607 IX86_BUILTIN_CVTUQQ2PD128,
29608 IX86_BUILTIN_VPERMT2VARQ256,
29609 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29610 IX86_BUILTIN_VPERMT2VARD256,
29611 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29612 IX86_BUILTIN_VPERMI2VARQ256,
29613 IX86_BUILTIN_VPERMI2VARD256,
29614 IX86_BUILTIN_VPERMT2VARPD256,
29615 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29616 IX86_BUILTIN_VPERMT2VARPS256,
29617 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29618 IX86_BUILTIN_VPERMI2VARPD256,
29619 IX86_BUILTIN_VPERMI2VARPS256,
29620 IX86_BUILTIN_VPERMT2VARQ128,
29621 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29622 IX86_BUILTIN_VPERMT2VARD128,
29623 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29624 IX86_BUILTIN_VPERMI2VARQ128,
29625 IX86_BUILTIN_VPERMI2VARD128,
29626 IX86_BUILTIN_VPERMT2VARPD128,
29627 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29628 IX86_BUILTIN_VPERMT2VARPS128,
29629 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29630 IX86_BUILTIN_VPERMI2VARPD128,
29631 IX86_BUILTIN_VPERMI2VARPS128,
29632 IX86_BUILTIN_PSHUFB256_MASK,
29633 IX86_BUILTIN_PSHUFB128_MASK,
29634 IX86_BUILTIN_PSHUFHW256_MASK,
29635 IX86_BUILTIN_PSHUFHW128_MASK,
29636 IX86_BUILTIN_PSHUFLW256_MASK,
29637 IX86_BUILTIN_PSHUFLW128_MASK,
29638 IX86_BUILTIN_PSHUFD256_MASK,
29639 IX86_BUILTIN_PSHUFD128_MASK,
29640 IX86_BUILTIN_SHUFPD256_MASK,
29641 IX86_BUILTIN_SHUFPD128_MASK,
29642 IX86_BUILTIN_SHUFPS256_MASK,
29643 IX86_BUILTIN_SHUFPS128_MASK,
29644 IX86_BUILTIN_PROLVQ256,
29645 IX86_BUILTIN_PROLVQ128,
29646 IX86_BUILTIN_PROLQ256,
29647 IX86_BUILTIN_PROLQ128,
29648 IX86_BUILTIN_PRORVQ256,
29649 IX86_BUILTIN_PRORVQ128,
29650 IX86_BUILTIN_PRORQ256,
29651 IX86_BUILTIN_PRORQ128,
29652 IX86_BUILTIN_PSRAVQ128,
29653 IX86_BUILTIN_PSRAVQ256,
29654 IX86_BUILTIN_PSLLVV4DI_MASK,
29655 IX86_BUILTIN_PSLLVV2DI_MASK,
29656 IX86_BUILTIN_PSLLVV8SI_MASK,
29657 IX86_BUILTIN_PSLLVV4SI_MASK,
29658 IX86_BUILTIN_PSRAVV8SI_MASK,
29659 IX86_BUILTIN_PSRAVV4SI_MASK,
29660 IX86_BUILTIN_PSRLVV4DI_MASK,
29661 IX86_BUILTIN_PSRLVV2DI_MASK,
29662 IX86_BUILTIN_PSRLVV8SI_MASK,
29663 IX86_BUILTIN_PSRLVV4SI_MASK,
29664 IX86_BUILTIN_PSRAWI256_MASK,
29665 IX86_BUILTIN_PSRAW256_MASK,
29666 IX86_BUILTIN_PSRAWI128_MASK,
29667 IX86_BUILTIN_PSRAW128_MASK,
29668 IX86_BUILTIN_PSRLWI256_MASK,
29669 IX86_BUILTIN_PSRLW256_MASK,
29670 IX86_BUILTIN_PSRLWI128_MASK,
29671 IX86_BUILTIN_PSRLW128_MASK,
29672 IX86_BUILTIN_PRORVD256,
29673 IX86_BUILTIN_PROLVD256,
29674 IX86_BUILTIN_PRORD256,
29675 IX86_BUILTIN_PROLD256,
29676 IX86_BUILTIN_PRORVD128,
29677 IX86_BUILTIN_PROLVD128,
29678 IX86_BUILTIN_PRORD128,
29679 IX86_BUILTIN_PROLD128,
29680 IX86_BUILTIN_FPCLASSPD256,
29681 IX86_BUILTIN_FPCLASSPD128,
29682 IX86_BUILTIN_FPCLASSSD,
29683 IX86_BUILTIN_FPCLASSPS256,
29684 IX86_BUILTIN_FPCLASSPS128,
29685 IX86_BUILTIN_FPCLASSSS,
29686 IX86_BUILTIN_CVTB2MASK128,
29687 IX86_BUILTIN_CVTB2MASK256,
29688 IX86_BUILTIN_CVTW2MASK128,
29689 IX86_BUILTIN_CVTW2MASK256,
29690 IX86_BUILTIN_CVTD2MASK128,
29691 IX86_BUILTIN_CVTD2MASK256,
29692 IX86_BUILTIN_CVTQ2MASK128,
29693 IX86_BUILTIN_CVTQ2MASK256,
29694 IX86_BUILTIN_CVTMASK2B128,
29695 IX86_BUILTIN_CVTMASK2B256,
29696 IX86_BUILTIN_CVTMASK2W128,
29697 IX86_BUILTIN_CVTMASK2W256,
29698 IX86_BUILTIN_CVTMASK2D128,
29699 IX86_BUILTIN_CVTMASK2D256,
29700 IX86_BUILTIN_CVTMASK2Q128,
29701 IX86_BUILTIN_CVTMASK2Q256,
29702 IX86_BUILTIN_PCMPEQB128_MASK,
29703 IX86_BUILTIN_PCMPEQB256_MASK,
29704 IX86_BUILTIN_PCMPEQW128_MASK,
29705 IX86_BUILTIN_PCMPEQW256_MASK,
29706 IX86_BUILTIN_PCMPEQD128_MASK,
29707 IX86_BUILTIN_PCMPEQD256_MASK,
29708 IX86_BUILTIN_PCMPEQQ128_MASK,
29709 IX86_BUILTIN_PCMPEQQ256_MASK,
29710 IX86_BUILTIN_PCMPGTB128_MASK,
29711 IX86_BUILTIN_PCMPGTB256_MASK,
29712 IX86_BUILTIN_PCMPGTW128_MASK,
29713 IX86_BUILTIN_PCMPGTW256_MASK,
29714 IX86_BUILTIN_PCMPGTD128_MASK,
29715 IX86_BUILTIN_PCMPGTD256_MASK,
29716 IX86_BUILTIN_PCMPGTQ128_MASK,
29717 IX86_BUILTIN_PCMPGTQ256_MASK,
29718 IX86_BUILTIN_PTESTMB128,
29719 IX86_BUILTIN_PTESTMB256,
29720 IX86_BUILTIN_PTESTMW128,
29721 IX86_BUILTIN_PTESTMW256,
29722 IX86_BUILTIN_PTESTMD128,
29723 IX86_BUILTIN_PTESTMD256,
29724 IX86_BUILTIN_PTESTMQ128,
29725 IX86_BUILTIN_PTESTMQ256,
29726 IX86_BUILTIN_PTESTNMB128,
29727 IX86_BUILTIN_PTESTNMB256,
29728 IX86_BUILTIN_PTESTNMW128,
29729 IX86_BUILTIN_PTESTNMW256,
29730 IX86_BUILTIN_PTESTNMD128,
29731 IX86_BUILTIN_PTESTNMD256,
29732 IX86_BUILTIN_PTESTNMQ128,
29733 IX86_BUILTIN_PTESTNMQ256,
29734 IX86_BUILTIN_PBROADCASTMB128,
29735 IX86_BUILTIN_PBROADCASTMB256,
29736 IX86_BUILTIN_PBROADCASTMW128,
29737 IX86_BUILTIN_PBROADCASTMW256,
29738 IX86_BUILTIN_COMPRESSPD256,
29739 IX86_BUILTIN_COMPRESSPD128,
29740 IX86_BUILTIN_COMPRESSPS256,
29741 IX86_BUILTIN_COMPRESSPS128,
29742 IX86_BUILTIN_PCOMPRESSQ256,
29743 IX86_BUILTIN_PCOMPRESSQ128,
29744 IX86_BUILTIN_PCOMPRESSD256,
29745 IX86_BUILTIN_PCOMPRESSD128,
29746 IX86_BUILTIN_EXPANDPD256,
29747 IX86_BUILTIN_EXPANDPD128,
29748 IX86_BUILTIN_EXPANDPS256,
29749 IX86_BUILTIN_EXPANDPS128,
29750 IX86_BUILTIN_PEXPANDQ256,
29751 IX86_BUILTIN_PEXPANDQ128,
29752 IX86_BUILTIN_PEXPANDD256,
29753 IX86_BUILTIN_PEXPANDD128,
29754 IX86_BUILTIN_EXPANDPD256Z,
29755 IX86_BUILTIN_EXPANDPD128Z,
29756 IX86_BUILTIN_EXPANDPS256Z,
29757 IX86_BUILTIN_EXPANDPS128Z,
29758 IX86_BUILTIN_PEXPANDQ256Z,
29759 IX86_BUILTIN_PEXPANDQ128Z,
29760 IX86_BUILTIN_PEXPANDD256Z,
29761 IX86_BUILTIN_PEXPANDD128Z,
29762 IX86_BUILTIN_PMAXSD256_MASK,
29763 IX86_BUILTIN_PMINSD256_MASK,
29764 IX86_BUILTIN_PMAXUD256_MASK,
29765 IX86_BUILTIN_PMINUD256_MASK,
29766 IX86_BUILTIN_PMAXSD128_MASK,
29767 IX86_BUILTIN_PMINSD128_MASK,
29768 IX86_BUILTIN_PMAXUD128_MASK,
29769 IX86_BUILTIN_PMINUD128_MASK,
29770 IX86_BUILTIN_PMAXSQ256_MASK,
29771 IX86_BUILTIN_PMINSQ256_MASK,
29772 IX86_BUILTIN_PMAXUQ256_MASK,
29773 IX86_BUILTIN_PMINUQ256_MASK,
29774 IX86_BUILTIN_PMAXSQ128_MASK,
29775 IX86_BUILTIN_PMINSQ128_MASK,
29776 IX86_BUILTIN_PMAXUQ128_MASK,
29777 IX86_BUILTIN_PMINUQ128_MASK,
29778 IX86_BUILTIN_PMINSB256_MASK,
29779 IX86_BUILTIN_PMINUB256_MASK,
29780 IX86_BUILTIN_PMAXSB256_MASK,
29781 IX86_BUILTIN_PMAXUB256_MASK,
29782 IX86_BUILTIN_PMINSB128_MASK,
29783 IX86_BUILTIN_PMINUB128_MASK,
29784 IX86_BUILTIN_PMAXSB128_MASK,
29785 IX86_BUILTIN_PMAXUB128_MASK,
29786 IX86_BUILTIN_PMINSW256_MASK,
29787 IX86_BUILTIN_PMINUW256_MASK,
29788 IX86_BUILTIN_PMAXSW256_MASK,
29789 IX86_BUILTIN_PMAXUW256_MASK,
29790 IX86_BUILTIN_PMINSW128_MASK,
29791 IX86_BUILTIN_PMINUW128_MASK,
29792 IX86_BUILTIN_PMAXSW128_MASK,
29793 IX86_BUILTIN_PMAXUW128_MASK,
29794 IX86_BUILTIN_VPCONFLICTQ256,
29795 IX86_BUILTIN_VPCONFLICTD256,
29796 IX86_BUILTIN_VPCLZCNTQ256,
29797 IX86_BUILTIN_VPCLZCNTD256,
29798 IX86_BUILTIN_UNPCKHPD256_MASK,
29799 IX86_BUILTIN_UNPCKHPD128_MASK,
29800 IX86_BUILTIN_UNPCKHPS256_MASK,
29801 IX86_BUILTIN_UNPCKHPS128_MASK,
29802 IX86_BUILTIN_UNPCKLPD256_MASK,
29803 IX86_BUILTIN_UNPCKLPD128_MASK,
29804 IX86_BUILTIN_UNPCKLPS256_MASK,
29805 IX86_BUILTIN_VPCONFLICTQ128,
29806 IX86_BUILTIN_VPCONFLICTD128,
29807 IX86_BUILTIN_VPCLZCNTQ128,
29808 IX86_BUILTIN_VPCLZCNTD128,
29809 IX86_BUILTIN_UNPCKLPS128_MASK,
29810 IX86_BUILTIN_ALIGND256,
29811 IX86_BUILTIN_ALIGNQ256,
29812 IX86_BUILTIN_ALIGND128,
29813 IX86_BUILTIN_ALIGNQ128,
29814 IX86_BUILTIN_CVTPS2PH256_MASK,
29815 IX86_BUILTIN_CVTPS2PH_MASK,
29816 IX86_BUILTIN_CVTPH2PS_MASK,
29817 IX86_BUILTIN_CVTPH2PS256_MASK,
29818 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29819 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29820 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29821 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29822 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29823 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29824 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29825 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29826 IX86_BUILTIN_PUNPCKHBW128_MASK,
29827 IX86_BUILTIN_PUNPCKHBW256_MASK,
29828 IX86_BUILTIN_PUNPCKHWD128_MASK,
29829 IX86_BUILTIN_PUNPCKHWD256_MASK,
29830 IX86_BUILTIN_PUNPCKLBW128_MASK,
29831 IX86_BUILTIN_PUNPCKLBW256_MASK,
29832 IX86_BUILTIN_PUNPCKLWD128_MASK,
29833 IX86_BUILTIN_PUNPCKLWD256_MASK,
29834 IX86_BUILTIN_PSLLVV16HI,
29835 IX86_BUILTIN_PSLLVV8HI,
29836 IX86_BUILTIN_PACKSSDW256_MASK,
29837 IX86_BUILTIN_PACKSSDW128_MASK,
29838 IX86_BUILTIN_PACKUSDW256_MASK,
29839 IX86_BUILTIN_PACKUSDW128_MASK,
29840 IX86_BUILTIN_PAVGB256_MASK,
29841 IX86_BUILTIN_PAVGW256_MASK,
29842 IX86_BUILTIN_PAVGB128_MASK,
29843 IX86_BUILTIN_PAVGW128_MASK,
29844 IX86_BUILTIN_VPERMVARSF256_MASK,
29845 IX86_BUILTIN_VPERMVARDF256_MASK,
29846 IX86_BUILTIN_VPERMDF256_MASK,
29847 IX86_BUILTIN_PABSB256_MASK,
29848 IX86_BUILTIN_PABSB128_MASK,
29849 IX86_BUILTIN_PABSW256_MASK,
29850 IX86_BUILTIN_PABSW128_MASK,
29851 IX86_BUILTIN_VPERMILVARPD_MASK,
29852 IX86_BUILTIN_VPERMILVARPS_MASK,
29853 IX86_BUILTIN_VPERMILVARPD256_MASK,
29854 IX86_BUILTIN_VPERMILVARPS256_MASK,
29855 IX86_BUILTIN_VPERMILPD_MASK,
29856 IX86_BUILTIN_VPERMILPS_MASK,
29857 IX86_BUILTIN_VPERMILPD256_MASK,
29858 IX86_BUILTIN_VPERMILPS256_MASK,
29859 IX86_BUILTIN_BLENDMQ256,
29860 IX86_BUILTIN_BLENDMD256,
29861 IX86_BUILTIN_BLENDMPD256,
29862 IX86_BUILTIN_BLENDMPS256,
29863 IX86_BUILTIN_BLENDMQ128,
29864 IX86_BUILTIN_BLENDMD128,
29865 IX86_BUILTIN_BLENDMPD128,
29866 IX86_BUILTIN_BLENDMPS128,
29867 IX86_BUILTIN_BLENDMW256,
29868 IX86_BUILTIN_BLENDMB256,
29869 IX86_BUILTIN_BLENDMW128,
29870 IX86_BUILTIN_BLENDMB128,
29871 IX86_BUILTIN_PMULLD256_MASK,
29872 IX86_BUILTIN_PMULLD128_MASK,
29873 IX86_BUILTIN_PMULUDQ256_MASK,
29874 IX86_BUILTIN_PMULDQ256_MASK,
29875 IX86_BUILTIN_PMULDQ128_MASK,
29876 IX86_BUILTIN_PMULUDQ128_MASK,
29877 IX86_BUILTIN_CVTPD2PS256_MASK,
29878 IX86_BUILTIN_CVTPD2PS_MASK,
29879 IX86_BUILTIN_VPERMVARSI256_MASK,
29880 IX86_BUILTIN_VPERMVARDI256_MASK,
29881 IX86_BUILTIN_VPERMDI256_MASK,
29882 IX86_BUILTIN_CMPQ256,
29883 IX86_BUILTIN_CMPD256,
29884 IX86_BUILTIN_UCMPQ256,
29885 IX86_BUILTIN_UCMPD256,
29886 IX86_BUILTIN_CMPB256,
29887 IX86_BUILTIN_CMPW256,
29888 IX86_BUILTIN_UCMPB256,
29889 IX86_BUILTIN_UCMPW256,
29890 IX86_BUILTIN_CMPPD256_MASK,
29891 IX86_BUILTIN_CMPPS256_MASK,
29892 IX86_BUILTIN_CMPQ128,
29893 IX86_BUILTIN_CMPD128,
29894 IX86_BUILTIN_UCMPQ128,
29895 IX86_BUILTIN_UCMPD128,
29896 IX86_BUILTIN_CMPB128,
29897 IX86_BUILTIN_CMPW128,
29898 IX86_BUILTIN_UCMPB128,
29899 IX86_BUILTIN_UCMPW128,
29900 IX86_BUILTIN_CMPPD128_MASK,
29901 IX86_BUILTIN_CMPPS128_MASK,
29903 IX86_BUILTIN_GATHER3SIV8SF,
29904 IX86_BUILTIN_GATHER3SIV4SF,
29905 IX86_BUILTIN_GATHER3SIV4DF,
29906 IX86_BUILTIN_GATHER3SIV2DF,
29907 IX86_BUILTIN_GATHER3DIV8SF,
29908 IX86_BUILTIN_GATHER3DIV4SF,
29909 IX86_BUILTIN_GATHER3DIV4DF,
29910 IX86_BUILTIN_GATHER3DIV2DF,
29911 IX86_BUILTIN_GATHER3SIV8SI,
29912 IX86_BUILTIN_GATHER3SIV4SI,
29913 IX86_BUILTIN_GATHER3SIV4DI,
29914 IX86_BUILTIN_GATHER3SIV2DI,
29915 IX86_BUILTIN_GATHER3DIV8SI,
29916 IX86_BUILTIN_GATHER3DIV4SI,
29917 IX86_BUILTIN_GATHER3DIV4DI,
29918 IX86_BUILTIN_GATHER3DIV2DI,
29919 IX86_BUILTIN_SCATTERSIV8SF,
29920 IX86_BUILTIN_SCATTERSIV4SF,
29921 IX86_BUILTIN_SCATTERSIV4DF,
29922 IX86_BUILTIN_SCATTERSIV2DF,
29923 IX86_BUILTIN_SCATTERDIV8SF,
29924 IX86_BUILTIN_SCATTERDIV4SF,
29925 IX86_BUILTIN_SCATTERDIV4DF,
29926 IX86_BUILTIN_SCATTERDIV2DF,
29927 IX86_BUILTIN_SCATTERSIV8SI,
29928 IX86_BUILTIN_SCATTERSIV4SI,
29929 IX86_BUILTIN_SCATTERSIV4DI,
29930 IX86_BUILTIN_SCATTERSIV2DI,
29931 IX86_BUILTIN_SCATTERDIV8SI,
29932 IX86_BUILTIN_SCATTERDIV4SI,
29933 IX86_BUILTIN_SCATTERDIV4DI,
29934 IX86_BUILTIN_SCATTERDIV2DI,
29936 /* AVX512DQ. */
29937 IX86_BUILTIN_RANGESD128,
29938 IX86_BUILTIN_RANGESS128,
29939 IX86_BUILTIN_KUNPCKWD,
29940 IX86_BUILTIN_KUNPCKDQ,
29941 IX86_BUILTIN_BROADCASTF32x2_512,
29942 IX86_BUILTIN_BROADCASTI32x2_512,
29943 IX86_BUILTIN_BROADCASTF64X2_512,
29944 IX86_BUILTIN_BROADCASTI64X2_512,
29945 IX86_BUILTIN_BROADCASTF32X8_512,
29946 IX86_BUILTIN_BROADCASTI32X8_512,
29947 IX86_BUILTIN_EXTRACTF64X2_512,
29948 IX86_BUILTIN_EXTRACTF32X8,
29949 IX86_BUILTIN_EXTRACTI64X2_512,
29950 IX86_BUILTIN_EXTRACTI32X8,
29951 IX86_BUILTIN_REDUCEPD512_MASK,
29952 IX86_BUILTIN_REDUCEPS512_MASK,
29953 IX86_BUILTIN_PMULLQ512,
29954 IX86_BUILTIN_XORPD512,
29955 IX86_BUILTIN_XORPS512,
29956 IX86_BUILTIN_ORPD512,
29957 IX86_BUILTIN_ORPS512,
29958 IX86_BUILTIN_ANDPD512,
29959 IX86_BUILTIN_ANDPS512,
29960 IX86_BUILTIN_ANDNPD512,
29961 IX86_BUILTIN_ANDNPS512,
29962 IX86_BUILTIN_INSERTF32X8,
29963 IX86_BUILTIN_INSERTI32X8,
29964 IX86_BUILTIN_INSERTF64X2_512,
29965 IX86_BUILTIN_INSERTI64X2_512,
29966 IX86_BUILTIN_FPCLASSPD512,
29967 IX86_BUILTIN_FPCLASSPS512,
29968 IX86_BUILTIN_CVTD2MASK512,
29969 IX86_BUILTIN_CVTQ2MASK512,
29970 IX86_BUILTIN_CVTMASK2D512,
29971 IX86_BUILTIN_CVTMASK2Q512,
29972 IX86_BUILTIN_CVTPD2QQ512,
29973 IX86_BUILTIN_CVTPS2QQ512,
29974 IX86_BUILTIN_CVTPD2UQQ512,
29975 IX86_BUILTIN_CVTPS2UQQ512,
29976 IX86_BUILTIN_CVTQQ2PS512,
29977 IX86_BUILTIN_CVTUQQ2PS512,
29978 IX86_BUILTIN_CVTQQ2PD512,
29979 IX86_BUILTIN_CVTUQQ2PD512,
29980 IX86_BUILTIN_CVTTPS2QQ512,
29981 IX86_BUILTIN_CVTTPS2UQQ512,
29982 IX86_BUILTIN_CVTTPD2QQ512,
29983 IX86_BUILTIN_CVTTPD2UQQ512,
29984 IX86_BUILTIN_RANGEPS512,
29985 IX86_BUILTIN_RANGEPD512,
29987 /* AVX512BW. */
29988 IX86_BUILTIN_PACKUSDW512,
29989 IX86_BUILTIN_PACKSSDW512,
29990 IX86_BUILTIN_LOADDQUHI512_MASK,
29991 IX86_BUILTIN_LOADDQUQI512_MASK,
29992 IX86_BUILTIN_PSLLDQ512,
29993 IX86_BUILTIN_PSRLDQ512,
29994 IX86_BUILTIN_STOREDQUHI512_MASK,
29995 IX86_BUILTIN_STOREDQUQI512_MASK,
29996 IX86_BUILTIN_PALIGNR512,
29997 IX86_BUILTIN_PALIGNR512_MASK,
29998 IX86_BUILTIN_MOVDQUHI512_MASK,
29999 IX86_BUILTIN_MOVDQUQI512_MASK,
30000 IX86_BUILTIN_PSADBW512,
30001 IX86_BUILTIN_DBPSADBW512,
30002 IX86_BUILTIN_PBROADCASTB512,
30003 IX86_BUILTIN_PBROADCASTB512_GPR,
30004 IX86_BUILTIN_PBROADCASTW512,
30005 IX86_BUILTIN_PBROADCASTW512_GPR,
30006 IX86_BUILTIN_PMOVSXBW512_MASK,
30007 IX86_BUILTIN_PMOVZXBW512_MASK,
30008 IX86_BUILTIN_VPERMVARHI512_MASK,
30009 IX86_BUILTIN_VPERMT2VARHI512,
30010 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30011 IX86_BUILTIN_VPERMI2VARHI512,
30012 IX86_BUILTIN_PAVGB512,
30013 IX86_BUILTIN_PAVGW512,
30014 IX86_BUILTIN_PADDB512,
30015 IX86_BUILTIN_PSUBB512,
30016 IX86_BUILTIN_PSUBSB512,
30017 IX86_BUILTIN_PADDSB512,
30018 IX86_BUILTIN_PSUBUSB512,
30019 IX86_BUILTIN_PADDUSB512,
30020 IX86_BUILTIN_PSUBW512,
30021 IX86_BUILTIN_PADDW512,
30022 IX86_BUILTIN_PSUBSW512,
30023 IX86_BUILTIN_PADDSW512,
30024 IX86_BUILTIN_PSUBUSW512,
30025 IX86_BUILTIN_PADDUSW512,
30026 IX86_BUILTIN_PMAXUW512,
30027 IX86_BUILTIN_PMAXSW512,
30028 IX86_BUILTIN_PMINUW512,
30029 IX86_BUILTIN_PMINSW512,
30030 IX86_BUILTIN_PMAXUB512,
30031 IX86_BUILTIN_PMAXSB512,
30032 IX86_BUILTIN_PMINUB512,
30033 IX86_BUILTIN_PMINSB512,
30034 IX86_BUILTIN_PMOVWB512,
30035 IX86_BUILTIN_PMOVSWB512,
30036 IX86_BUILTIN_PMOVUSWB512,
30037 IX86_BUILTIN_PMULHRSW512_MASK,
30038 IX86_BUILTIN_PMULHUW512_MASK,
30039 IX86_BUILTIN_PMULHW512_MASK,
30040 IX86_BUILTIN_PMULLW512_MASK,
30041 IX86_BUILTIN_PSLLWI512_MASK,
30042 IX86_BUILTIN_PSLLW512_MASK,
30043 IX86_BUILTIN_PACKSSWB512,
30044 IX86_BUILTIN_PACKUSWB512,
30045 IX86_BUILTIN_PSRAVV32HI,
30046 IX86_BUILTIN_PMADDUBSW512_MASK,
30047 IX86_BUILTIN_PMADDWD512_MASK,
30048 IX86_BUILTIN_PSRLVV32HI,
30049 IX86_BUILTIN_PUNPCKHBW512,
30050 IX86_BUILTIN_PUNPCKHWD512,
30051 IX86_BUILTIN_PUNPCKLBW512,
30052 IX86_BUILTIN_PUNPCKLWD512,
30053 IX86_BUILTIN_PSHUFB512,
30054 IX86_BUILTIN_PSHUFHW512,
30055 IX86_BUILTIN_PSHUFLW512,
30056 IX86_BUILTIN_PSRAWI512,
30057 IX86_BUILTIN_PSRAW512,
30058 IX86_BUILTIN_PSRLWI512,
30059 IX86_BUILTIN_PSRLW512,
30060 IX86_BUILTIN_CVTB2MASK512,
30061 IX86_BUILTIN_CVTW2MASK512,
30062 IX86_BUILTIN_CVTMASK2B512,
30063 IX86_BUILTIN_CVTMASK2W512,
30064 IX86_BUILTIN_PCMPEQB512_MASK,
30065 IX86_BUILTIN_PCMPEQW512_MASK,
30066 IX86_BUILTIN_PCMPGTB512_MASK,
30067 IX86_BUILTIN_PCMPGTW512_MASK,
30068 IX86_BUILTIN_PTESTMB512,
30069 IX86_BUILTIN_PTESTMW512,
30070 IX86_BUILTIN_PTESTNMB512,
30071 IX86_BUILTIN_PTESTNMW512,
30072 IX86_BUILTIN_PSLLVV32HI,
30073 IX86_BUILTIN_PABSB512,
30074 IX86_BUILTIN_PABSW512,
30075 IX86_BUILTIN_BLENDMW512,
30076 IX86_BUILTIN_BLENDMB512,
30077 IX86_BUILTIN_CMPB512,
30078 IX86_BUILTIN_CMPW512,
30079 IX86_BUILTIN_UCMPB512,
30080 IX86_BUILTIN_UCMPW512,
30082 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
30083 where all operands are 32 or 64 bytes wide, respectively. */
30084 IX86_BUILTIN_GATHERALTSIV4DF,
30085 IX86_BUILTIN_GATHERALTDIV8SF,
30086 IX86_BUILTIN_GATHERALTSIV4DI,
30087 IX86_BUILTIN_GATHERALTDIV8SI,
30088 IX86_BUILTIN_GATHER3ALTDIV16SF,
30089 IX86_BUILTIN_GATHER3ALTDIV16SI,
30090 IX86_BUILTIN_GATHER3ALTSIV4DF,
30091 IX86_BUILTIN_GATHER3ALTDIV8SF,
30092 IX86_BUILTIN_GATHER3ALTSIV4DI,
30093 IX86_BUILTIN_GATHER3ALTDIV8SI,
30094 IX86_BUILTIN_GATHER3ALTSIV8DF,
30095 IX86_BUILTIN_GATHER3ALTSIV8DI,
30096 IX86_BUILTIN_GATHER3DIV16SF,
30097 IX86_BUILTIN_GATHER3DIV16SI,
30098 IX86_BUILTIN_GATHER3DIV8DF,
30099 IX86_BUILTIN_GATHER3DIV8DI,
30100 IX86_BUILTIN_GATHER3SIV16SF,
30101 IX86_BUILTIN_GATHER3SIV16SI,
30102 IX86_BUILTIN_GATHER3SIV8DF,
30103 IX86_BUILTIN_GATHER3SIV8DI,
30104 IX86_BUILTIN_SCATTERDIV16SF,
30105 IX86_BUILTIN_SCATTERDIV16SI,
30106 IX86_BUILTIN_SCATTERDIV8DF,
30107 IX86_BUILTIN_SCATTERDIV8DI,
30108 IX86_BUILTIN_SCATTERSIV16SF,
30109 IX86_BUILTIN_SCATTERSIV16SI,
30110 IX86_BUILTIN_SCATTERSIV8DF,
30111 IX86_BUILTIN_SCATTERSIV8DI,
30113 /* AVX512PF */
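      /* These expand to the vgatherpf / vscatterpf prefetch instructions,
	 which prefetch the addressed cache lines rather than loading or
	 storing data.  */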
30114 IX86_BUILTIN_GATHERPFQPD,
30115 IX86_BUILTIN_GATHERPFDPS,
30116 IX86_BUILTIN_GATHERPFDPD,
30117 IX86_BUILTIN_GATHERPFQPS,
30118 IX86_BUILTIN_SCATTERPFDPD,
30119 IX86_BUILTIN_SCATTERPFDPS,
30120 IX86_BUILTIN_SCATTERPFQPD,
30121 IX86_BUILTIN_SCATTERPFQPS,
30123 /* AVX512ER */
30124 IX86_BUILTIN_EXP2PD_MASK,
30125 IX86_BUILTIN_EXP2PS_MASK,
30126 IX86_BUILTIN_EXP2PS,
30127 IX86_BUILTIN_RCP28PD,
30128 IX86_BUILTIN_RCP28PS,
30129 IX86_BUILTIN_RCP28SD,
30130 IX86_BUILTIN_RCP28SS,
30131 IX86_BUILTIN_RSQRT28PD,
30132 IX86_BUILTIN_RSQRT28PS,
30133 IX86_BUILTIN_RSQRT28SD,
30134 IX86_BUILTIN_RSQRT28SS,
30136 /* AVX512IFMA */
30137 IX86_BUILTIN_VPMADD52LUQ512,
30138 IX86_BUILTIN_VPMADD52HUQ512,
30139 IX86_BUILTIN_VPMADD52LUQ256,
30140 IX86_BUILTIN_VPMADD52HUQ256,
30141 IX86_BUILTIN_VPMADD52LUQ128,
30142 IX86_BUILTIN_VPMADD52HUQ128,
30143 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30144 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30145 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30146 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30147 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30148 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30150 /* AVX-512VBMI */
30151 IX86_BUILTIN_VPMULTISHIFTQB512,
30152 IX86_BUILTIN_VPMULTISHIFTQB256,
30153 IX86_BUILTIN_VPMULTISHIFTQB128,
30154 IX86_BUILTIN_VPERMVARQI512_MASK,
30155 IX86_BUILTIN_VPERMT2VARQI512,
30156 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30157 IX86_BUILTIN_VPERMI2VARQI512,
30158 IX86_BUILTIN_VPERMVARQI256_MASK,
30159 IX86_BUILTIN_VPERMVARQI128_MASK,
30160 IX86_BUILTIN_VPERMT2VARQI256,
30161 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30162 IX86_BUILTIN_VPERMT2VARQI128,
30163 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30164 IX86_BUILTIN_VPERMI2VARQI256,
30165 IX86_BUILTIN_VPERMI2VARQI128,
30167 /* SHA builtins. */
30168 IX86_BUILTIN_SHA1MSG1,
30169 IX86_BUILTIN_SHA1MSG2,
30170 IX86_BUILTIN_SHA1NEXTE,
30171 IX86_BUILTIN_SHA1RNDS4,
30172 IX86_BUILTIN_SHA256MSG1,
30173 IX86_BUILTIN_SHA256MSG2,
30174 IX86_BUILTIN_SHA256RNDS2,
30176 /* CLWB instructions. */
30177 IX86_BUILTIN_CLWB,
30179 /* PCOMMIT instructions. */
30180 IX86_BUILTIN_PCOMMIT,
30182 /* CLFLUSHOPT instructions. */
30183 IX86_BUILTIN_CLFLUSHOPT,
30185 /* TFmode support builtins. */
30186 IX86_BUILTIN_INFQ,
30187 IX86_BUILTIN_HUGE_VALQ,
30188 IX86_BUILTIN_FABSQ,
30189 IX86_BUILTIN_COPYSIGNQ,
30191 /* Vectorizer support builtins. */
30192 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30193 IX86_BUILTIN_CPYSGNPS,
30194 IX86_BUILTIN_CPYSGNPD,
30195 IX86_BUILTIN_CPYSGNPS256,
30196 IX86_BUILTIN_CPYSGNPS512,
30197 IX86_BUILTIN_CPYSGNPD256,
30198 IX86_BUILTIN_CPYSGNPD512,
30199 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30200 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30203 /* FMA4 instructions. */
30204 IX86_BUILTIN_VFMADDSS,
30205 IX86_BUILTIN_VFMADDSD,
30206 IX86_BUILTIN_VFMADDPS,
30207 IX86_BUILTIN_VFMADDPD,
30208 IX86_BUILTIN_VFMADDPS256,
30209 IX86_BUILTIN_VFMADDPD256,
30210 IX86_BUILTIN_VFMADDSUBPS,
30211 IX86_BUILTIN_VFMADDSUBPD,
30212 IX86_BUILTIN_VFMADDSUBPS256,
30213 IX86_BUILTIN_VFMADDSUBPD256,
30215 /* FMA3 instructions. */
30216 IX86_BUILTIN_VFMADDSS3,
30217 IX86_BUILTIN_VFMADDSD3,
30219 /* XOP instructions. */
30220 IX86_BUILTIN_VPCMOV,
30221 IX86_BUILTIN_VPCMOV_V2DI,
30222 IX86_BUILTIN_VPCMOV_V4SI,
30223 IX86_BUILTIN_VPCMOV_V8HI,
30224 IX86_BUILTIN_VPCMOV_V16QI,
30225 IX86_BUILTIN_VPCMOV_V4SF,
30226 IX86_BUILTIN_VPCMOV_V2DF,
30227 IX86_BUILTIN_VPCMOV256,
30228 IX86_BUILTIN_VPCMOV_V4DI256,
30229 IX86_BUILTIN_VPCMOV_V8SI256,
30230 IX86_BUILTIN_VPCMOV_V16HI256,
30231 IX86_BUILTIN_VPCMOV_V32QI256,
30232 IX86_BUILTIN_VPCMOV_V8SF256,
30233 IX86_BUILTIN_VPCMOV_V4DF256,
30235 IX86_BUILTIN_VPPERM,
30237 IX86_BUILTIN_VPMACSSWW,
30238 IX86_BUILTIN_VPMACSWW,
30239 IX86_BUILTIN_VPMACSSWD,
30240 IX86_BUILTIN_VPMACSWD,
30241 IX86_BUILTIN_VPMACSSDD,
30242 IX86_BUILTIN_VPMACSDD,
30243 IX86_BUILTIN_VPMACSSDQL,
30244 IX86_BUILTIN_VPMACSSDQH,
30245 IX86_BUILTIN_VPMACSDQL,
30246 IX86_BUILTIN_VPMACSDQH,
30247 IX86_BUILTIN_VPMADCSSWD,
30248 IX86_BUILTIN_VPMADCSWD,
30250 IX86_BUILTIN_VPHADDBW,
30251 IX86_BUILTIN_VPHADDBD,
30252 IX86_BUILTIN_VPHADDBQ,
30253 IX86_BUILTIN_VPHADDWD,
30254 IX86_BUILTIN_VPHADDWQ,
30255 IX86_BUILTIN_VPHADDDQ,
30256 IX86_BUILTIN_VPHADDUBW,
30257 IX86_BUILTIN_VPHADDUBD,
30258 IX86_BUILTIN_VPHADDUBQ,
30259 IX86_BUILTIN_VPHADDUWD,
30260 IX86_BUILTIN_VPHADDUWQ,
30261 IX86_BUILTIN_VPHADDUDQ,
30262 IX86_BUILTIN_VPHSUBBW,
30263 IX86_BUILTIN_VPHSUBWD,
30264 IX86_BUILTIN_VPHSUBDQ,
30266 IX86_BUILTIN_VPROTB,
30267 IX86_BUILTIN_VPROTW,
30268 IX86_BUILTIN_VPROTD,
30269 IX86_BUILTIN_VPROTQ,
30270 IX86_BUILTIN_VPROTB_IMM,
30271 IX86_BUILTIN_VPROTW_IMM,
30272 IX86_BUILTIN_VPROTD_IMM,
30273 IX86_BUILTIN_VPROTQ_IMM,
30275 IX86_BUILTIN_VPSHLB,
30276 IX86_BUILTIN_VPSHLW,
30277 IX86_BUILTIN_VPSHLD,
30278 IX86_BUILTIN_VPSHLQ,
30279 IX86_BUILTIN_VPSHAB,
30280 IX86_BUILTIN_VPSHAW,
30281 IX86_BUILTIN_VPSHAD,
30282 IX86_BUILTIN_VPSHAQ,
30284 IX86_BUILTIN_VFRCZSS,
30285 IX86_BUILTIN_VFRCZSD,
30286 IX86_BUILTIN_VFRCZPS,
30287 IX86_BUILTIN_VFRCZPD,
30288 IX86_BUILTIN_VFRCZPS256,
30289 IX86_BUILTIN_VFRCZPD256,
30291 IX86_BUILTIN_VPCOMEQUB,
30292 IX86_BUILTIN_VPCOMNEUB,
30293 IX86_BUILTIN_VPCOMLTUB,
30294 IX86_BUILTIN_VPCOMLEUB,
30295 IX86_BUILTIN_VPCOMGTUB,
30296 IX86_BUILTIN_VPCOMGEUB,
30297 IX86_BUILTIN_VPCOMFALSEUB,
30298 IX86_BUILTIN_VPCOMTRUEUB,
30300 IX86_BUILTIN_VPCOMEQUW,
30301 IX86_BUILTIN_VPCOMNEUW,
30302 IX86_BUILTIN_VPCOMLTUW,
30303 IX86_BUILTIN_VPCOMLEUW,
30304 IX86_BUILTIN_VPCOMGTUW,
30305 IX86_BUILTIN_VPCOMGEUW,
30306 IX86_BUILTIN_VPCOMFALSEUW,
30307 IX86_BUILTIN_VPCOMTRUEUW,
30309 IX86_BUILTIN_VPCOMEQUD,
30310 IX86_BUILTIN_VPCOMNEUD,
30311 IX86_BUILTIN_VPCOMLTUD,
30312 IX86_BUILTIN_VPCOMLEUD,
30313 IX86_BUILTIN_VPCOMGTUD,
30314 IX86_BUILTIN_VPCOMGEUD,
30315 IX86_BUILTIN_VPCOMFALSEUD,
30316 IX86_BUILTIN_VPCOMTRUEUD,
30318 IX86_BUILTIN_VPCOMEQUQ,
30319 IX86_BUILTIN_VPCOMNEUQ,
30320 IX86_BUILTIN_VPCOMLTUQ,
30321 IX86_BUILTIN_VPCOMLEUQ,
30322 IX86_BUILTIN_VPCOMGTUQ,
30323 IX86_BUILTIN_VPCOMGEUQ,
30324 IX86_BUILTIN_VPCOMFALSEUQ,
30325 IX86_BUILTIN_VPCOMTRUEUQ,
30327 IX86_BUILTIN_VPCOMEQB,
30328 IX86_BUILTIN_VPCOMNEB,
30329 IX86_BUILTIN_VPCOMLTB,
30330 IX86_BUILTIN_VPCOMLEB,
30331 IX86_BUILTIN_VPCOMGTB,
30332 IX86_BUILTIN_VPCOMGEB,
30333 IX86_BUILTIN_VPCOMFALSEB,
30334 IX86_BUILTIN_VPCOMTRUEB,
30336 IX86_BUILTIN_VPCOMEQW,
30337 IX86_BUILTIN_VPCOMNEW,
30338 IX86_BUILTIN_VPCOMLTW,
30339 IX86_BUILTIN_VPCOMLEW,
30340 IX86_BUILTIN_VPCOMGTW,
30341 IX86_BUILTIN_VPCOMGEW,
30342 IX86_BUILTIN_VPCOMFALSEW,
30343 IX86_BUILTIN_VPCOMTRUEW,
30345 IX86_BUILTIN_VPCOMEQD,
30346 IX86_BUILTIN_VPCOMNED,
30347 IX86_BUILTIN_VPCOMLTD,
30348 IX86_BUILTIN_VPCOMLED,
30349 IX86_BUILTIN_VPCOMGTD,
30350 IX86_BUILTIN_VPCOMGED,
30351 IX86_BUILTIN_VPCOMFALSED,
30352 IX86_BUILTIN_VPCOMTRUED,
30354 IX86_BUILTIN_VPCOMEQQ,
30355 IX86_BUILTIN_VPCOMNEQ,
30356 IX86_BUILTIN_VPCOMLTQ,
30357 IX86_BUILTIN_VPCOMLEQ,
30358 IX86_BUILTIN_VPCOMGTQ,
30359 IX86_BUILTIN_VPCOMGEQ,
30360 IX86_BUILTIN_VPCOMFALSEQ,
30361 IX86_BUILTIN_VPCOMTRUEQ,
30363 /* LWP instructions. */
30364 IX86_BUILTIN_LLWPCB,
30365 IX86_BUILTIN_SLWPCB,
30366 IX86_BUILTIN_LWPVAL32,
30367 IX86_BUILTIN_LWPVAL64,
30368 IX86_BUILTIN_LWPINS32,
30369 IX86_BUILTIN_LWPINS64,
30371 IX86_BUILTIN_CLZS,
30373 /* RTM */
30374 IX86_BUILTIN_XBEGIN,
30375 IX86_BUILTIN_XEND,
30376 IX86_BUILTIN_XABORT,
30377 IX86_BUILTIN_XTEST,
30379 /* MPX */
30380 IX86_BUILTIN_BNDMK,
30381 IX86_BUILTIN_BNDSTX,
30382 IX86_BUILTIN_BNDLDX,
30383 IX86_BUILTIN_BNDCL,
30384 IX86_BUILTIN_BNDCU,
30385 IX86_BUILTIN_BNDRET,
30386 IX86_BUILTIN_BNDNARROW,
30387 IX86_BUILTIN_BNDINT,
30388 IX86_BUILTIN_SIZEOF,
30389 IX86_BUILTIN_BNDLOWER,
30390 IX86_BUILTIN_BNDUPPER,
30392 /* BMI instructions. */
30393 IX86_BUILTIN_BEXTR32,
30394 IX86_BUILTIN_BEXTR64,
30395 IX86_BUILTIN_CTZS,
30397 /* TBM instructions. */
30398 IX86_BUILTIN_BEXTRI32,
30399 IX86_BUILTIN_BEXTRI64,
30401 /* BMI2 instructions. */
30402 IX86_BUILTIN_BZHI32,
30403 IX86_BUILTIN_BZHI64,
30404 IX86_BUILTIN_PDEP32,
30405 IX86_BUILTIN_PDEP64,
30406 IX86_BUILTIN_PEXT32,
30407 IX86_BUILTIN_PEXT64,
30409 /* ADX instructions. */
30410 IX86_BUILTIN_ADDCARRYX32,
30411 IX86_BUILTIN_ADDCARRYX64,
30413 /* SBB instructions. */
30414 IX86_BUILTIN_SBB32,
30415 IX86_BUILTIN_SBB64,
30417 /* FSGSBASE instructions. */
30418 IX86_BUILTIN_RDFSBASE32,
30419 IX86_BUILTIN_RDFSBASE64,
30420 IX86_BUILTIN_RDGSBASE32,
30421 IX86_BUILTIN_RDGSBASE64,
30422 IX86_BUILTIN_WRFSBASE32,
30423 IX86_BUILTIN_WRFSBASE64,
30424 IX86_BUILTIN_WRGSBASE32,
30425 IX86_BUILTIN_WRGSBASE64,
30427 /* RDRND instructions. */
30428 IX86_BUILTIN_RDRAND16_STEP,
30429 IX86_BUILTIN_RDRAND32_STEP,
30430 IX86_BUILTIN_RDRAND64_STEP,
30432 /* RDSEED instructions. */
30433 IX86_BUILTIN_RDSEED16_STEP,
30434 IX86_BUILTIN_RDSEED32_STEP,
30435 IX86_BUILTIN_RDSEED64_STEP,
30437 /* F16C instructions. */
30438 IX86_BUILTIN_CVTPH2PS,
30439 IX86_BUILTIN_CVTPH2PS256,
30440 IX86_BUILTIN_CVTPS2PH,
30441 IX86_BUILTIN_CVTPS2PH256,
30443 /* CFString built-in for darwin */
30444 IX86_BUILTIN_CFSTRING,
30446 /* Builtins to get CPU type and supported features. */
30447 IX86_BUILTIN_CPU_INIT,
30448 IX86_BUILTIN_CPU_IS,
30449 IX86_BUILTIN_CPU_SUPPORTS,
30451 /* Read/write FLAGS register built-ins. */
30452 IX86_BUILTIN_READ_FLAGS,
30453 IX86_BUILTIN_WRITE_FLAGS,
30455 IX86_BUILTIN_MAX
30456 };
30458 /* Table for the ix86 builtin decls. */
30459 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30461 /* Table of all the builtin functions that are possible with different ISAs
30462 but are waiting to be built until a function is declared to use that
30463 ISA. */
30464 struct builtin_isa {
30465 const char *name; /* function name */
30466 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30467 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30468 bool const_p; /* true if the declaration is constant */
30469 bool leaf_p; /* true if the declaration has leaf attribute */
30470 bool nothrow_p; /* true if the declaration has nothrow attribute */
30471 bool set_and_not_built_p; /* true if the builtin was deferred: recorded here but not yet built */
30472 };
30474 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30477 /* Add an ix86 target builtin function with MASK, NAME, TCODE and CODE.
30478 Save the MASK of isa_flags to use in the ix86_builtins_isa array.  Store
30479 the function decl in the ix86_builtins array.  Return the function decl,
30480 or NULL_TREE if the builtin was not added.
30482 If the front end has a special hook for builtin functions, delay adding
30483 builtin functions that aren't in the current ISA until the ISA is changed
30484 with function specific optimization.  Doing so can save about 300K for the
30485 default compiler.  When the builtin is expanded, check at that time whether
30486 it is valid.
30488 If the front end doesn't have a special hook, record all builtins, even
30489 those whose instruction set isn't in the current ISA, in case the user uses
30490 function specific options for a different ISA, so that we don't get scope
30491 errors if a builtin is added in the middle of a function scope. */
30493 static inline tree
30494 def_builtin (HOST_WIDE_INT mask, const char *name,
30495 enum ix86_builtin_func_type tcode,
30496 enum ix86_builtins code)
30497 {
30498 tree decl = NULL_TREE;
30500 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30501 {
30502 ix86_builtins_isa[(int) code].isa = mask;
30504 mask &= ~OPTION_MASK_ISA_64BIT;
30505 if (mask == 0
30506 || (mask & ix86_isa_flags) != 0
30507 || (lang_hooks.builtin_function
30508 == lang_hooks.builtin_function_ext_scope))
30510 {
30511 tree type = ix86_get_builtin_func_type (tcode);
30512 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30513 NULL, NULL_TREE);
30514 ix86_builtins[(int) code] = decl;
30515 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30516 }
30517 else
30518 {
30519 ix86_builtins[(int) code] = NULL_TREE;
30520 ix86_builtins_isa[(int) code].tcode = tcode;
30521 ix86_builtins_isa[(int) code].name = name;
30522 ix86_builtins_isa[(int) code].leaf_p = false;
30523 ix86_builtins_isa[(int) code].nothrow_p = false;
30524 ix86_builtins_isa[(int) code].const_p = false;
30525 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30526 }
30527 }
30529 return decl;
30530 }
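/* Illustrative sketch only (not a call taken from this file): a typical
   registration through the helper above might look like

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  V4SF_FTYPE_V4SF, IX86_BUILTIN_EXAMPLE);

   where "__builtin_ia32_example" and IX86_BUILTIN_EXAMPLE are placeholder
   names.  If OPTION_MASK_ISA_SSE2 is not enabled in ix86_isa_flags and the
   front end has no ext-scope hook, the decl is merely recorded in
   ix86_builtins_isa and is built later by ix86_add_new_builtins once the
   ISA becomes available.  */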
30532 /* Like def_builtin, but also marks the function decl "const". */
30534 static inline tree
30535 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30536 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30537 {
30538 tree decl = def_builtin (mask, name, tcode, code);
30539 if (decl)
30540 TREE_READONLY (decl) = 1;
30541 else
30542 ix86_builtins_isa[(int) code].const_p = true;
30544 return decl;
30545 }
30547 /* Add any new builtin functions for a given ISA that have not yet been
30548 declared.  This saves a bit of space compared to adding all of the
30549 declarations to the tree up front, even when they are never used. */
30551 static void
30552 ix86_add_new_builtins (HOST_WIDE_INT isa)
30553 {
30554 int i;
30556 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30557 {
30558 if ((ix86_builtins_isa[i].isa & isa) != 0
30559 && ix86_builtins_isa[i].set_and_not_built_p)
30560 {
30561 tree decl, type;
30563 /* Don't define the builtin again. */
30564 ix86_builtins_isa[i].set_and_not_built_p = false;
30566 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30567 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30568 type, i, BUILT_IN_MD, NULL,
30569 NULL_TREE);
30571 ix86_builtins[i] = decl;
30572 if (ix86_builtins_isa[i].const_p)
30573 TREE_READONLY (decl) = 1;
30574 if (ix86_builtins_isa[i].leaf_p)
30575 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30576 NULL_TREE);
30577 if (ix86_builtins_isa[i].nothrow_p)
30578 TREE_NOTHROW (decl) = 1;
30579 }
30580 }
30581 }
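/* Usage sketch (an assumed call site, not shown in this fragment): when the
   set of enabled ISAs grows, e.g. while processing a
   __attribute__((target ("avx2"))) function, the option-handling code can
   invoke

     ix86_add_new_builtins (OPTION_MASK_ISA_AVX2);

   to materialize every deferred builtin whose recorded isa_flags overlap
   the newly enabled mask.  */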
30583 /* Bits for builtin_description.flag. */
30585 /* Set when we don't support the comparison natively, and should
30586 swap the comparison operands in order to support it. */
30587 #define BUILTIN_DESC_SWAP_OPERANDS 1
30589 struct builtin_description
30590 {
30591 const HOST_WIDE_INT mask;
30592 const enum insn_code icode;
30593 const char *const name;
30594 const enum ix86_builtins code;
30595 const enum rtx_code comparison;
30596 const int flag;
30597 };
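/* Reading an entry, using the first bdesc_comi row below as the example:
   it is available under OPTION_MASK_ISA_SSE, expands through
   CODE_FOR_sse_comi, is exposed as "__builtin_ia32_comieq" with code
   IX86_BUILTIN_COMIEQSS, and uses the UNEQ comparison with flag 0.  How the
   comparison and flag fields are consumed depends on the expander that
   walks each table (the comi expander, in this case).  */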
30599 static const struct builtin_description bdesc_comi[] =
30600 {
30601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30603 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30604 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30609 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30612 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30625 };
30627 static const struct builtin_description bdesc_pcmpestr[] =
30628 {
30629 /* SSE4.2 */
30630 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30631 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30632 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30633 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30634 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30635 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30636 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30637 };
30639 static const struct builtin_description bdesc_pcmpistr[] =
30640 {
30641 /* SSE4.2 */
30642 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30643 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30644 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30645 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30646 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30647 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30648 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30649 };
30651 /* Special builtins with variable number of arguments. */
30652 static const struct builtin_description bdesc_special_args[] =
30653 {
30654 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30655 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30656 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30658 /* 80387 (used internally for atomic compound assignment). */
30659 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30660 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30661 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30662 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30664 /* MMX */
30665 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30667 /* 3DNow! */
30668 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30670 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30671 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30672 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30673 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30674 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30675 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30676 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30677 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30678 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30680 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30681 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30682 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30683 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30684 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30685 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30686 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30687 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30689 /* SSE */
30690 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30691 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30692 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30694 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30695 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30696 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30697 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30699 /* SSE or 3DNow!A */
30700 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30701 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30703 /* SSE2 */
30704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30711 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30718 /* SSE3 */
30719 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30721 /* SSE4.1 */
30722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30724 /* SSE4A */
30725 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30726 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30728 /* AVX */
30729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30732 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30733 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30734 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30759 /* AVX2 */
30760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30770 /* AVX512F */
30771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30819 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30820 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30821 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30822 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30823 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30824 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30826 /* FSGSBASE */
30827 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30828 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30829 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30830 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30831 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30832 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30833 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30834 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30836 /* RTM */
30837 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30838 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30839 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30841 /* AVX512BW */
30842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30847 /* AVX512VL */
30848 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30849 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30850 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30884 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30885 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30886 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30887 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30943 /* PCOMMIT. */
30944 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30945 };
30947 /* Builtins with variable number of arguments. */
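/* Each entry is { ISA option mask, insn code, builtin name, IX86_BUILTIN_*
   enumerator, rtx comparison code (UNKNOWN when unused), and the
   ix86_builtin_func_type cast to int }, following the field order of
   struct builtin_description defined earlier in this file.  */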
30948 static const struct builtin_description bdesc_args[] =
30949 {
30950 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30951 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30952 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30953 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30954 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30955 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30956 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30958 /* MMX */
30959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30972 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30978 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30979 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30980 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30981 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30983 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30984 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30985 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30987 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30988 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30990 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30991 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30992 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30993 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30994 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30995 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30997 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
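/* The *_COUNT prototypes mark the last operand as a shift count; the
   *I forms (e.g. __builtin_ia32_psllwi) take it as a plain integer,
   while the others take it in an MMX register.  */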
31003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31004 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31005 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31013 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31014 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31022 /* 3DNow! */
31023 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31024 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31025 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31026 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31028 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31029 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31030 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31031 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31032 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31033 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31034 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31035 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31036 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31037 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31038 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31039 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31040 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31041 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31042 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31044 /* 3DNow!A */
31045 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31046 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31047 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31048 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31049 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31050 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31052 /* SSE */
31053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31055 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31057 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31061 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31064 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31068 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31069 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31070 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31075 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
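/* The compare builtins reuse a single maskcmp pattern per mode and pass
   the rtx comparison code through the normally-UNKNOWN field; CMPGT and
   CMPGE are expressed as LT/LE with the *_SWAP prototypes, which swap
   the two operands before the pattern is expanded.  */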
31077 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31081 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31098 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31099 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31103 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31105 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31106 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31108 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31113 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31114 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31118 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31120 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31126 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31127 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31130 /* SSE MMX or 3DNow!A */
31130 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31131 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31132 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31134 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31135 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31136 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31137 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31139 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31140 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31142 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31144 /* SSE2 */
31145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31151 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31163 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31164 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31168 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31170 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31171 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31172 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31173 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31208 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31213 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31241 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31254 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31256 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31265 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31281 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
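/* The *_INT_CONVERT prototypes indicate that the builtin's operands are
   reinterpreted in the insn pattern's own mode; __builtin_ia32_pslldqi128,
   for example, shifts the whole register as a single V1TI value.  */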
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31295 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31314 /* SSE2 MMX */
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31318 /* SSE3 */
31319 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31320 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31322 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31323 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31324 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31325 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31326 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31327 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31329 /* SSSE3 */
31330 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31331 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31332 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31333 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31334 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31335 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31337 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31338 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31339 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31340 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31341 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31342 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31343 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31344 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31345 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31346 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31347 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31348 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31349 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31350 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31351 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31352 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31353 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31354 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31355 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31356 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31357 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31358 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31359 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31360 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31362 /* SSSE3. */
31363 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31364 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31366 /* SSE4.1 */
31367 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31371 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31372 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31373 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31374 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31375 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31376 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31378 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31379 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31380 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31381 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31382 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31383 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31384 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31385 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31386 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31387 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31388 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31389 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31390 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31392 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31393 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31394 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31395 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31396 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31397 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31398 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31399 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31400 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31401 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31402 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31403 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
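/* Editorial note (not part of the upstream table): each row above pairs an
   ISA option mask that must be enabled, the insn pattern used to expand the
   builtin, its user-visible name, its IX86_BUILTIN_* code, an optional rtx
   comparison code (UNKNOWN when unused, or a rounding constant for the
   round variants below), and a function-prototype code, e.g.
   V8HI_FTYPE_V16QI reads as "v8hi f (v16qi)".  */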
31405 /* SSE4.1 rounding and ptest builtins (OPTION_MASK_ISA_ROUND).  */
31406 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31407 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31408 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31409 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31411 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31412 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31413 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31414 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31416 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31417 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31419 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31420 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31422 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31423 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31424 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31425 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31427 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31428 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31430 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31431 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31433 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31434 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31435 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
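/* Illustrative sketch (an assumption, not upstream code): the ROUND-masked
   entries above all expand through the same sse4_1_roundpd/roundps
   patterns; the floor/ceil/trunc/rint variants carry the rounding mode in
   the comparison slot rather than an explicit immediate.  Called directly,
   the base builtin takes the ROUNDPD/ROUNDPS imm8, e.g.

     __v2df x = { 1.5, -1.5 };
     __v2df down = __builtin_ia32_roundpd (x, 0x01);   /- 0x01 rounds toward -inf

   The three ptest entries map EQ, LTU and GTU onto the ZF, CF and
   "neither flag set" results of the PTEST instruction.  */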
31437 /* SSE4.2 */
31438 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31439 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31440 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31441 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31442 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
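/* Illustrative sketch (an assumption, not upstream code): the crc32
   builtins fold one more unit of data into a running CRC-32C value,
   following the prototypes above, e.g. accumulating over a byte buffer:

     unsigned int crc = ~0u;
     for (size_t i = 0; i < len; i++)
       crc = __builtin_ia32_crc32qi (crc, buf[i]);

   buf, len and the initial value are hypothetical; the 64-bit form is only
   registered when OPTION_MASK_ISA_64BIT is also set.  */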
31444 /* SSE4A */
31445 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31446 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31447 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31448 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31450 /* AES */
31451 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31454 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31456 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31457 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31459 /* PCLMUL */
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
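/* Editorial assumption: the AES and PCLMUL rows above carry a null name
   field; their user-visible names appear to be registered separately so
   the AES/PCLMUL ISA checks can be applied at that point, while these rows
   only supply the expansion data.  */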
31462 /* AVX */
31463 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31464 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31467 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31468 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31471 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31473 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31476 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31477 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31478 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31479 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31480 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31481 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31482 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31483 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31484 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31485 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31486 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31487 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31488 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31495 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31496 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31500 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31501 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31503 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31506 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31508 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31511 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31522 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31528 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31572 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31577 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
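/* Illustrative sketch (an assumption, not upstream code): the AVX rows
   largely mirror the 128-bit SSE builtins at 256-bit width; the cmp*
   entries take the VCMPPS/VCMPPD predicate as their trailing immediate,
   e.g. an ordered less-than compare of two v4df vectors:

     __v4df m = __builtin_ia32_cmppd256 (a, b, 0x01);

   a and b are hypothetical v4df values; predicate 0x01 corresponds to
   _CMP_LT_OS.  */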
31603 /* AVX2 */
31604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31605 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31606 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31607 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31608 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31609 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31610 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31611 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31612 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31613 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31614 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31615 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31616 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31617 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31618 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31619 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31620 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31621 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31622 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31623 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31624 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31625 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31626 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31627 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31628 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31629 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31630 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31631 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31632 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31633 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31634 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31635 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31636 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31637 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31638 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31639 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31640 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31641 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31642 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31751 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31753 /* BMI */
31754 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31755 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31756 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
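/* Illustrative sketch (an assumption, not upstream code): bextr extracts a
   bit field described by a packed control operand, start position in bits
   7:0 and field length in bits 15:8, e.g.

     unsigned int field = __builtin_ia32_bextr_u32 (x, (len << 8) | start);

   x, start and len are hypothetical.  */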
31758 /* TBM */
31759 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31760 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31762 /* F16C */
31763 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31764 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31765 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31766 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
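/* Illustrative sketch (an assumption, not upstream code): vcvtps2ph packs
   four floats into half precision (the low four elements of the v8hi
   result) under a rounding-control immediate, and vcvtph2ps converts back,
   e.g. a round trip through FP16 with round-to-nearest:

     __v8hi h = __builtin_ia32_vcvtps2ph (f, 0x00);
     __v4sf g = __builtin_ia32_vcvtph2ps (h);

   f is a hypothetical v4sf value.  */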
31768 /* BMI2 */
31769 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31770 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31771 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31772 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31773 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31774 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
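/* Illustrative sketch (an assumption, not upstream code): pdep scatters
   the low-order bits of its first operand into the bit positions selected
   by the mask, and pext gathers the selected bits back down, so the pair
   round-trips a value through a packed form:

     unsigned int packed   = __builtin_ia32_pext_si (value, mask);
     unsigned int restored = __builtin_ia32_pdep_si (packed, mask);

   value and mask are hypothetical.  */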
31776 /* AVX512F */
31777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31779 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31781 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31809 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31832 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31833 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31871 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31872 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31943 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31944 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31945 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31946 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
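  /* As elsewhere in this table, each row is a struct builtin_description:
     { ISA option mask, insn code, builtin name, IX86_BUILTIN_* enum,
     rtx_code slot (UNKNOWN unless a sub-code such as ROUND_FLOOR or
     ROUND_CEIL is needed, as in the vec_pack_sfix rows below), function
     prototype index }.  */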
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31978 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31983 /* Mask arithmetic operations */
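  /* These operate on the AVX-512 k mask registers; the HI_FTYPE_HI_HI
     prototypes correspond to the 16-bit __mmask16 operands of the
     __builtin_ia32_k*hi intrinsics.  */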
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
31995 /* SHA */
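  /* The name field is 0 in these rows, so only the insn-code mapping is
     taken from this table; the user-visible __builtin_ia32_sha* names are
     presumably registered separately during builtin initialization.  */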
31996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32004 /* AVX512VL. */
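  /* For the masked 128/256-bit variants below, the trailing QI/HI/SI
     argument of each prototype is the write mask, sized to the lane count
     (8, 16 or 32 elements respectively), matching the _mask/_maskz naming
     convention of the builtins.  */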
32005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32015 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32043 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32044 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32045 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32046 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32047 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32048 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32049 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32050 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32051 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32052 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32053 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32054 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32055 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32060 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32061 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32062 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32063 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32064 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32065 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32066 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32067 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32068 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32069 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32072 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32073 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32074 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32075 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32096 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32097 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32098 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32099 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32100 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32101 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32102 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32103 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32116 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32119 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32120 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32131 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32132 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32143 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32144 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32145 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32146 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32147 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32148 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32149 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32150 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32151 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32152 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32154 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32155 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32156 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32173 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32177 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32178 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32179 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32180 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32181 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32182 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32183 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32184 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32185 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32186 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32189 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32190 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32191 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32192 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32193 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32194 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32197 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32198 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32199 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32200 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32205 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32207 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32208 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32209 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32210 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32241 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32242 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32243 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32244 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32261 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32262 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32263 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32264 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32265 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32266 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32267 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32268 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32269 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32270 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32271 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32272 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32273 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32274 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32275 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32276 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32277 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32278 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32279 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32282 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32285 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32286 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32324 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32325 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32326 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32389 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32390 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32391 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32392 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32393 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32394 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32395 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32396 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32401 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32402 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32403 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32404 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32415 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32416 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32417 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32418 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32419 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32420 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32421 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32422 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32447 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32448 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32450 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32451 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32452 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32483 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32495 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32496 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32497 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32498 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32499 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32500 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32505 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32506 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32507 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32508 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32509 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32513 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32514 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32515 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32516 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32533 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32535 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32536 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32542 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32543 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32544 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32549 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32550 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32551 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32552 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32593 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32594 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32597 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32598 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32599 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32600 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32601 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32602 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32609 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32610 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32611 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32612 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32620 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32621 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32622 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32623 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32650 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32651 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32652 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32663 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32664 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32685 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32711 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32712 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32713 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32714 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
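  /* Each initializer above fills one builtin_description record: an ISA
     option mask gating availability (OPTION_MASK_ISA_*), the insn_code of
     the masked pattern to expand (CODE_FOR_*), the __builtin_ia32_* name
     exposed to the front end, the IX86_BUILTIN_* enumerator, an rtx
     comparison code (UNKNOWN when the builtin is not a comparison), and
     the prototype, an ix86_builtin_func_type cast to int.  A minimal
     sketch of the record, assuming the field names match the
     builtin_description declaration earlier in this file:

        struct builtin_description
        {
          const HOST_WIDE_INT mask;
          const enum insn_code icode;
          const char *const name;
          const enum ix86_builtins code;
          const enum rtx_code comparison;
          const int flag;
        };  */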
32718 /* AVX512DQ. */
32719 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32720 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32721 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32722 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32723 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32724 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32725 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32726 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32727 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32728 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32729 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32730 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32731 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32732 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32733 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32734 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32735 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32736 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32737 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32738 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32739 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32740 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32741 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32742 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32743 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32744 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32745 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32746 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32747 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32748 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32749 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
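  /* A minimal sketch of how the entries above become front-end builtins,
     assuming this table is the bdesc_args array walked during builtin
     initialization elsewhere in this file; the loop shape and the
     def_builtin_const call mirror that registration code:

        for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
          {
            if (d->name == 0)
              continue;
            ftype = (enum ix86_builtin_func_type) d->flag;
            def_builtin_const (d->mask, d->name, ftype, d->code);
          }

     Expansion of a call then goes through ix86_expand_args_builtin, which
     selects the insn via d->icode and the operand shape via the prototype.  */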
32751 /* AVX512BW. */
32752 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32753 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32754 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32755 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32756 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32757 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32758 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32759 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32760 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32761 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32762 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32763 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32764 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32765 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32766 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32767 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32768 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32769 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32770 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32771 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32772 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32773 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32774 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32775 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32776 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32777 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32778 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32779 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32780 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32781 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32782 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32783 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32784 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32785 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32786 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32787 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32788 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32789 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32790 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32844 /* AVX512IFMA */
32845 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32846 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32847 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32848 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32849 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32850 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32851 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32852 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32853 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32854 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32855 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32856 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32858 /* AVX512VBMI */
32859 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32860 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32861 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32862 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32863 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32864 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32865 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32866 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32867 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32868 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32869 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32870 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32871 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32872 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32873 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32874 };
32876 /* Builtins with rounding support. */
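/* Entries in this table carry one extra trailing INT operand in their
   V*_FTYPE_* signature; that operand is the embedded rounding / SAE
   control immediate, which is passed straight through to the named
   insn pattern rather than being a data operand.  */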
32877 static const struct builtin_description bdesc_round_args[] =
32878 {
32879 /* AVX512F */
32880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32899 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32901 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32908 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32910 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32960 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32962 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32964 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32966 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32968 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32970 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32972 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32974 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33000 /* AVX512ER */
33001 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33002 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33003 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33004 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33005 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33006 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33007 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33008 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33009 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33010 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33012 /* AVX512DQ. */
33013 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33014 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33015 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33016 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33017 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33018 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33019 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33020 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33021 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33022 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33023 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33024 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33025 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33026 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33027 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33028 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33029 };
33031 /* Builtins for MPX. */
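/* The MPX entries below use (enum insn_code) 0 in place of a named
   pattern; they cannot be expanded through a generic insn template and
   are instead handled case by case when the corresponding builtin is
   expanded.  */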
33032 static const struct builtin_description bdesc_mpx[] =
33033 {
33034 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33035 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33036 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33037 };
33039 /* Const builtins for MPX. */
33040 static const struct builtin_description bdesc_mpx_const[] =
33041 {
33042 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33043 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33044 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33045 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33046 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33047 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33048 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33049 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33050 };
33052 /* FMA4 and XOP. */
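/* The MULTI_ARG_* macros below are shorthand for the V*_FTYPE_*
   function type enumerators used by the FMA4/XOP table: the digit
   gives the operand count and the suffix the vector mode, with _IMM,
   _CMP and _TF variants for the immediate and comparison forms.  */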
33053 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33054 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33055 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33056 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33057 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33058 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33059 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33060 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33061 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33062 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33063 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33064 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33065 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33066 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33067 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33068 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33069 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33070 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33071 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33072 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33073 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33074 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33075 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33076 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33077 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33078 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33079 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33080 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33081 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33082 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33083 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33084 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33085 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33086 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33087 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33088 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33089 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33090 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33091 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33092 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33093 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33094 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33095 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33096 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33097 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33098 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33099 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33100 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33101 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33102 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33103 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33104 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
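/* A minimal usage sketch for one entry below (hypothetical wrapper,
   assuming -mfma4; the real wrappers live in the fma4intrin.h header):

     static __inline __m128
     my_fmadd_ss (__m128 a, __m128 b, __m128 c)
     {
       return (__m128) __builtin_ia32_vfmaddss ((__v4sf) a,
                                                (__v4sf) b,
                                                (__v4sf) c);
     }

   The VFMADDSS entry gates this builtin on OPTION_MASK_ISA_FMA4, maps
   it onto the CODE_FOR_fma4i_vmfmadd_v4sf pattern and gives it the
   three-operand V4SF signature MULTI_ARG_3_SF.  */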
33106 static const struct builtin_description bdesc_multi_arg[] =
33107 {
33108 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33109 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33110 UNKNOWN, (int)MULTI_ARG_3_SF },
33111 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33112 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33113 UNKNOWN, (int)MULTI_ARG_3_DF },
33115 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33116 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33117 UNKNOWN, (int)MULTI_ARG_3_SF },
33118 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33119 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33120 UNKNOWN, (int)MULTI_ARG_3_DF },
33122 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33123 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33124 UNKNOWN, (int)MULTI_ARG_3_SF },
33125 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33126 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33127 UNKNOWN, (int)MULTI_ARG_3_DF },
33128 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33129 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33130 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33131 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33132 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33133 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33135 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33136 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33137 UNKNOWN, (int)MULTI_ARG_3_SF },
33138 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33139 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33140 UNKNOWN, (int)MULTI_ARG_3_DF },
33141 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33142 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33143 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33144 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33145 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33146 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33170 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33186 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33194 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33308 /* TM vector builtins. */
33310 /* Reuse the existing x86-specific `struct builtin_description' because
33311 we're lazy. Add casts to make them fit. */
33312 static const struct builtin_description bdesc_tm[] =
33314 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33315 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33316 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33317 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33318 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33319 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33320 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33322 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33323 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33324 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33325 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33326 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33327 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33328 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33330 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33331 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33332 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33333 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33334 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33335 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33336 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33338 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33339 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33340 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33343 /* TM callbacks. */
33345 /* Return the builtin decl needed to load a vector of TYPE. */
33347 static tree
33348 ix86_builtin_tm_load (tree type)
33350 if (TREE_CODE (type) == VECTOR_TYPE)
33352 switch (tree_to_uhwi (TYPE_SIZE (type)))
33354 case 64:
33355 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33356 case 128:
33357 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33358 case 256:
33359 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33362 return NULL_TREE;
33365 /* Return the builtin decl needed to store a vector of TYPE. */
33367 static tree
33368 ix86_builtin_tm_store (tree type)
33370 if (TREE_CODE (type) == VECTOR_TYPE)
33372 switch (tree_to_uhwi (TYPE_SIZE (type)))
33374 case 64:
33375 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33376 case 128:
33377 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33378 case 256:
33379 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33382 return NULL_TREE;
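/* Editorial sketch, not part of the build: the two helpers above key
   purely off the bit size of the vector type, so a caller asking about
   a 4 x float vector (128 bits) would get the M128 variant back
   (build_vector_type and float_type_node are the usual tree helpers;
   the local names are illustrative):

     tree v4sf = build_vector_type (float_type_node, 4);
     tree load = ix86_builtin_tm_load (v4sf);
       ... load is builtin_decl_explicit (BUILT_IN_TM_LOAD_M128) ...
     tree scal = ix86_builtin_tm_load (double_type_node);
       ... NULL_TREE, since only VECTOR_TYPEs are handled ...  */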
33385 /* Initialize the transactional memory vector load/store builtins. */
33387 static void
33388 ix86_init_tm_builtins (void)
33390 enum ix86_builtin_func_type ftype;
33391 const struct builtin_description *d;
33392 size_t i;
33393 tree decl;
33394 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33395 tree attrs_log, attrs_type_log;
33397 if (!flag_tm)
33398 return;
33400 /* If there are no builtins defined, we must be compiling in a
33401 language without trans-mem support. */
33402 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33403 return;
33405 /* Use whatever attributes a normal TM load has. */
33406 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33407 attrs_load = DECL_ATTRIBUTES (decl);
33408 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33409 /* Use whatever attributes a normal TM store has. */
33410 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33411 attrs_store = DECL_ATTRIBUTES (decl);
33412 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33413 /* Use whatever attributes a normal TM log has. */
33414 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33415 attrs_log = DECL_ATTRIBUTES (decl);
33416 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33418 for (i = 0, d = bdesc_tm;
33419 i < ARRAY_SIZE (bdesc_tm);
33420 i++, d++)
33422 if ((d->mask & ix86_isa_flags) != 0
33423 || (lang_hooks.builtin_function
33424 == lang_hooks.builtin_function_ext_scope))
33426 tree type, attrs, attrs_type;
33427 enum built_in_function code = (enum built_in_function) d->code;
33429 ftype = (enum ix86_builtin_func_type) d->flag;
33430 type = ix86_get_builtin_func_type (ftype);
33432 if (BUILTIN_TM_LOAD_P (code))
33434 attrs = attrs_load;
33435 attrs_type = attrs_type_load;
33437 else if (BUILTIN_TM_STORE_P (code))
33439 attrs = attrs_store;
33440 attrs_type = attrs_type_store;
33442 else
33444 attrs = attrs_log;
33445 attrs_type = attrs_type_log;
33447 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33448 /* The builtin without the prefix for
33449 calling it directly. */
33450 d->name + strlen ("__builtin_"),
33451 attrs);
33452 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33453 set the TYPE_ATTRIBUTES. */
33454 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33456 set_builtin_decl (code, decl, false);
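/* Editorial note on the naming convention used just above, with a
   concrete example taken from the bdesc_tm table: for
   d->name == "__builtin__ITM_WM128" the call to add_builtin_function
   registers the builtin under that full name but also makes it
   directly callable as

     d->name + strlen ("__builtin_")   ==>   "_ITM_WM128"

   which matches the _ITM_* entry points the TM runtime (libitm) is
   expected to provide.  */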
33461 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33462 in the current target ISA, to allow the user to compile particular modules
33463 with target-specific options that differ from the command-line
33464 options. */
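/* Editorial example of why everything is registered up front rather
   than only what the command-line ISA enables: a translation unit
   built with plain -msse2 may still contain, say,

     __attribute__ ((target ("avx2")))
     void f (void) { ... uses AVX2 builtins ... }

   so the builtin tables must describe the wider ISAs too; whether a
   particular builtin is actually usable is then checked against the
   current function's target flags rather than the global command
   line.  (The attribute above is illustrative, not from this file.)  */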
33465 static void
33466 ix86_init_mmx_sse_builtins (void)
33468 const struct builtin_description * d;
33469 enum ix86_builtin_func_type ftype;
33470 size_t i;
33472 /* Add all special builtins with variable number of operands. */
33473 for (i = 0, d = bdesc_special_args;
33474 i < ARRAY_SIZE (bdesc_special_args);
33475 i++, d++)
33477 if (d->name == 0)
33478 continue;
33480 ftype = (enum ix86_builtin_func_type) d->flag;
33481 def_builtin (d->mask, d->name, ftype, d->code);
33484 /* Add all builtins with variable number of operands. */
33485 for (i = 0, d = bdesc_args;
33486 i < ARRAY_SIZE (bdesc_args);
33487 i++, d++)
33489 if (d->name == 0)
33490 continue;
33492 ftype = (enum ix86_builtin_func_type) d->flag;
33493 def_builtin_const (d->mask, d->name, ftype, d->code);
33496 /* Add all builtins with rounding. */
33497 for (i = 0, d = bdesc_round_args;
33498 i < ARRAY_SIZE (bdesc_round_args);
33499 i++, d++)
33501 if (d->name == 0)
33502 continue;
33504 ftype = (enum ix86_builtin_func_type) d->flag;
33505 def_builtin_const (d->mask, d->name, ftype, d->code);
33508 /* pcmpestr[im] insns. */
33509 for (i = 0, d = bdesc_pcmpestr;
33510 i < ARRAY_SIZE (bdesc_pcmpestr);
33511 i++, d++)
33513 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33514 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33515 else
33516 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33517 def_builtin_const (d->mask, d->name, ftype, d->code);
33520 /* pcmpistr[im] insns. */
33521 for (i = 0, d = bdesc_pcmpistr;
33522 i < ARRAY_SIZE (bdesc_pcmpistr);
33523 i++, d++)
33525 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33526 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33527 else
33528 ftype = INT_FTYPE_V16QI_V16QI_INT;
33529 def_builtin_const (d->mask, d->name, ftype, d->code);
33532 /* comi/ucomi insns. */
33533 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33535 if (d->mask == OPTION_MASK_ISA_SSE2)
33536 ftype = INT_FTYPE_V2DF_V2DF;
33537 else
33538 ftype = INT_FTYPE_V4SF_V4SF;
33539 def_builtin_const (d->mask, d->name, ftype, d->code);
33542 /* SSE */
33543 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33544 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33545 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33546 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33548 /* SSE or 3DNow!A */
33549 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33550 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33551 IX86_BUILTIN_MASKMOVQ);
33553 /* SSE2 */
33554 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33555 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33557 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33558 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33559 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33560 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33562 /* SSE3. */
33563 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33564 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33565 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33566 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33568 /* AES */
33569 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33570 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33571 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33572 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33573 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33574 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33575 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33576 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33577 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33578 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33579 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33580 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33582 /* PCLMUL */
33583 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33584 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33586 /* RDRND */
33587 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33588 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33589 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33590 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33591 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33592 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33593 IX86_BUILTIN_RDRAND64_STEP);
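/* Editorial sketch of how the RDRND step builtins registered above
   are used (this mirrors the <immintrin.h> _rdrand32_step wrapper;
   the variable name is illustrative):

     unsigned int val;
     if (__builtin_ia32_rdrand32_step (&val))
       ... val holds a hardware random number ...
     else
       ... no entropy was ready, retry ...

   The INT_FTYPE_PUNSIGNED signature reflects the carry-flag protocol:
   a nonzero return means the store through the pointer is valid.  */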
33595 /* AVX2 */
33596 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33597 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33598 IX86_BUILTIN_GATHERSIV2DF);
33600 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33601 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33602 IX86_BUILTIN_GATHERSIV4DF);
33604 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33605 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33606 IX86_BUILTIN_GATHERDIV2DF);
33608 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33609 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33610 IX86_BUILTIN_GATHERDIV4DF);
33612 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33613 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33614 IX86_BUILTIN_GATHERSIV4SF);
33616 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33617 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33618 IX86_BUILTIN_GATHERSIV8SF);
33620 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33621 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33622 IX86_BUILTIN_GATHERDIV4SF);
33624 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33625 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33626 IX86_BUILTIN_GATHERDIV8SF);
33628 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33629 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33630 IX86_BUILTIN_GATHERSIV2DI);
33632 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33633 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33634 IX86_BUILTIN_GATHERSIV4DI);
33636 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33637 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33638 IX86_BUILTIN_GATHERDIV2DI);
33640 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33641 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33642 IX86_BUILTIN_GATHERDIV4DI);
33644 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33645 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33646 IX86_BUILTIN_GATHERSIV4SI);
33648 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33649 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33650 IX86_BUILTIN_GATHERSIV8SI);
33652 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33653 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33654 IX86_BUILTIN_GATHERDIV4SI);
33656 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33657 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33658 IX86_BUILTIN_GATHERDIV8SI);
33660 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33661 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33662 IX86_BUILTIN_GATHERALTSIV4DF);
33664 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33665 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33666 IX86_BUILTIN_GATHERALTDIV8SF);
33668 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33669 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33670 IX86_BUILTIN_GATHERALTSIV4DI);
33672 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33673 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33674 IX86_BUILTIN_GATHERALTDIV8SI);
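/* Editorial sketch of the operand convention shared by the gather
   builtins registered above, following the
   V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT signature (names are
   illustrative):

     dst = __builtin_ia32_gathersiv2df (src, base, index, mask, scale);

   i.e. merge source, base pointer, index vector, mask vector and a
   literal scale, in the same order as the _mm_mask_i32gather_pd
   family of intrinsics these builtins back.  */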
33676 /* AVX512F */
33677 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33678 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33679 IX86_BUILTIN_GATHER3SIV16SF);
33681 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33682 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33683 IX86_BUILTIN_GATHER3SIV8DF);
33685 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33686 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33687 IX86_BUILTIN_GATHER3DIV16SF);
33689 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33690 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33691 IX86_BUILTIN_GATHER3DIV8DF);
33693 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33694 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33695 IX86_BUILTIN_GATHER3SIV16SI);
33697 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33698 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33699 IX86_BUILTIN_GATHER3SIV8DI);
33701 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33702 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33703 IX86_BUILTIN_GATHER3DIV16SI);
33705 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33706 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33707 IX86_BUILTIN_GATHER3DIV8DI);
33709 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33710 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33711 IX86_BUILTIN_GATHER3ALTSIV8DF);
33713 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33714 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33715 IX86_BUILTIN_GATHER3ALTDIV16SF);
33717 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33718 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33719 IX86_BUILTIN_GATHER3ALTSIV8DI);
33721 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33722 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33723 IX86_BUILTIN_GATHER3ALTDIV16SI);
33725 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33726 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33727 IX86_BUILTIN_SCATTERSIV16SF);
33729 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33730 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33731 IX86_BUILTIN_SCATTERSIV8DF);
33733 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33734 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33735 IX86_BUILTIN_SCATTERDIV16SF);
33737 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33738 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33739 IX86_BUILTIN_SCATTERDIV8DF);
33741 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33742 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33743 IX86_BUILTIN_SCATTERSIV16SI);
33745 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33746 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33747 IX86_BUILTIN_SCATTERSIV8DI);
33749 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33750 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33751 IX86_BUILTIN_SCATTERDIV16SI);
33753 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33754 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33755 IX86_BUILTIN_SCATTERDIV8DI);
33757 /* AVX512VL */
33758 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33759 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33760 IX86_BUILTIN_GATHER3SIV2DF);
33762 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33763 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33764 IX86_BUILTIN_GATHER3SIV4DF);
33766 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33767 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33768 IX86_BUILTIN_GATHER3DIV2DF);
33770 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33771 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33772 IX86_BUILTIN_GATHER3DIV4DF);
33774 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33775 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33776 IX86_BUILTIN_GATHER3SIV4SF);
33778 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33779 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33780 IX86_BUILTIN_GATHER3SIV8SF);
33782 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33783 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33784 IX86_BUILTIN_GATHER3DIV4SF);
33786 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33787 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33788 IX86_BUILTIN_GATHER3DIV8SF);
33790 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33791 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33792 IX86_BUILTIN_GATHER3SIV2DI);
33794 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33795 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33796 IX86_BUILTIN_GATHER3SIV4DI);
33798 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33799 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33800 IX86_BUILTIN_GATHER3DIV2DI);
33802 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33803 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33804 IX86_BUILTIN_GATHER3DIV4DI);
33806 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33807 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33808 IX86_BUILTIN_GATHER3SIV4SI);
33810 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33811 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33812 IX86_BUILTIN_GATHER3SIV8SI);
33814 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33815 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33816 IX86_BUILTIN_GATHER3DIV4SI);
33818 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33819 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33820 IX86_BUILTIN_GATHER3DIV8SI);
33822 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33823 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33824 IX86_BUILTIN_GATHER3ALTSIV4DF);
33826 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33827 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33828 IX86_BUILTIN_GATHER3ALTDIV8SF);
33830 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33831 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33832 IX86_BUILTIN_GATHER3ALTSIV4DI);
33834 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33835 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33836 IX86_BUILTIN_GATHER3ALTDIV8SI);
33838 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33839 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33840 IX86_BUILTIN_SCATTERSIV8SF);
33842 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33843 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33844 IX86_BUILTIN_SCATTERSIV4SF);
33846 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33847 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33848 IX86_BUILTIN_SCATTERSIV4DF);
33850 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33851 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33852 IX86_BUILTIN_SCATTERSIV2DF);
33854 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33855 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33856 IX86_BUILTIN_SCATTERDIV8SF);
33858 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33859 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33860 IX86_BUILTIN_SCATTERDIV4SF);
33862 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33863 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33864 IX86_BUILTIN_SCATTERDIV4DF);
33866 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33867 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33868 IX86_BUILTIN_SCATTERDIV2DF);
33870 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33871 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33872 IX86_BUILTIN_SCATTERSIV8SI);
33874 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33875 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33876 IX86_BUILTIN_SCATTERSIV4SI);
33878 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33879 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33880 IX86_BUILTIN_SCATTERSIV4DI);
33882 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33883 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33884 IX86_BUILTIN_SCATTERSIV2DI);
33886 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33887 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33888 IX86_BUILTIN_SCATTERDIV8SI);
33890 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33891 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33892 IX86_BUILTIN_SCATTERDIV4SI);
33894 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33895 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33896 IX86_BUILTIN_SCATTERDIV4DI);
33898 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33899 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33900 IX86_BUILTIN_SCATTERDIV2DI);
33902 /* AVX512PF */
33903 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33904 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33905 IX86_BUILTIN_GATHERPFDPD);
33906 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33907 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33908 IX86_BUILTIN_GATHERPFDPS);
33909 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33910 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33911 IX86_BUILTIN_GATHERPFQPD);
33912 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33913 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33914 IX86_BUILTIN_GATHERPFQPS);
33915 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33916 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33917 IX86_BUILTIN_SCATTERPFDPD);
33918 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33919 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33920 IX86_BUILTIN_SCATTERPFDPS);
33921 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33922 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33923 IX86_BUILTIN_SCATTERPFQPD);
33924 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33925 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33926 IX86_BUILTIN_SCATTERPFQPS);
33928 /* SHA */
33929 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33930 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33931 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33932 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33933 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33934 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33935 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33936 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33937 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33938 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33939 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33940 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33941 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33942 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33944 /* RTM. */
33945 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33946 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33948 /* MMX access to the vec_init patterns. */
33949 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33950 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33952 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33953 V4HI_FTYPE_HI_HI_HI_HI,
33954 IX86_BUILTIN_VEC_INIT_V4HI);
33956 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33957 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33958 IX86_BUILTIN_VEC_INIT_V8QI);
33960 /* Access to the vec_extract patterns. */
33961 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33962 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33963 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33964 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33965 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33966 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33967 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33968 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33969 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33970 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33972 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33973 "__builtin_ia32_vec_ext_v4hi",
33974 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33976 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33977 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33979 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33980 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
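/* Editorial sketch of the vec_ext builtins registered above: each
   takes a vector and a constant lane number and returns that element
   (the typedef and names are illustrative):

     typedef float v4sf __attribute__ ((vector_size (16)));
     v4sf v = { 1.0f, 2.0f, 3.0f, 4.0f };
     float third = __builtin_ia32_vec_ext_v4sf (v, 2);   ... 3.0f ...  */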
33982 /* Access to the vec_set patterns. */
33983 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33984 "__builtin_ia32_vec_set_v2di",
33985 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33987 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33988 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33990 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33991 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33993 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33994 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33996 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33997 "__builtin_ia32_vec_set_v4hi",
33998 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34000 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34001 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34003 /* RDSEED */
34004 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34005 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34006 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34007 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34008 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34009 "__builtin_ia32_rdseed_di_step",
34010 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34012 /* ADCX */
34013 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34014 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34015 def_builtin (OPTION_MASK_ISA_64BIT,
34016 "__builtin_ia32_addcarryx_u64",
34017 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34018 IX86_BUILTIN_ADDCARRYX64);
34020 /* SBB */
34021 def_builtin (0, "__builtin_ia32_sbb_u32",
34022 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34023 def_builtin (OPTION_MASK_ISA_64BIT,
34024 "__builtin_ia32_sbb_u64",
34025 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34026 IX86_BUILTIN_SBB64);
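/* Editorial sketch of the add-with-carry protocol behind the ADCX and
   SBB builtins above (operand names are illustrative):

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);

   i.e. UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED reads as
   carry_out = f (carry_in, x, y, &sum), which is exactly the shape a
   multi-word addition loop needs.  */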
34028 /* Read/write FLAGS. */
34029 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34030 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34031 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34032 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34033 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34034 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34035 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34036 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
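/* Editorial example for the EFLAGS builtins above, assuming a 32-bit
   target (the _u32 variants are only registered when not compiling
   for 64-bit, per the ~OPTION_MASK_ISA_64BIT mask):

     unsigned int flags = __builtin_ia32_readeflags_u32 ();
     __builtin_ia32_writeeflags_u32 (flags);

   matching the UNSIGNED_FTYPE_VOID and VOID_FTYPE_UNSIGNED signatures
   registered just above.  */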
34038 /* CLFLUSHOPT. */
34039 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34040 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34042 /* CLWB. */
34043 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34044 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34046 /* Add FMA4 and XOP multi-arg instructions. */
34047 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34049 if (d->name == 0)
34050 continue;
34052 ftype = (enum ix86_builtin_func_type) d->flag;
34053 def_builtin_const (d->mask, d->name, ftype, d->code);
34057 static void
34058 ix86_init_mpx_builtins ()
34060 const struct builtin_description * d;
34061 enum ix86_builtin_func_type ftype;
34062 tree decl;
34063 size_t i;
34065 for (i = 0, d = bdesc_mpx;
34066 i < ARRAY_SIZE (bdesc_mpx);
34067 i++, d++)
34069 if (d->name == 0)
34070 continue;
34072 ftype = (enum ix86_builtin_func_type) d->flag;
34073 decl = def_builtin (d->mask, d->name, ftype, d->code);
34075 /* Without the leaf and nothrow flags on MPX builtins,
34076 abnormal edges may follow their calls when setjmp
34077 is present in the function. Since we may have a lot
34078 of MPX builtin calls, this causes lots of useless
34079 edges and enormous PHI nodes. To avoid this we mark
34080 MPX builtins as leaf and nothrow. */
34081 if (decl)
34083 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34084 NULL_TREE);
34085 TREE_NOTHROW (decl) = 1;
34087 else
34089 ix86_builtins_isa[(int)d->code].leaf_p = true;
34090 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34094 for (i = 0, d = bdesc_mpx_const;
34095 i < ARRAY_SIZE (bdesc_mpx_const);
34096 i++, d++)
34098 if (d->name == 0)
34099 continue;
34101 ftype = (enum ix86_builtin_func_type) d->flag;
34102 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34104 if (decl)
34106 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34107 NULL_TREE);
34108 TREE_NOTHROW (decl) = 1;
34110 else
34112 ix86_builtins_isa[(int)d->code].leaf_p = true;
34113 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34118 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34119 to return a pointer to VERSION_DECL if the outcome of the expression
34120 formed by PREDICATE_CHAIN is true. This function will be called during
34121 version dispatch to decide which function version to execute. It returns
34122 the basic block at the end, to which more conditions can be added. */
34124 static basic_block
34125 add_condition_to_bb (tree function_decl, tree version_decl,
34126 tree predicate_chain, basic_block new_bb)
34128 gimple return_stmt;
34129 tree convert_expr, result_var;
34130 gimple convert_stmt;
34131 gimple call_cond_stmt;
34132 gimple if_else_stmt;
34134 basic_block bb1, bb2, bb3;
34135 edge e12, e23;
34137 tree cond_var, and_expr_var = NULL_TREE;
34138 gimple_seq gseq;
34140 tree predicate_decl, predicate_arg;
34142 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34144 gcc_assert (new_bb != NULL);
34145 gseq = bb_seq (new_bb);
34148 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34149 build_fold_addr_expr (version_decl));
34150 result_var = create_tmp_var (ptr_type_node);
34151 convert_stmt = gimple_build_assign (result_var, convert_expr);
34152 return_stmt = gimple_build_return (result_var);
34154 if (predicate_chain == NULL_TREE)
34156 gimple_seq_add_stmt (&gseq, convert_stmt);
34157 gimple_seq_add_stmt (&gseq, return_stmt);
34158 set_bb_seq (new_bb, gseq);
34159 gimple_set_bb (convert_stmt, new_bb);
34160 gimple_set_bb (return_stmt, new_bb);
34161 pop_cfun ();
34162 return new_bb;
34165 while (predicate_chain != NULL)
34167 cond_var = create_tmp_var (integer_type_node);
34168 predicate_decl = TREE_PURPOSE (predicate_chain);
34169 predicate_arg = TREE_VALUE (predicate_chain);
34170 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34171 gimple_call_set_lhs (call_cond_stmt, cond_var);
34173 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34174 gimple_set_bb (call_cond_stmt, new_bb);
34175 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34177 predicate_chain = TREE_CHAIN (predicate_chain);
34179 if (and_expr_var == NULL)
34180 and_expr_var = cond_var;
34181 else
34183 gimple assign_stmt;
34184 /* Use MIN_EXPR to AND the predicate results together:
34185 and_expr_var = min_expr <cond_var, and_expr_var>; the chained
minimum stays positive only if every predicate returned nonzero. */
34186 assign_stmt = gimple_build_assign (and_expr_var,
34187 build2 (MIN_EXPR, integer_type_node,
34188 cond_var, and_expr_var));
34190 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34191 gimple_set_bb (assign_stmt, new_bb);
34192 gimple_seq_add_stmt (&gseq, assign_stmt);
34196 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34197 integer_zero_node,
34198 NULL_TREE, NULL_TREE);
34199 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34200 gimple_set_bb (if_else_stmt, new_bb);
34201 gimple_seq_add_stmt (&gseq, if_else_stmt);
34203 gimple_seq_add_stmt (&gseq, convert_stmt);
34204 gimple_seq_add_stmt (&gseq, return_stmt);
34205 set_bb_seq (new_bb, gseq);
34207 bb1 = new_bb;
34208 e12 = split_block (bb1, if_else_stmt);
34209 bb2 = e12->dest;
34210 e12->flags &= ~EDGE_FALLTHRU;
34211 e12->flags |= EDGE_TRUE_VALUE;
34213 e23 = split_block (bb2, return_stmt);
34215 gimple_set_bb (convert_stmt, bb2);
34216 gimple_set_bb (return_stmt, bb2);
34218 bb3 = e23->dest;
34219 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34221 remove_edge (e23);
34222 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34224 pop_cfun ();
34226 return bb3;
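/* Editorial sketch of the GIMPLE shape one call to add_condition_to_bb
   produces for a single predicate (pseudo-dump; the function and
   argument names are invented for illustration):

     <new_bb / bb1>
       cond = __builtin_cpu_is ("haswell");       ... call_cond_stmt
       if (cond > 0) goto bb2; else goto bb3;     ... if_else_stmt
     <bb2>
       ptr = (void *) &foo_haswell;               ... convert_stmt
       return ptr;                                ... return_stmt
     <bb3>
       ... the caller appends the next condition here ...

   bb3 is the returned block, so conditions can be chained in
   decreasing dispatch priority.  */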
34229 /* This parses the attribute arguments to target in DECL and determines
34230 the right builtin to use to match the platform specification.
34231 It returns the priority value for this version decl. If PREDICATE_LIST
34232 is not NULL, it stores the list of cpu features that need to be checked
34233 before dispatching this function. */
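/* Editorial sketch of the user-level construct this serves (GCC
   function multiversioning; the declarations are illustrative, not
   from this file):

     __attribute__ ((target ("default")))     int foo (void);
     __attribute__ ((target ("arch=core2")))  int foo (void);
     __attribute__ ((target ("avx2,popcnt"))) int foo (void);

   For each non-default version the "target" string is parsed below
   into a dispatch priority and, when PREDICATE_LIST is given, a chain
   of __builtin_cpu_is / __builtin_cpu_supports checks.  */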
34235 static unsigned int
34236 get_builtin_code_for_version (tree decl, tree *predicate_list)
34238 tree attrs;
34239 struct cl_target_option cur_target;
34240 tree target_node;
34241 struct cl_target_option *new_target;
34242 const char *arg_str = NULL;
34243 const char *attrs_str = NULL;
34244 char *tok_str = NULL;
34245 char *token;
34247 /* Priority of i386 features; a greater value means higher priority. This is
34248 used to decide the order in which function dispatch must happen. For
34249 instance, a version specialized for SSE4.2 should be checked for dispatch
34250 before a version for SSE3, as SSE4.2 implies SSE3. */
34251 enum feature_priority
34253 P_ZERO = 0,
34254 P_MMX,
34255 P_SSE,
34256 P_SSE2,
34257 P_SSE3,
34258 P_SSSE3,
34259 P_PROC_SSSE3,
34260 P_SSE4_A,
34261 P_PROC_SSE4_A,
34262 P_SSE4_1,
34263 P_SSE4_2,
34264 P_PROC_SSE4_2,
34265 P_POPCNT,
34266 P_AVX,
34267 P_PROC_AVX,
34268 P_FMA4,
34269 P_XOP,
34270 P_PROC_XOP,
34271 P_FMA,
34272 P_PROC_FMA,
34273 P_AVX2,
34274 P_PROC_AVX2,
34275 P_AVX512F,
34276 P_PROC_AVX512F
34279 enum feature_priority priority = P_ZERO;
34281 /* These are the target attribute strings for which a dispatcher is
34282 available, from fold_builtin_cpu. */
34284 static struct _feature_list
34286 const char *const name;
34287 const enum feature_priority priority;
34289 const feature_list[] =
34291 {"mmx", P_MMX},
34292 {"sse", P_SSE},
34293 {"sse2", P_SSE2},
34294 {"sse3", P_SSE3},
34295 {"sse4a", P_SSE4_A},
34296 {"ssse3", P_SSSE3},
34297 {"sse4.1", P_SSE4_1},
34298 {"sse4.2", P_SSE4_2},
34299 {"popcnt", P_POPCNT},
34300 {"avx", P_AVX},
34301 {"fma4", P_FMA4},
34302 {"xop", P_XOP},
34303 {"fma", P_FMA},
34304 {"avx2", P_AVX2},
34305 {"avx512f", P_AVX512F}
34309 static unsigned int NUM_FEATURES
34310 = sizeof (feature_list) / sizeof (struct _feature_list);
34312 unsigned int i;
34314 tree predicate_chain = NULL_TREE;
34315 tree predicate_decl, predicate_arg;
34317 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34318 gcc_assert (attrs != NULL);
34320 attrs = TREE_VALUE (TREE_VALUE (attrs));
34322 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34323 attrs_str = TREE_STRING_POINTER (attrs);
34325 /* Return priority zero for default function. */
34326 if (strcmp (attrs_str, "default") == 0)
34327 return 0;
34329 /* Handle arch= if specified. For priority, set it to be 1 more than
34330 the best instruction set the processor can handle. For instance, if
34331 there is a version for atom and a version for ssse3 (the highest ISA
34332 priority for atom), the atom version must be checked for dispatch
34333 before the ssse3 version. */
34334 if (strstr (attrs_str, "arch=") != NULL)
34336 cl_target_option_save (&cur_target, &global_options);
34337 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34338 &global_options_set);
34340 gcc_assert (target_node);
34341 new_target = TREE_TARGET_OPTION (target_node);
34342 gcc_assert (new_target);
34344 if (new_target->arch_specified && new_target->arch > 0)
34346 switch (new_target->arch)
34348 case PROCESSOR_CORE2:
34349 arg_str = "core2";
34350 priority = P_PROC_SSSE3;
34351 break;
34352 case PROCESSOR_NEHALEM:
34353 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34354 arg_str = "westmere";
34355 else
34356 /* We translate "arch=corei7" and "arch=nehalem" to
34357 "corei7" so that it will be mapped to M_INTEL_COREI7
34358 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34359 arg_str = "corei7";
34360 priority = P_PROC_SSE4_2;
34361 break;
34362 case PROCESSOR_SANDYBRIDGE:
34363 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34364 arg_str = "ivybridge";
34365 else
34366 arg_str = "sandybridge";
34367 priority = P_PROC_AVX;
34368 break;
34369 case PROCESSOR_HASWELL:
34370 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34371 arg_str = "broadwell";
34372 else
34373 arg_str = "haswell";
34374 priority = P_PROC_AVX2;
34375 break;
34376 case PROCESSOR_BONNELL:
34377 arg_str = "bonnell";
34378 priority = P_PROC_SSSE3;
34379 break;
34380 case PROCESSOR_KNL:
34381 arg_str = "knl";
34382 priority = P_PROC_AVX512F;
34383 break;
34384 case PROCESSOR_SILVERMONT:
34385 arg_str = "silvermont";
34386 priority = P_PROC_SSE4_2;
34387 break;
34388 case PROCESSOR_AMDFAM10:
34389 arg_str = "amdfam10h";
34390 priority = P_PROC_SSE4_A;
34391 break;
34392 case PROCESSOR_BTVER1:
34393 arg_str = "btver1";
34394 priority = P_PROC_SSE4_A;
34395 break;
34396 case PROCESSOR_BTVER2:
34397 arg_str = "btver2";
34398 priority = P_PROC_AVX;
34399 break;
34400 case PROCESSOR_BDVER1:
34401 arg_str = "bdver1";
34402 priority = P_PROC_XOP;
34403 break;
34404 case PROCESSOR_BDVER2:
34405 arg_str = "bdver2";
34406 priority = P_PROC_FMA;
34407 break;
34408 case PROCESSOR_BDVER3:
34409 arg_str = "bdver3";
34410 priority = P_PROC_FMA;
34411 break;
34412 case PROCESSOR_BDVER4:
34413 arg_str = "bdver4";
34414 priority = P_PROC_AVX2;
34415 break;
34419 cl_target_option_restore (&global_options, &cur_target);
34421 if (predicate_list && arg_str == NULL)
34423 error_at (DECL_SOURCE_LOCATION (decl),
34424 "No dispatcher found for the versioning attributes");
34425 return 0;
34428 if (predicate_list)
34430 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34431 /* For a C string literal the length includes the trailing NULL. */
34432 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34433 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34434 predicate_chain);
34438 /* Process feature name. */
34439 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34440 strcpy (tok_str, attrs_str);
34441 token = strtok (tok_str, ",");
34442 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34444 while (token != NULL)
34446 /* Do not process "arch=" */
34447 if (strncmp (token, "arch=", 5) == 0)
34449 token = strtok (NULL, ",");
34450 continue;
34452 for (i = 0; i < NUM_FEATURES; ++i)
34454 if (strcmp (token, feature_list[i].name) == 0)
34456 if (predicate_list)
34458 predicate_arg = build_string_literal (
34459 strlen (feature_list[i].name) + 1,
34460 feature_list[i].name);
34461 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34462 predicate_chain);
34464 /* Find the maximum priority feature. */
34465 if (feature_list[i].priority > priority)
34466 priority = feature_list[i].priority;
34468 break;
34471 if (predicate_list && i == NUM_FEATURES)
34473 error_at (DECL_SOURCE_LOCATION (decl),
34474 "No dispatcher found for %s", token);
34475 return 0;
34477 token = strtok (NULL, ",");
34479 free (tok_str);
34481 if (predicate_list && predicate_chain == NULL_TREE)
34483 error_at (DECL_SOURCE_LOCATION (decl),
34484 "No dispatcher found for the versioning attributes : %s",
34485 attrs_str);
34486 return 0;
34488 else if (predicate_list)
34490 predicate_chain = nreverse (predicate_chain);
34491 *predicate_list = predicate_chain;
34494 return priority;
34497 /* This compares the priority of target features in function DECL1
34498 and DECL2. It returns positive value if DECL1 is higher priority,
34499 negative value if DECL2 is higher priority and 0 if they are the
34500 same. */
34502 static int
34503 ix86_compare_version_priority (tree decl1, tree decl2)
34505 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34506 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34508 return (int)priority1 - (int)priority2;
34511 /* V1 and V2 point to function versions with different priorities
34512 based on the target ISA. This function compares their priorities. */
34514 static int
34515 feature_compare (const void *v1, const void *v2)
34517 typedef struct _function_version_info
34519 tree version_decl;
34520 tree predicate_chain;
34521 unsigned int dispatch_priority;
34522 } function_version_info;
34524 const function_version_info c1 = *(const function_version_info *)v1;
34525 const function_version_info c2 = *(const function_version_info *)v2;
34526 return (c2.dispatch_priority - c1.dispatch_priority);
34529 /* This function generates the dispatch function for
34530 multi-versioned functions. DISPATCH_DECL is the function which will
34531 contain the dispatch logic. FNDECLS are the function choices for
34532 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34533 in DISPATCH_DECL in which the dispatch code is generated. */
34535 static int
34536 dispatch_function_versions (tree dispatch_decl,
34537 void *fndecls_p,
34538 basic_block *empty_bb)
34540 tree default_decl;
34541 gimple ifunc_cpu_init_stmt;
34542 gimple_seq gseq;
34543 int ix;
34544 tree ele;
34545 vec<tree> *fndecls;
34546 unsigned int num_versions = 0;
34547 unsigned int actual_versions = 0;
34548 unsigned int i;
34550 struct _function_version_info
34552 tree version_decl;
34553 tree predicate_chain;
34554 unsigned int dispatch_priority;
34555 }*function_version_info;
34557 gcc_assert (dispatch_decl != NULL
34558 && fndecls_p != NULL
34559 && empty_bb != NULL);
34561 /* fndecls_p is actually a vector. */
34562 fndecls = static_cast<vec<tree> *> (fndecls_p);
34564 /* At least one more version other than the default. */
34565 num_versions = fndecls->length ();
34566 gcc_assert (num_versions >= 2);
34568 function_version_info = (struct _function_version_info *)
34569 XNEWVEC (struct _function_version_info, (num_versions - 1));
34571 /* The first version in the vector is the default decl. */
34572 default_decl = (*fndecls)[0];
34574 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34576 gseq = bb_seq (*empty_bb);
34577 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34578 constructors, so explicitly call __builtin_cpu_init here. */
34579 ifunc_cpu_init_stmt = gimple_build_call_vec (
34580 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34581 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34582 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34583 set_bb_seq (*empty_bb, gseq);
34585 pop_cfun ();
34588 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34590 tree version_decl = ele;
34591 tree predicate_chain = NULL_TREE;
34592 unsigned int priority;
34593 /* Get attribute string, parse it and find the right predicate decl.
34594 The predicate function could be a lengthy combination of many
34595 features, like arch-type and various isa-variants. */
34596 priority = get_builtin_code_for_version (version_decl,
34597 &predicate_chain);
34599 if (predicate_chain == NULL_TREE)
34600 continue;
34602 function_version_info [actual_versions].version_decl = version_decl;
34603 function_version_info [actual_versions].predicate_chain
34604 = predicate_chain;
34605 function_version_info [actual_versions].dispatch_priority = priority;
34606 actual_versions++;
34609 /* Sort the versions according to descending order of dispatch priority. The
34610 priority is based on the ISA. This is not a perfect solution. There
34611 could still be ambiguity. If more than one function version is suitable
34612 to execute, which one should be dispatched? In future, allow the user
34613 to specify a dispatch priority next to the version. */
34614 qsort (function_version_info, actual_versions,
34615 sizeof (struct _function_version_info), feature_compare);
34617 for (i = 0; i < actual_versions; ++i)
34618 *empty_bb = add_condition_to_bb (dispatch_decl,
34619 function_version_info[i].version_decl,
34620 function_version_info[i].predicate_chain,
34621 *empty_bb);
34623 /* Dispatch the default version at the end. */
34624 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34625 NULL, *empty_bb);
34627 free (function_version_info);
34628 return 0;
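/* A minimal sketch of the source-level multiversioning this machinery
   serves (illustrative only, hence not compiled; GCC accepts this form
   in the C++ front end, and the function name is made up).  */
#if 0
__attribute__ ((target ("default")))
int vsum (const int *a, int n)
{
  int i, s = 0;
  for (i = 0; i < n; i++)
    s += a[i];
  return s;
}

__attribute__ ((target ("avx2")))
int vsum (const int *a, int n)
{
  int i, s = 0;
  for (i = 0; i < n; i++)	/* Vectorizable with AVX2.  */
    s += a[i];
  return s;
}

/* Callers simply call vsum (); dispatch_function_versions above emits the
   __builtin_cpu_init call and one condition per version, highest dispatch
   priority first, into the resolver's body.  */
#endif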
34631 /* Comparator function to be used in qsort routine to sort attribute
34632 specification strings to "target". */
34634 static int
34635 attr_strcmp (const void *v1, const void *v2)
34637 const char *c1 = *(char *const*)v1;
34638 const char *c2 = *(char *const*)v2;
34639 return strcmp (c1, c2);
34642 /* ARGLIST is the argument to the target attribute. This function tokenizes
34643 the comma-separated arguments, sorts them and returns a string which
34644 is a unique identifier for the comma-separated arguments. It also
34645 replaces the non-identifier characters "=,-" with "_". */
34647 static char *
34648 sorted_attr_string (tree arglist)
34650 tree arg;
34651 size_t str_len_sum = 0;
34652 char **args = NULL;
34653 char *attr_str, *ret_str;
34654 char *attr = NULL;
34655 unsigned int argnum = 1;
34656 unsigned int i;
34658 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34660 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34661 size_t len = strlen (str);
34662 str_len_sum += len + 1;
34663 if (arg != arglist)
34664 argnum++;
34665 for (i = 0; i < strlen (str); i++)
34666 if (str[i] == ',')
34667 argnum++;
34670 attr_str = XNEWVEC (char, str_len_sum);
34671 str_len_sum = 0;
34672 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34674 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34675 size_t len = strlen (str);
34676 memcpy (attr_str + str_len_sum, str, len);
34677 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34678 str_len_sum += len + 1;
34681 /* Replace "=,-" with "_". */
34682 for (i = 0; i < strlen (attr_str); i++)
34683 if (attr_str[i] == '=' || attr_str[i]== '-')
34684 attr_str[i] = '_';
34686 if (argnum == 1)
34687 return attr_str;
34689 args = XNEWVEC (char *, argnum);
34691 i = 0;
34692 attr = strtok (attr_str, ",");
34693 while (attr != NULL)
34695 args[i] = attr;
34696 i++;
34697 attr = strtok (NULL, ",");
34700 qsort (args, argnum, sizeof (char *), attr_strcmp);
34702 ret_str = XNEWVEC (char, str_len_sum);
34703 str_len_sum = 0;
34704 for (i = 0; i < argnum; i++)
34706 size_t len = strlen (args[i]);
34707 memcpy (ret_str + str_len_sum, args[i], len);
34708 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34709 str_len_sum += len + 1;
34712 XDELETEVEC (args);
34713 XDELETEVEC (attr_str);
34714 return ret_str;
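/* Worked example (illustrative): for the attribute string
   "sse4.2,arch=core2" the loop above first rewrites '=' and '-' to '_',
   giving "sse4.2,arch_core2"; the tokens are then split on ',', sorted
   with attr_strcmp and rejoined with '_', yielding "arch_core2_sse4.2".  */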
34717 /* This function changes the assembler name for functions that are
34718 versions. If DECL is a function version and has a "target"
34719 attribute, it appends the attribute string to its assembler name. */
34721 static tree
34722 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34724 tree version_attr;
34725 const char *orig_name, *version_string;
34726 char *attr_str, *assembler_name;
34728 if (DECL_DECLARED_INLINE_P (decl)
34729 && lookup_attribute ("gnu_inline",
34730 DECL_ATTRIBUTES (decl)))
34731 error_at (DECL_SOURCE_LOCATION (decl),
34732 "Function versions cannot be marked as gnu_inline,"
34733 " bodies have to be generated");
34735 if (DECL_VIRTUAL_P (decl)
34736 || DECL_VINDEX (decl))
34737 sorry ("Virtual function multiversioning not supported");
34739 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34741 /* target attribute string cannot be NULL. */
34742 gcc_assert (version_attr != NULL_TREE);
34744 orig_name = IDENTIFIER_POINTER (id);
34745 version_string
34746 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34748 if (strcmp (version_string, "default") == 0)
34749 return id;
34751 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34752 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34754 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34756 /* Allow assembler name to be modified if already set. */
34757 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34758 SET_DECL_RTL (decl, NULL);
34760 tree ret = get_identifier (assembler_name);
34761 XDELETEVEC (attr_str);
34762 XDELETEVEC (assembler_name);
34763 return ret;
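/* Illustrative: a version of foo () carrying
   __attribute__ ((target ("sse4.2,arch=core2"))) thus ends up with the
   assembler name "foo.arch_core2_sse4.2", while the target ("default")
   version keeps its original name.  (The function name is hypothetical,
   shown only to make the mangling concrete.)  */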
34766 /* This function returns true if FN1 and FN2 are versions of the same function,
34767 that is, the target strings of the function decls are different. This assumes
34768 that FN1 and FN2 have the same signature. */
34770 static bool
34771 ix86_function_versions (tree fn1, tree fn2)
34773 tree attr1, attr2;
34774 char *target1, *target2;
34775 bool result;
34777 if (TREE_CODE (fn1) != FUNCTION_DECL
34778 || TREE_CODE (fn2) != FUNCTION_DECL)
34779 return false;
34781 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34782 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34784 /* At least one function decl should have the target attribute specified. */
34785 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34786 return false;
34788 /* Diagnose missing target attribute if one of the decls is already
34789 multi-versioned. */
34790 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34792 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34794 if (attr2 != NULL_TREE)
34796 tree tem = fn1;
34797 fn1 = fn2;
34798 fn2 = tem;
34799 attr1 = attr2;
34801 error_at (DECL_SOURCE_LOCATION (fn2),
34802 "missing %<target%> attribute for multi-versioned %D",
34803 fn2);
34804 inform (DECL_SOURCE_LOCATION (fn1),
34805 "previous declaration of %D", fn1);
34806 /* Prevent diagnosing of the same error multiple times. */
34807 DECL_ATTRIBUTES (fn2)
34808 = tree_cons (get_identifier ("target"),
34809 copy_node (TREE_VALUE (attr1)),
34810 DECL_ATTRIBUTES (fn2));
34812 return false;
34815 target1 = sorted_attr_string (TREE_VALUE (attr1));
34816 target2 = sorted_attr_string (TREE_VALUE (attr2));
34818 /* The sorted target strings must be different for fn1 and fn2
34819 to be versions. */
34820 if (strcmp (target1, target2) == 0)
34821 result = false;
34822 else
34823 result = true;
34825 XDELETEVEC (target1);
34826 XDELETEVEC (target2);
34828 return result;
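/* Illustrative sketch of what the check above accepts as versions
   (made-up function name; GCC implements this in the C++ front end):  */
#if 0
__attribute__ ((target ("default"))) int foo (void);	/* a version */
__attribute__ ((target ("avx")))     int foo (void);	/* a version */
__attribute__ ((target ("avx")))     int foo (void);	/* same sorted target
							   string, so merely a
							   redeclaration */
#endif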
34831 static tree
34832 ix86_mangle_decl_assembler_name (tree decl, tree id)
34834 /* For function version, add the target suffix to the assembler name. */
34835 if (TREE_CODE (decl) == FUNCTION_DECL
34836 && DECL_FUNCTION_VERSIONED (decl))
34837 id = ix86_mangle_function_version_assembler_name (decl, id);
34838 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34839 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34840 #endif
34842 return id;
34845 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34846 is true, append the full path name of the source file. */
34848 static char *
34849 make_name (tree decl, const char *suffix, bool make_unique)
34851 char *global_var_name;
34852 int name_len;
34853 const char *name;
34854 const char *unique_name = NULL;
34856 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34858 /* Get a unique name that can be used globally without any chance
34859 of collision at link time. */
34860 if (make_unique)
34861 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34863 name_len = strlen (name) + strlen (suffix) + 2;
34865 if (make_unique)
34866 name_len += strlen (unique_name) + 1;
34867 global_var_name = XNEWVEC (char, name_len);
34869 /* Use '.' to concatenate names as it is demangler friendly. */
34870 if (make_unique)
34871 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34872 suffix);
34873 else
34874 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34876 return global_var_name;
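/* Illustrative: for an externally visible foo the generated names are
   simply "foo.ifunc" or "foo.resolver"; when MAKE_UNIQUE is set a
   file-specific string from get_file_function_name is spliced in, giving
   "foo.<file-unique-string>.resolver", so that resolvers for local
   functions in different translation units cannot collide.  */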
34879 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34881 /* Make a dispatcher declaration for the multi-versioned function DECL.
34882 Calls to DECL function will be replaced with calls to the dispatcher
34883 by the front-end. Return the decl created. */
34885 static tree
34886 make_dispatcher_decl (const tree decl)
34888 tree func_decl;
34889 char *func_name;
34890 tree fn_type, func_type;
34891 bool is_uniq = false;
34893 if (TREE_PUBLIC (decl) == 0)
34894 is_uniq = true;
34896 func_name = make_name (decl, "ifunc", is_uniq);
34898 fn_type = TREE_TYPE (decl);
34899 func_type = build_function_type (TREE_TYPE (fn_type),
34900 TYPE_ARG_TYPES (fn_type));
34902 func_decl = build_fn_decl (func_name, func_type);
34903 XDELETEVEC (func_name);
34904 TREE_USED (func_decl) = 1;
34905 DECL_CONTEXT (func_decl) = NULL_TREE;
34906 DECL_INITIAL (func_decl) = error_mark_node;
34907 DECL_ARTIFICIAL (func_decl) = 1;
34908 /* Mark this func as external; the resolver will flip it again if
34909 it gets generated. */
34910 DECL_EXTERNAL (func_decl) = 1;
34911 /* This will be an IFUNC, and IFUNCs have to be externally visible. */
34912 TREE_PUBLIC (func_decl) = 1;
34914 return func_decl;
34917 #endif
34919 /* Returns true if DECL is multi-versioned and is the default function,
34920 that is, it is not tagged with a target-specific optimization. */
34922 static bool
34923 is_function_default_version (const tree decl)
34925 if (TREE_CODE (decl) != FUNCTION_DECL
34926 || !DECL_FUNCTION_VERSIONED (decl))
34927 return false;
34928 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34929 gcc_assert (attr);
34930 attr = TREE_VALUE (TREE_VALUE (attr));
34931 return (TREE_CODE (attr) == STRING_CST
34932 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34935 /* Make a dispatcher declaration for the multi-versioned function DECL.
34936 Calls to DECL function will be replaced with calls to the dispatcher
34937 by the front-end. Returns the decl of the dispatcher function. */
34939 static tree
34940 ix86_get_function_versions_dispatcher (void *decl)
34942 tree fn = (tree) decl;
34943 struct cgraph_node *node = NULL;
34944 struct cgraph_node *default_node = NULL;
34945 struct cgraph_function_version_info *node_v = NULL;
34946 struct cgraph_function_version_info *first_v = NULL;
34948 tree dispatch_decl = NULL;
34950 struct cgraph_function_version_info *default_version_info = NULL;
34952 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34954 node = cgraph_node::get (fn);
34955 gcc_assert (node != NULL);
34957 node_v = node->function_version ();
34958 gcc_assert (node_v != NULL);
34960 if (node_v->dispatcher_resolver != NULL)
34961 return node_v->dispatcher_resolver;
34963 /* Find the default version and make it the first node. */
34964 first_v = node_v;
34965 /* Go to the beginning of the chain. */
34966 while (first_v->prev != NULL)
34967 first_v = first_v->prev;
34968 default_version_info = first_v;
34969 while (default_version_info != NULL)
34971 if (is_function_default_version
34972 (default_version_info->this_node->decl))
34973 break;
34974 default_version_info = default_version_info->next;
34977 /* If there is no default node, just return NULL. */
34978 if (default_version_info == NULL)
34979 return NULL;
34981 /* Make default info the first node. */
34982 if (first_v != default_version_info)
34984 default_version_info->prev->next = default_version_info->next;
34985 if (default_version_info->next)
34986 default_version_info->next->prev = default_version_info->prev;
34987 first_v->prev = default_version_info;
34988 default_version_info->next = first_v;
34989 default_version_info->prev = NULL;
34992 default_node = default_version_info->this_node;
34994 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34995 if (targetm.has_ifunc_p ())
34997 struct cgraph_function_version_info *it_v = NULL;
34998 struct cgraph_node *dispatcher_node = NULL;
34999 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35001 /* Right now, the dispatching is done via ifunc. */
35002 dispatch_decl = make_dispatcher_decl (default_node->decl);
35004 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35005 gcc_assert (dispatcher_node != NULL);
35006 dispatcher_node->dispatcher_function = 1;
35007 dispatcher_version_info
35008 = dispatcher_node->insert_new_function_version ();
35009 dispatcher_version_info->next = default_version_info;
35010 dispatcher_node->definition = 1;
35012 /* Set the dispatcher for all the versions. */
35013 it_v = default_version_info;
35014 while (it_v != NULL)
35016 it_v->dispatcher_resolver = dispatch_decl;
35017 it_v = it_v->next;
35020 else
35021 #endif
35023 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35024 "multiversioning needs ifunc which is not supported "
35025 "on this target");
35028 return dispatch_decl;
35031 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35032 it to CHAIN. */
35034 static tree
35035 make_attribute (const char *name, const char *arg_name, tree chain)
35037 tree attr_name;
35038 tree attr_arg_name;
35039 tree attr_args;
35040 tree attr;
35042 attr_name = get_identifier (name);
35043 attr_arg_name = build_string (strlen (arg_name), arg_name);
35044 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35045 attr = tree_cons (attr_name, attr_args, chain);
35046 return attr;
35049 /* Make the resolver function decl to dispatch the versions of
35050 a multi-versioned function, DEFAULT_DECL. Create an
35051 empty basic block in the resolver and store the pointer in
35052 EMPTY_BB. Return the decl of the resolver function. */
35054 static tree
35055 make_resolver_func (const tree default_decl,
35056 const tree dispatch_decl,
35057 basic_block *empty_bb)
35059 char *resolver_name;
35060 tree decl, type, decl_name, t;
35061 bool is_uniq = false;
35063 /* IFUNCs have to be globally visible. So, if the default_decl is
35064 not, then the name of the IFUNC should be made unique. */
35065 if (TREE_PUBLIC (default_decl) == 0)
35066 is_uniq = true;
35068 /* Append the filename to the resolver function if the versions are
35069 not externally visible. This is because the resolver function has
35070 to be externally visible for the loader to find it. So, appending
35071 the filename will prevent conflicts with a resolver function from
35072 another module which is based on the same version name. */
35073 resolver_name = make_name (default_decl, "resolver", is_uniq);
35075 /* The resolver function should return a (void *). */
35076 type = build_function_type_list (ptr_type_node, NULL_TREE);
35078 decl = build_fn_decl (resolver_name, type);
35079 decl_name = get_identifier (resolver_name);
35080 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35082 DECL_NAME (decl) = decl_name;
35083 TREE_USED (decl) = 1;
35084 DECL_ARTIFICIAL (decl) = 1;
35085 DECL_IGNORED_P (decl) = 0;
35086 /* IFUNC resolvers have to be externally visible. */
35087 TREE_PUBLIC (decl) = 1;
35088 DECL_UNINLINABLE (decl) = 1;
35090 /* Resolver is not external; its body is generated. */
35091 DECL_EXTERNAL (decl) = 0;
35092 DECL_EXTERNAL (dispatch_decl) = 0;
35094 DECL_CONTEXT (decl) = NULL_TREE;
35095 DECL_INITIAL (decl) = make_node (BLOCK);
35096 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35098 if (DECL_COMDAT_GROUP (default_decl)
35099 || TREE_PUBLIC (default_decl))
35101 /* In this case, each translation unit with a call to this
35102 versioned function will put out a resolver. Ensure it
35103 is comdat to keep just one copy. */
35104 DECL_COMDAT (decl) = 1;
35105 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35107 /* Build result decl and add to function_decl. */
35108 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35109 DECL_ARTIFICIAL (t) = 1;
35110 DECL_IGNORED_P (t) = 1;
35111 DECL_RESULT (decl) = t;
35113 gimplify_function_tree (decl);
35114 push_cfun (DECL_STRUCT_FUNCTION (decl));
35115 *empty_bb = init_lowered_empty_function (decl, false);
35117 cgraph_node::add_new_function (decl, true);
35118 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35120 pop_cfun ();
35122 gcc_assert (dispatch_decl != NULL);
35123 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35124 DECL_ATTRIBUTES (dispatch_decl)
35125 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35127 /* Create the alias for dispatch to resolver here. */
35128 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35129 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35130 XDELETEVEC (resolver_name);
35131 return decl;
35134 /* Generate the dispatching code body to dispatch multi-versioned function
35135 DECL. The target hook is called to process the "target" attributes and
35136 provide the code to dispatch the right function at run-time. NODE points
35137 to the dispatcher decl whose body will be created. */
35139 static tree
35140 ix86_generate_version_dispatcher_body (void *node_p)
35142 tree resolver_decl;
35143 basic_block empty_bb;
35144 tree default_ver_decl;
35145 struct cgraph_node *versn;
35146 struct cgraph_node *node;
35148 struct cgraph_function_version_info *node_version_info = NULL;
35149 struct cgraph_function_version_info *versn_info = NULL;
35151 node = (cgraph_node *)node_p;
35153 node_version_info = node->function_version ();
35154 gcc_assert (node->dispatcher_function
35155 && node_version_info != NULL);
35157 if (node_version_info->dispatcher_resolver)
35158 return node_version_info->dispatcher_resolver;
35160 /* The first version in the chain corresponds to the default version. */
35161 default_ver_decl = node_version_info->next->this_node->decl;
35163 /* node is going to be an alias, so remove the finalized bit. */
35164 node->definition = false;
35166 resolver_decl = make_resolver_func (default_ver_decl,
35167 node->decl, &empty_bb);
35169 node_version_info->dispatcher_resolver = resolver_decl;
35171 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35173 auto_vec<tree, 2> fn_ver_vec;
35175 for (versn_info = node_version_info->next; versn_info;
35176 versn_info = versn_info->next)
35178 versn = versn_info->this_node;
35179 /* Check for virtual functions here again, as by this time it should
35180 have been determined if this function needs a vtable index or
35181 not. This happens for methods in derived classes that override
35182 virtual methods in base classes but are not explicitly marked as
35183 virtual. */
35184 if (DECL_VINDEX (versn->decl))
35185 sorry ("Virtual function multiversioning not supported");
35187 fn_ver_vec.safe_push (versn->decl);
35190 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35191 cgraph_edge::rebuild_edges ();
35192 pop_cfun ();
35193 return resolver_decl;
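/* Roughly what the generated resolver amounts to if written by hand
   (illustrative only; the names are invented and the real body is built
   in GIMPLE by make_resolver_func and dispatch_function_versions):  */
#if 0
extern int foo_default (void), foo_core2 (void), foo_avx2 (void);

static void *
foo_resolver (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_supports ("avx2"))
    return (void *) foo_avx2;
  if (__builtin_cpu_is ("core2"))
    return (void *) foo_core2;
  return (void *) foo_default;
}

int foo (void) __attribute__ ((ifunc ("foo_resolver")));
#endif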
35195 /* This builds the processor_model struct type defined in
35196 libgcc/config/i386/cpuinfo.c */
35198 static tree
35199 build_processor_model_struct (void)
35201 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35202 "__cpu_features"};
35203 tree field = NULL_TREE, field_chain = NULL_TREE;
35204 int i;
35205 tree type = make_node (RECORD_TYPE);
35207 /* The first 3 fields are unsigned int. */
35208 for (i = 0; i < 3; ++i)
35210 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35211 get_identifier (field_name[i]), unsigned_type_node);
35212 if (field_chain != NULL_TREE)
35213 DECL_CHAIN (field) = field_chain;
35214 field_chain = field;
35217 /* The last field is an array of unsigned integers of size one. */
35218 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35219 get_identifier (field_name[3]),
35220 build_array_type (unsigned_type_node,
35221 build_index_type (size_one_node)));
35222 if (field_chain != NULL_TREE)
35223 DECL_CHAIN (field) = field_chain;
35224 field_chain = field;
35226 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35227 return type;
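/* For reference, the record built above mirrors the definition in
   libgcc/config/i386/cpuinfo.c, which looks roughly like this
   (reproduced from memory, so treat it as a sketch):  */
#if 0
struct __processor_model
{
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
};
#endif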
35230 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35232 static tree
35233 make_var_decl (tree type, const char *name)
35235 tree new_decl;
35237 new_decl = build_decl (UNKNOWN_LOCATION,
35238 VAR_DECL,
35239 get_identifier(name),
35240 type);
35242 DECL_EXTERNAL (new_decl) = 1;
35243 TREE_STATIC (new_decl) = 1;
35244 TREE_PUBLIC (new_decl) = 1;
35245 DECL_INITIAL (new_decl) = 0;
35246 DECL_ARTIFICIAL (new_decl) = 0;
35247 DECL_PRESERVE_P (new_decl) = 1;
35249 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35250 assemble_variable (new_decl, 0, 0, 0);
35252 return new_decl;
35255 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35256 into an integer defined in libgcc/config/i386/cpuinfo.c */
35258 static tree
35259 fold_builtin_cpu (tree fndecl, tree *args)
35261 unsigned int i;
35262 enum ix86_builtins fn_code = (enum ix86_builtins)
35263 DECL_FUNCTION_CODE (fndecl);
35264 tree param_string_cst = NULL;
35266 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35267 enum processor_features
35269 F_CMOV = 0,
35270 F_MMX,
35271 F_POPCNT,
35272 F_SSE,
35273 F_SSE2,
35274 F_SSE3,
35275 F_SSSE3,
35276 F_SSE4_1,
35277 F_SSE4_2,
35278 F_AVX,
35279 F_AVX2,
35280 F_SSE4_A,
35281 F_FMA4,
35282 F_XOP,
35283 F_FMA,
35284 F_AVX512F,
35285 F_MAX
35288 /* These are the values for vendor types and cpu types and subtypes
35289 in cpuinfo.c. Cpu types and subtypes should have the corresponding
35290 start value subtracted from them. */
35291 enum processor_model
35293 M_INTEL = 1,
35294 M_AMD,
35295 M_CPU_TYPE_START,
35296 M_INTEL_BONNELL,
35297 M_INTEL_CORE2,
35298 M_INTEL_COREI7,
35299 M_AMDFAM10H,
35300 M_AMDFAM15H,
35301 M_INTEL_SILVERMONT,
35302 M_INTEL_KNL,
35303 M_AMD_BTVER1,
35304 M_AMD_BTVER2,
35305 M_CPU_SUBTYPE_START,
35306 M_INTEL_COREI7_NEHALEM,
35307 M_INTEL_COREI7_WESTMERE,
35308 M_INTEL_COREI7_SANDYBRIDGE,
35309 M_AMDFAM10H_BARCELONA,
35310 M_AMDFAM10H_SHANGHAI,
35311 M_AMDFAM10H_ISTANBUL,
35312 M_AMDFAM15H_BDVER1,
35313 M_AMDFAM15H_BDVER2,
35314 M_AMDFAM15H_BDVER3,
35315 M_AMDFAM15H_BDVER4,
35316 M_INTEL_COREI7_IVYBRIDGE,
35317 M_INTEL_COREI7_HASWELL
35320 static struct _arch_names_table
35322 const char *const name;
35323 const enum processor_model model;
35325 const arch_names_table[] =
35327 {"amd", M_AMD},
35328 {"intel", M_INTEL},
35329 {"atom", M_INTEL_BONNELL},
35330 {"slm", M_INTEL_SILVERMONT},
35331 {"core2", M_INTEL_CORE2},
35332 {"corei7", M_INTEL_COREI7},
35333 {"nehalem", M_INTEL_COREI7_NEHALEM},
35334 {"westmere", M_INTEL_COREI7_WESTMERE},
35335 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35336 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35337 {"haswell", M_INTEL_COREI7_HASWELL},
35338 {"bonnell", M_INTEL_BONNELL},
35339 {"silvermont", M_INTEL_SILVERMONT},
35340 {"knl", M_INTEL_KNL},
35341 {"amdfam10h", M_AMDFAM10H},
35342 {"barcelona", M_AMDFAM10H_BARCELONA},
35343 {"shanghai", M_AMDFAM10H_SHANGHAI},
35344 {"istanbul", M_AMDFAM10H_ISTANBUL},
35345 {"btver1", M_AMD_BTVER1},
35346 {"amdfam15h", M_AMDFAM15H},
35347 {"bdver1", M_AMDFAM15H_BDVER1},
35348 {"bdver2", M_AMDFAM15H_BDVER2},
35349 {"bdver3", M_AMDFAM15H_BDVER3},
35350 {"bdver4", M_AMDFAM15H_BDVER4},
35351 {"btver2", M_AMD_BTVER2},
35354 static struct _isa_names_table
35356 const char *const name;
35357 const enum processor_features feature;
35359 const isa_names_table[] =
35361 {"cmov", F_CMOV},
35362 {"mmx", F_MMX},
35363 {"popcnt", F_POPCNT},
35364 {"sse", F_SSE},
35365 {"sse2", F_SSE2},
35366 {"sse3", F_SSE3},
35367 {"ssse3", F_SSSE3},
35368 {"sse4a", F_SSE4_A},
35369 {"sse4.1", F_SSE4_1},
35370 {"sse4.2", F_SSE4_2},
35371 {"avx", F_AVX},
35372 {"fma4", F_FMA4},
35373 {"xop", F_XOP},
35374 {"fma", F_FMA},
35375 {"avx2", F_AVX2},
35376 {"avx512f",F_AVX512F}
35379 tree __processor_model_type = build_processor_model_struct ();
35380 tree __cpu_model_var = make_var_decl (__processor_model_type,
35381 "__cpu_model");
35384 varpool_node::add (__cpu_model_var);
35386 gcc_assert ((args != NULL) && (*args != NULL));
35388 param_string_cst = *args;
35389 while (param_string_cst
35390 && TREE_CODE (param_string_cst) != STRING_CST)
35392 /* *args must be an expr that can contain other EXPRS leading to a
35393 STRING_CST. */
35394 if (!EXPR_P (param_string_cst))
35396 error ("Parameter to builtin must be a string constant or literal");
35397 return integer_zero_node;
35399 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35402 gcc_assert (param_string_cst);
35404 if (fn_code == IX86_BUILTIN_CPU_IS)
35406 tree ref;
35407 tree field;
35408 tree final;
35410 unsigned int field_val = 0;
35411 unsigned int NUM_ARCH_NAMES
35412 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35414 for (i = 0; i < NUM_ARCH_NAMES; i++)
35415 if (strcmp (arch_names_table[i].name,
35416 TREE_STRING_POINTER (param_string_cst)) == 0)
35417 break;
35419 if (i == NUM_ARCH_NAMES)
35421 error ("Parameter to builtin not valid: %s",
35422 TREE_STRING_POINTER (param_string_cst));
35423 return integer_zero_node;
35426 field = TYPE_FIELDS (__processor_model_type);
35427 field_val = arch_names_table[i].model;
35429 /* CPU types are stored in the next field. */
35430 if (field_val > M_CPU_TYPE_START
35431 && field_val < M_CPU_SUBTYPE_START)
35433 field = DECL_CHAIN (field);
35434 field_val -= M_CPU_TYPE_START;
35437 /* CPU subtypes are stored in the next field. */
35438 if (field_val > M_CPU_SUBTYPE_START)
35440 field = DECL_CHAIN (DECL_CHAIN (field));
35441 field_val -= M_CPU_SUBTYPE_START;
35444 /* Get the appropriate field in __cpu_model. */
35445 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35446 field, NULL_TREE);
35448 /* Check the value. */
35449 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35450 build_int_cstu (unsigned_type_node, field_val));
35451 return build1 (CONVERT_EXPR, integer_type_node, final);
35453 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35455 tree ref;
35456 tree array_elt;
35457 tree field;
35458 tree final;
35460 unsigned int field_val = 0;
35461 unsigned int NUM_ISA_NAMES
35462 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35464 for (i = 0; i < NUM_ISA_NAMES; i++)
35465 if (strcmp (isa_names_table[i].name,
35466 TREE_STRING_POINTER (param_string_cst)) == 0)
35467 break;
35469 if (i == NUM_ISA_NAMES)
35471 error ("Parameter to builtin not valid: %s",
35472 TREE_STRING_POINTER (param_string_cst));
35473 return integer_zero_node;
35476 field = TYPE_FIELDS (__processor_model_type);
35477 /* Get the last field, which is __cpu_features. */
35478 while (DECL_CHAIN (field))
35479 field = DECL_CHAIN (field);
35481 /* Get the appropriate field: __cpu_model.__cpu_features */
35482 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35483 field, NULL_TREE);
35485 /* Access the 0th element of __cpu_features array. */
35486 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35487 integer_zero_node, NULL_TREE, NULL_TREE);
35489 field_val = (1 << isa_names_table[i].feature);
35490 /* Return __cpu_model.__cpu_features[0] & field_val */
35491 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35492 build_int_cstu (unsigned_type_node, field_val));
35493 return build1 (CONVERT_EXPR, integer_type_node, final);
35495 gcc_unreachable ();
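/* Illustrative user code and (roughly) the trees the folds above build
   for it; the function name is made up:  */
#if 0
int
use_cpu_builtins (void)
{
  if (__builtin_cpu_is ("westmere"))
    /* ~ __cpu_model.__cpu_subtype
	 == (M_INTEL_COREI7_WESTMERE - M_CPU_SUBTYPE_START)  */
    return 1;
  if (__builtin_cpu_supports ("avx2"))
    /* ~ __cpu_model.__cpu_features[0] & (1 << F_AVX2)  */
    return 2;
  return 0;
}
#endif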
35498 static tree
35499 ix86_fold_builtin (tree fndecl, int n_args,
35500 tree *args, bool ignore ATTRIBUTE_UNUSED)
35502 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35504 enum ix86_builtins fn_code = (enum ix86_builtins)
35505 DECL_FUNCTION_CODE (fndecl);
35506 if (fn_code == IX86_BUILTIN_CPU_IS
35507 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35509 gcc_assert (n_args == 1);
35510 return fold_builtin_cpu (fndecl, args);
35514 #ifdef SUBTARGET_FOLD_BUILTIN
35515 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35516 #endif
35518 return NULL_TREE;
35521 /* Make builtins to detect cpu type and features supported. NAME is
35522 the builtin name, CODE is the builtin code, and FTYPE is the function
35523 type of the builtin. */
35525 static void
35526 make_cpu_type_builtin (const char* name, int code,
35527 enum ix86_builtin_func_type ftype, bool is_const)
35529 tree decl;
35530 tree type;
35532 type = ix86_get_builtin_func_type (ftype);
35533 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35534 NULL, NULL_TREE);
35535 gcc_assert (decl != NULL_TREE);
35536 ix86_builtins[(int) code] = decl;
35537 TREE_READONLY (decl) = is_const;
35540 /* Make builtins to get CPU type and features supported. The created
35541 builtins are:
35543 __builtin_cpu_init (), to detect cpu type and features,
35544 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35545 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35548 static void
35549 ix86_init_platform_type_builtins (void)
35551 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35552 INT_FTYPE_VOID, false);
35553 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35554 INT_FTYPE_PCCHAR, true);
35555 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35556 INT_FTYPE_PCCHAR, true);
35559 /* Internal method for ix86_init_builtins. */
35561 static void
35562 ix86_init_builtins_va_builtins_abi (void)
35564 tree ms_va_ref, sysv_va_ref;
35565 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35566 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35567 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35568 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35570 if (!TARGET_64BIT)
35571 return;
35572 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35573 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35574 ms_va_ref = build_reference_type (ms_va_list_type_node);
35575 sysv_va_ref =
35576 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35578 fnvoid_va_end_ms =
35579 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35580 fnvoid_va_start_ms =
35581 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35582 fnvoid_va_end_sysv =
35583 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35584 fnvoid_va_start_sysv =
35585 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35586 NULL_TREE);
35587 fnvoid_va_copy_ms =
35588 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35589 NULL_TREE);
35590 fnvoid_va_copy_sysv =
35591 build_function_type_list (void_type_node, sysv_va_ref,
35592 sysv_va_ref, NULL_TREE);
35594 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35595 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35596 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35597 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35598 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35599 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35600 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35601 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35602 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35603 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35604 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35605 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
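/* Illustrative (assumed-typical) use of the builtins registered above;
   the function name is made up.  They only exist for 64-bit targets, as
   the TARGET_64BIT early return above enforces:  */
#if 0
__attribute__ ((ms_abi)) int
ms_sum (int n, ...)
{
  __builtin_ms_va_list ap;
  int i, s = 0;

  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; i++)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}
#endif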
35608 static void
35609 ix86_init_builtin_types (void)
35611 tree float128_type_node, float80_type_node;
35613 /* The __float80 type. */
35614 float80_type_node = long_double_type_node;
35615 if (TYPE_MODE (float80_type_node) != XFmode)
35617 /* The __float80 type. */
35618 float80_type_node = make_node (REAL_TYPE);
35620 TYPE_PRECISION (float80_type_node) = 80;
35621 layout_type (float80_type_node);
35623 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35625 /* The __float128 type. */
35626 float128_type_node = make_node (REAL_TYPE);
35627 TYPE_PRECISION (float128_type_node) = 128;
35628 layout_type (float128_type_node);
35629 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35631 /* This macro is built by i386-builtin-types.awk. */
35632 DEFINE_BUILTIN_PRIMITIVE_TYPES;
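/* Illustrative: the types registered above are usable directly in user
   code; the 'w' and 'q' constant suffixes shown are the GCC ones for
   __float80 and __float128 on x86 (stated from memory):  */
#if 0
__float80  ext  = 1.5w;
__float128 quad = 1.5q;
#endif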
35635 static void
35636 ix86_init_builtins (void)
35638 tree t;
35640 ix86_init_builtin_types ();
35642 /* Builtins to get CPU type and features. */
35643 ix86_init_platform_type_builtins ();
35645 /* TFmode support builtins. */
35646 def_builtin_const (0, "__builtin_infq",
35647 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35648 def_builtin_const (0, "__builtin_huge_valq",
35649 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35651 /* We will expand them to a normal call if SSE isn't available since
35652 they are used by libgcc. */
35653 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35654 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35655 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35656 TREE_READONLY (t) = 1;
35657 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35659 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35660 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35661 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35662 TREE_READONLY (t) = 1;
35663 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35665 ix86_init_tm_builtins ();
35666 ix86_init_mmx_sse_builtins ();
35667 ix86_init_mpx_builtins ();
35669 if (TARGET_LP64)
35670 ix86_init_builtins_va_builtins_abi ();
35672 #ifdef SUBTARGET_INIT_BUILTINS
35673 SUBTARGET_INIT_BUILTINS;
35674 #endif
35677 /* Return the ix86 builtin for CODE. */
35679 static tree
35680 ix86_builtin_decl (unsigned code, bool)
35682 if (code >= IX86_BUILTIN_MAX)
35683 return error_mark_node;
35685 return ix86_builtins[code];
35688 /* Errors in the source file can cause expand_expr to return const0_rtx
35689 where we expect a vector. To avoid crashing, use one of the vector
35690 clear instructions. */
35691 static rtx
35692 safe_vector_operand (rtx x, machine_mode mode)
35694 if (x == const0_rtx)
35695 x = CONST0_RTX (mode);
35696 return x;
35699 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35701 static rtx
35702 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35704 rtx pat;
35705 tree arg0 = CALL_EXPR_ARG (exp, 0);
35706 tree arg1 = CALL_EXPR_ARG (exp, 1);
35707 rtx op0 = expand_normal (arg0);
35708 rtx op1 = expand_normal (arg1);
35709 machine_mode tmode = insn_data[icode].operand[0].mode;
35710 machine_mode mode0 = insn_data[icode].operand[1].mode;
35711 machine_mode mode1 = insn_data[icode].operand[2].mode;
35713 if (VECTOR_MODE_P (mode0))
35714 op0 = safe_vector_operand (op0, mode0);
35715 if (VECTOR_MODE_P (mode1))
35716 op1 = safe_vector_operand (op1, mode1);
35718 if (optimize || !target
35719 || GET_MODE (target) != tmode
35720 || !insn_data[icode].operand[0].predicate (target, tmode))
35721 target = gen_reg_rtx (tmode);
35723 if (GET_MODE (op1) == SImode && mode1 == TImode)
35725 rtx x = gen_reg_rtx (V4SImode);
35726 emit_insn (gen_sse2_loadd (x, op1));
35727 op1 = gen_lowpart (TImode, x);
35730 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35731 op0 = copy_to_mode_reg (mode0, op0);
35732 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35733 op1 = copy_to_mode_reg (mode1, op1);
35735 pat = GEN_FCN (icode) (target, op0, op1);
35736 if (! pat)
35737 return 0;
35739 emit_insn (pat);
35741 return target;
35744 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35746 static rtx
35747 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35748 enum ix86_builtin_func_type m_type,
35749 enum rtx_code sub_code)
35751 rtx pat;
35752 int i;
35753 int nargs;
35754 bool comparison_p = false;
35755 bool tf_p = false;
35756 bool last_arg_constant = false;
35757 int num_memory = 0;
35758 struct {
35759 rtx op;
35760 machine_mode mode;
35761 } args[4];
35763 machine_mode tmode = insn_data[icode].operand[0].mode;
35765 switch (m_type)
35767 case MULTI_ARG_4_DF2_DI_I:
35768 case MULTI_ARG_4_DF2_DI_I1:
35769 case MULTI_ARG_4_SF2_SI_I:
35770 case MULTI_ARG_4_SF2_SI_I1:
35771 nargs = 4;
35772 last_arg_constant = true;
35773 break;
35775 case MULTI_ARG_3_SF:
35776 case MULTI_ARG_3_DF:
35777 case MULTI_ARG_3_SF2:
35778 case MULTI_ARG_3_DF2:
35779 case MULTI_ARG_3_DI:
35780 case MULTI_ARG_3_SI:
35781 case MULTI_ARG_3_SI_DI:
35782 case MULTI_ARG_3_HI:
35783 case MULTI_ARG_3_HI_SI:
35784 case MULTI_ARG_3_QI:
35785 case MULTI_ARG_3_DI2:
35786 case MULTI_ARG_3_SI2:
35787 case MULTI_ARG_3_HI2:
35788 case MULTI_ARG_3_QI2:
35789 nargs = 3;
35790 break;
35792 case MULTI_ARG_2_SF:
35793 case MULTI_ARG_2_DF:
35794 case MULTI_ARG_2_DI:
35795 case MULTI_ARG_2_SI:
35796 case MULTI_ARG_2_HI:
35797 case MULTI_ARG_2_QI:
35798 nargs = 2;
35799 break;
35801 case MULTI_ARG_2_DI_IMM:
35802 case MULTI_ARG_2_SI_IMM:
35803 case MULTI_ARG_2_HI_IMM:
35804 case MULTI_ARG_2_QI_IMM:
35805 nargs = 2;
35806 last_arg_constant = true;
35807 break;
35809 case MULTI_ARG_1_SF:
35810 case MULTI_ARG_1_DF:
35811 case MULTI_ARG_1_SF2:
35812 case MULTI_ARG_1_DF2:
35813 case MULTI_ARG_1_DI:
35814 case MULTI_ARG_1_SI:
35815 case MULTI_ARG_1_HI:
35816 case MULTI_ARG_1_QI:
35817 case MULTI_ARG_1_SI_DI:
35818 case MULTI_ARG_1_HI_DI:
35819 case MULTI_ARG_1_HI_SI:
35820 case MULTI_ARG_1_QI_DI:
35821 case MULTI_ARG_1_QI_SI:
35822 case MULTI_ARG_1_QI_HI:
35823 nargs = 1;
35824 break;
35826 case MULTI_ARG_2_DI_CMP:
35827 case MULTI_ARG_2_SI_CMP:
35828 case MULTI_ARG_2_HI_CMP:
35829 case MULTI_ARG_2_QI_CMP:
35830 nargs = 2;
35831 comparison_p = true;
35832 break;
35834 case MULTI_ARG_2_SF_TF:
35835 case MULTI_ARG_2_DF_TF:
35836 case MULTI_ARG_2_DI_TF:
35837 case MULTI_ARG_2_SI_TF:
35838 case MULTI_ARG_2_HI_TF:
35839 case MULTI_ARG_2_QI_TF:
35840 nargs = 2;
35841 tf_p = true;
35842 break;
35844 default:
35845 gcc_unreachable ();
35848 if (optimize || !target
35849 || GET_MODE (target) != tmode
35850 || !insn_data[icode].operand[0].predicate (target, tmode))
35851 target = gen_reg_rtx (tmode);
35853 gcc_assert (nargs <= 4);
35855 for (i = 0; i < nargs; i++)
35857 tree arg = CALL_EXPR_ARG (exp, i);
35858 rtx op = expand_normal (arg);
35859 int adjust = (comparison_p) ? 1 : 0;
35860 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35862 if (last_arg_constant && i == nargs - 1)
35864 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35866 enum insn_code new_icode = icode;
35867 switch (icode)
35869 case CODE_FOR_xop_vpermil2v2df3:
35870 case CODE_FOR_xop_vpermil2v4sf3:
35871 case CODE_FOR_xop_vpermil2v4df3:
35872 case CODE_FOR_xop_vpermil2v8sf3:
35873 error ("the last argument must be a 2-bit immediate");
35874 return gen_reg_rtx (tmode);
35875 case CODE_FOR_xop_rotlv2di3:
35876 new_icode = CODE_FOR_rotlv2di3;
35877 goto xop_rotl;
35878 case CODE_FOR_xop_rotlv4si3:
35879 new_icode = CODE_FOR_rotlv4si3;
35880 goto xop_rotl;
35881 case CODE_FOR_xop_rotlv8hi3:
35882 new_icode = CODE_FOR_rotlv8hi3;
35883 goto xop_rotl;
35884 case CODE_FOR_xop_rotlv16qi3:
35885 new_icode = CODE_FOR_rotlv16qi3;
35886 xop_rotl:
35887 if (CONST_INT_P (op))
35889 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35890 op = GEN_INT (INTVAL (op) & mask);
35891 gcc_checking_assert
35892 (insn_data[icode].operand[i + 1].predicate (op, mode));
35894 else
35896 gcc_checking_assert
35897 (nargs == 2
35898 && insn_data[new_icode].operand[0].mode == tmode
35899 && insn_data[new_icode].operand[1].mode == tmode
35900 && insn_data[new_icode].operand[2].mode == mode
35901 && insn_data[new_icode].operand[0].predicate
35902 == insn_data[icode].operand[0].predicate
35903 && insn_data[new_icode].operand[1].predicate
35904 == insn_data[icode].operand[1].predicate);
35905 icode = new_icode;
35906 goto non_constant;
35908 break;
35909 default:
35910 gcc_unreachable ();
35914 else
35916 non_constant:
35917 if (VECTOR_MODE_P (mode))
35918 op = safe_vector_operand (op, mode);
35920 /* If we aren't optimizing, only allow one memory operand to be
35921 generated. */
35922 if (memory_operand (op, mode))
35923 num_memory++;
35925 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35927 if (optimize
35928 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35929 || num_memory > 1)
35930 op = force_reg (mode, op);
35933 args[i].op = op;
35934 args[i].mode = mode;
35937 switch (nargs)
35939 case 1:
35940 pat = GEN_FCN (icode) (target, args[0].op);
35941 break;
35943 case 2:
35944 if (tf_p)
35945 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35946 GEN_INT ((int)sub_code));
35947 else if (! comparison_p)
35948 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35949 else
35951 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35952 args[0].op,
35953 args[1].op);
35955 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35957 break;
35959 case 3:
35960 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35961 break;
35963 case 4:
35964 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35965 break;
35967 default:
35968 gcc_unreachable ();
35971 if (! pat)
35972 return 0;
35974 emit_insn (pat);
35975 return target;
35978 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35979 insns with vec_merge. */
35981 static rtx
35982 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35983 rtx target)
35985 rtx pat;
35986 tree arg0 = CALL_EXPR_ARG (exp, 0);
35987 rtx op1, op0 = expand_normal (arg0);
35988 machine_mode tmode = insn_data[icode].operand[0].mode;
35989 machine_mode mode0 = insn_data[icode].operand[1].mode;
35991 if (optimize || !target
35992 || GET_MODE (target) != tmode
35993 || !insn_data[icode].operand[0].predicate (target, tmode))
35994 target = gen_reg_rtx (tmode);
35996 if (VECTOR_MODE_P (mode0))
35997 op0 = safe_vector_operand (op0, mode0);
35999 if ((optimize && !register_operand (op0, mode0))
36000 || !insn_data[icode].operand[1].predicate (op0, mode0))
36001 op0 = copy_to_mode_reg (mode0, op0);
36003 op1 = op0;
36004 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36005 op1 = copy_to_mode_reg (mode0, op1);
36007 pat = GEN_FCN (icode) (target, op0, op1);
36008 if (! pat)
36009 return 0;
36010 emit_insn (pat);
36011 return target;
36014 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36016 static rtx
36017 ix86_expand_sse_compare (const struct builtin_description *d,
36018 tree exp, rtx target, bool swap)
36020 rtx pat;
36021 tree arg0 = CALL_EXPR_ARG (exp, 0);
36022 tree arg1 = CALL_EXPR_ARG (exp, 1);
36023 rtx op0 = expand_normal (arg0);
36024 rtx op1 = expand_normal (arg1);
36025 rtx op2;
36026 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36027 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36028 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36029 enum rtx_code comparison = d->comparison;
36031 if (VECTOR_MODE_P (mode0))
36032 op0 = safe_vector_operand (op0, mode0);
36033 if (VECTOR_MODE_P (mode1))
36034 op1 = safe_vector_operand (op1, mode1);
36036 /* Swap operands if we have a comparison that isn't available in
36037 hardware. */
36038 if (swap)
36040 rtx tmp = gen_reg_rtx (mode1);
36041 emit_move_insn (tmp, op1);
36042 op1 = op0;
36043 op0 = tmp;
36046 if (optimize || !target
36047 || GET_MODE (target) != tmode
36048 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36049 target = gen_reg_rtx (tmode);
36051 if ((optimize && !register_operand (op0, mode0))
36052 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36053 op0 = copy_to_mode_reg (mode0, op0);
36054 if ((optimize && !register_operand (op1, mode1))
36055 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36056 op1 = copy_to_mode_reg (mode1, op1);
36058 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36059 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36060 if (! pat)
36061 return 0;
36062 emit_insn (pat);
36063 return target;
36066 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36068 static rtx
36069 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36070 rtx target)
36072 rtx pat;
36073 tree arg0 = CALL_EXPR_ARG (exp, 0);
36074 tree arg1 = CALL_EXPR_ARG (exp, 1);
36075 rtx op0 = expand_normal (arg0);
36076 rtx op1 = expand_normal (arg1);
36077 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36078 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36079 enum rtx_code comparison = d->comparison;
36081 if (VECTOR_MODE_P (mode0))
36082 op0 = safe_vector_operand (op0, mode0);
36083 if (VECTOR_MODE_P (mode1))
36084 op1 = safe_vector_operand (op1, mode1);
36086 /* Swap operands if we have a comparison that isn't available in
36087 hardware. */
36088 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36089 std::swap (op1, op0);
36091 target = gen_reg_rtx (SImode);
36092 emit_move_insn (target, const0_rtx);
36093 target = gen_rtx_SUBREG (QImode, target, 0);
36095 if ((optimize && !register_operand (op0, mode0))
36096 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36097 op0 = copy_to_mode_reg (mode0, op0);
36098 if ((optimize && !register_operand (op1, mode1))
36099 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36100 op1 = copy_to_mode_reg (mode1, op1);
36102 pat = GEN_FCN (d->icode) (op0, op1);
36103 if (! pat)
36104 return 0;
36105 emit_insn (pat);
36106 emit_insn (gen_rtx_SET (VOIDmode,
36107 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36108 gen_rtx_fmt_ee (comparison, QImode,
36109 SET_DEST (pat),
36110 const0_rtx)));
36112 return SUBREG_REG (target);
36115 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36117 static rtx
36118 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36119 rtx target)
36121 rtx pat;
36122 tree arg0 = CALL_EXPR_ARG (exp, 0);
36123 rtx op1, op0 = expand_normal (arg0);
36124 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36125 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36127 if (optimize || target == 0
36128 || GET_MODE (target) != tmode
36129 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36130 target = gen_reg_rtx (tmode);
36132 if (VECTOR_MODE_P (mode0))
36133 op0 = safe_vector_operand (op0, mode0);
36135 if ((optimize && !register_operand (op0, mode0))
36136 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36137 op0 = copy_to_mode_reg (mode0, op0);
36139 op1 = GEN_INT (d->comparison);
36141 pat = GEN_FCN (d->icode) (target, op0, op1);
36142 if (! pat)
36143 return 0;
36144 emit_insn (pat);
36145 return target;
36148 static rtx
36149 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36150 tree exp, rtx target)
36152 rtx pat;
36153 tree arg0 = CALL_EXPR_ARG (exp, 0);
36154 tree arg1 = CALL_EXPR_ARG (exp, 1);
36155 rtx op0 = expand_normal (arg0);
36156 rtx op1 = expand_normal (arg1);
36157 rtx op2;
36158 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36159 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36160 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36162 if (optimize || target == 0
36163 || GET_MODE (target) != tmode
36164 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36165 target = gen_reg_rtx (tmode);
36167 op0 = safe_vector_operand (op0, mode0);
36168 op1 = safe_vector_operand (op1, mode1);
36170 if ((optimize && !register_operand (op0, mode0))
36171 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36172 op0 = copy_to_mode_reg (mode0, op0);
36173 if ((optimize && !register_operand (op1, mode1))
36174 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36175 op1 = copy_to_mode_reg (mode1, op1);
36177 op2 = GEN_INT (d->comparison);
36179 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36180 if (! pat)
36181 return 0;
36182 emit_insn (pat);
36183 return target;
36186 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36188 static rtx
36189 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36190 rtx target)
36192 rtx pat;
36193 tree arg0 = CALL_EXPR_ARG (exp, 0);
36194 tree arg1 = CALL_EXPR_ARG (exp, 1);
36195 rtx op0 = expand_normal (arg0);
36196 rtx op1 = expand_normal (arg1);
36197 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36198 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36199 enum rtx_code comparison = d->comparison;
36201 if (VECTOR_MODE_P (mode0))
36202 op0 = safe_vector_operand (op0, mode0);
36203 if (VECTOR_MODE_P (mode1))
36204 op1 = safe_vector_operand (op1, mode1);
36206 target = gen_reg_rtx (SImode);
36207 emit_move_insn (target, const0_rtx);
36208 target = gen_rtx_SUBREG (QImode, target, 0);
36210 if ((optimize && !register_operand (op0, mode0))
36211 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36212 op0 = copy_to_mode_reg (mode0, op0);
36213 if ((optimize && !register_operand (op1, mode1))
36214 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36215 op1 = copy_to_mode_reg (mode1, op1);
36217 pat = GEN_FCN (d->icode) (op0, op1);
36218 if (! pat)
36219 return 0;
36220 emit_insn (pat);
36221 emit_insn (gen_rtx_SET (VOIDmode,
36222 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36223 gen_rtx_fmt_ee (comparison, QImode,
36224 SET_DEST (pat),
36225 const0_rtx)));
36227 return SUBREG_REG (target);
36230 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36232 static rtx
36233 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36234 tree exp, rtx target)
36236 rtx pat;
36237 tree arg0 = CALL_EXPR_ARG (exp, 0);
36238 tree arg1 = CALL_EXPR_ARG (exp, 1);
36239 tree arg2 = CALL_EXPR_ARG (exp, 2);
36240 tree arg3 = CALL_EXPR_ARG (exp, 3);
36241 tree arg4 = CALL_EXPR_ARG (exp, 4);
36242 rtx scratch0, scratch1;
36243 rtx op0 = expand_normal (arg0);
36244 rtx op1 = expand_normal (arg1);
36245 rtx op2 = expand_normal (arg2);
36246 rtx op3 = expand_normal (arg3);
36247 rtx op4 = expand_normal (arg4);
36248 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36250 tmode0 = insn_data[d->icode].operand[0].mode;
36251 tmode1 = insn_data[d->icode].operand[1].mode;
36252 modev2 = insn_data[d->icode].operand[2].mode;
36253 modei3 = insn_data[d->icode].operand[3].mode;
36254 modev4 = insn_data[d->icode].operand[4].mode;
36255 modei5 = insn_data[d->icode].operand[5].mode;
36256 modeimm = insn_data[d->icode].operand[6].mode;
36258 if (VECTOR_MODE_P (modev2))
36259 op0 = safe_vector_operand (op0, modev2);
36260 if (VECTOR_MODE_P (modev4))
36261 op2 = safe_vector_operand (op2, modev4);
36263 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36264 op0 = copy_to_mode_reg (modev2, op0);
36265 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36266 op1 = copy_to_mode_reg (modei3, op1);
36267 if ((optimize && !register_operand (op2, modev4))
36268 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36269 op2 = copy_to_mode_reg (modev4, op2);
36270 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36271 op3 = copy_to_mode_reg (modei5, op3);
36273 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36275 error ("the fifth argument must be an 8-bit immediate");
36276 return const0_rtx;
36279 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36281 if (optimize || !target
36282 || GET_MODE (target) != tmode0
36283 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36284 target = gen_reg_rtx (tmode0);
36286 scratch1 = gen_reg_rtx (tmode1);
36288 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36290 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36292 if (optimize || !target
36293 || GET_MODE (target) != tmode1
36294 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36295 target = gen_reg_rtx (tmode1);
36297 scratch0 = gen_reg_rtx (tmode0);
36299 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36301 else
36303 gcc_assert (d->flag);
36305 scratch0 = gen_reg_rtx (tmode0);
36306 scratch1 = gen_reg_rtx (tmode1);
36308 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36311 if (! pat)
36312 return 0;
36314 emit_insn (pat);
36316 if (d->flag)
36318 target = gen_reg_rtx (SImode);
36319 emit_move_insn (target, const0_rtx);
36320 target = gen_rtx_SUBREG (QImode, target, 0);
36322 emit_insn
36323 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36324 gen_rtx_fmt_ee (EQ, QImode,
36325 gen_rtx_REG ((machine_mode) d->flag,
36326 FLAGS_REG),
36327 const0_rtx)));
36328 return SUBREG_REG (target);
36330 else
36331 return target;
36335 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36337 static rtx
36338 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36339 tree exp, rtx target)
36341 rtx pat;
36342 tree arg0 = CALL_EXPR_ARG (exp, 0);
36343 tree arg1 = CALL_EXPR_ARG (exp, 1);
36344 tree arg2 = CALL_EXPR_ARG (exp, 2);
36345 rtx scratch0, scratch1;
36346 rtx op0 = expand_normal (arg0);
36347 rtx op1 = expand_normal (arg1);
36348 rtx op2 = expand_normal (arg2);
36349 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36351 tmode0 = insn_data[d->icode].operand[0].mode;
36352 tmode1 = insn_data[d->icode].operand[1].mode;
36353 modev2 = insn_data[d->icode].operand[2].mode;
36354 modev3 = insn_data[d->icode].operand[3].mode;
36355 modeimm = insn_data[d->icode].operand[4].mode;
36357 if (VECTOR_MODE_P (modev2))
36358 op0 = safe_vector_operand (op0, modev2);
36359 if (VECTOR_MODE_P (modev3))
36360 op1 = safe_vector_operand (op1, modev3);
36362 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36363 op0 = copy_to_mode_reg (modev2, op0);
36364 if ((optimize && !register_operand (op1, modev3))
36365 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36366 op1 = copy_to_mode_reg (modev3, op1);
36368 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36370 error ("the third argument must be an 8-bit immediate");
36371 return const0_rtx;
36374 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36376 if (optimize || !target
36377 || GET_MODE (target) != tmode0
36378 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36379 target = gen_reg_rtx (tmode0);
36381 scratch1 = gen_reg_rtx (tmode1);
36383 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36385 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36387 if (optimize || !target
36388 || GET_MODE (target) != tmode1
36389 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36390 target = gen_reg_rtx (tmode1);
36392 scratch0 = gen_reg_rtx (tmode0);
36394 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36396 else
36398 gcc_assert (d->flag);
36400 scratch0 = gen_reg_rtx (tmode0);
36401 scratch1 = gen_reg_rtx (tmode1);
36403 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36406 if (! pat)
36407 return 0;
36409 emit_insn (pat);
36411 if (d->flag)
36413 target = gen_reg_rtx (SImode);
36414 emit_move_insn (target, const0_rtx);
36415 target = gen_rtx_SUBREG (QImode, target, 0);
36417 emit_insn
36418 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36419 gen_rtx_fmt_ee (EQ, QImode,
36420 gen_rtx_REG ((machine_mode) d->flag,
36421 FLAGS_REG),
36422 const0_rtx)));
36423 return SUBREG_REG (target);
36425 else
36426 return target;
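/* Illustrative note (not taken from the original sources): for the pcmpistr
   family, d->code selects which hard result is live -- PCMPISTRI128 returns
   the index result (tmode0), PCMPISTRM128 returns the mask result (tmode1),
   and the remaining variants (d->flag nonzero) discard both values and
   return a QImode test of FLAGS_REG instead.  A typical user-level entry
   point would be something like _mm_cmpistri or _mm_cmpistra from
   smmintrin.h, though the intrinsic-to-builtin mapping is defined by the
   headers, not here.  */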
36429 /* Subroutine of ix86_expand_builtin to take care of insns with
36430 variable number of operands. */
36432 static rtx
36433 ix86_expand_args_builtin (const struct builtin_description *d,
36434 tree exp, rtx target)
36436 rtx pat, real_target;
36437 unsigned int i, nargs;
36438 unsigned int nargs_constant = 0;
36439 unsigned int mask_pos = 0;
36440 int num_memory = 0;
36441 struct
36443 rtx op;
36444 machine_mode mode;
36445 } args[6];
36446 bool last_arg_count = false;
36447 enum insn_code icode = d->icode;
36448 const struct insn_data_d *insn_p = &insn_data[icode];
36449 machine_mode tmode = insn_p->operand[0].mode;
36450 machine_mode rmode = VOIDmode;
36451 bool swap = false;
36452 enum rtx_code comparison = d->comparison;
36454 switch ((enum ix86_builtin_func_type) d->flag)
36456 case V2DF_FTYPE_V2DF_ROUND:
36457 case V4DF_FTYPE_V4DF_ROUND:
36458 case V4SF_FTYPE_V4SF_ROUND:
36459 case V8SF_FTYPE_V8SF_ROUND:
36460 case V4SI_FTYPE_V4SF_ROUND:
36461 case V8SI_FTYPE_V8SF_ROUND:
36462 return ix86_expand_sse_round (d, exp, target);
36463 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36464 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36465 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36466 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36467 case INT_FTYPE_V8SF_V8SF_PTEST:
36468 case INT_FTYPE_V4DI_V4DI_PTEST:
36469 case INT_FTYPE_V4DF_V4DF_PTEST:
36470 case INT_FTYPE_V4SF_V4SF_PTEST:
36471 case INT_FTYPE_V2DI_V2DI_PTEST:
36472 case INT_FTYPE_V2DF_V2DF_PTEST:
36473 return ix86_expand_sse_ptest (d, exp, target);
36474 case FLOAT128_FTYPE_FLOAT128:
36475 case FLOAT_FTYPE_FLOAT:
36476 case INT_FTYPE_INT:
36477 case UINT64_FTYPE_INT:
36478 case UINT16_FTYPE_UINT16:
36479 case INT64_FTYPE_INT64:
36480 case INT64_FTYPE_V4SF:
36481 case INT64_FTYPE_V2DF:
36482 case INT_FTYPE_V16QI:
36483 case INT_FTYPE_V8QI:
36484 case INT_FTYPE_V8SF:
36485 case INT_FTYPE_V4DF:
36486 case INT_FTYPE_V4SF:
36487 case INT_FTYPE_V2DF:
36488 case INT_FTYPE_V32QI:
36489 case V16QI_FTYPE_V16QI:
36490 case V8SI_FTYPE_V8SF:
36491 case V8SI_FTYPE_V4SI:
36492 case V8HI_FTYPE_V8HI:
36493 case V8HI_FTYPE_V16QI:
36494 case V8QI_FTYPE_V8QI:
36495 case V8SF_FTYPE_V8SF:
36496 case V8SF_FTYPE_V8SI:
36497 case V8SF_FTYPE_V4SF:
36498 case V8SF_FTYPE_V8HI:
36499 case V4SI_FTYPE_V4SI:
36500 case V4SI_FTYPE_V16QI:
36501 case V4SI_FTYPE_V4SF:
36502 case V4SI_FTYPE_V8SI:
36503 case V4SI_FTYPE_V8HI:
36504 case V4SI_FTYPE_V4DF:
36505 case V4SI_FTYPE_V2DF:
36506 case V4HI_FTYPE_V4HI:
36507 case V4DF_FTYPE_V4DF:
36508 case V4DF_FTYPE_V4SI:
36509 case V4DF_FTYPE_V4SF:
36510 case V4DF_FTYPE_V2DF:
36511 case V4SF_FTYPE_V4SF:
36512 case V4SF_FTYPE_V4SI:
36513 case V4SF_FTYPE_V8SF:
36514 case V4SF_FTYPE_V4DF:
36515 case V4SF_FTYPE_V8HI:
36516 case V4SF_FTYPE_V2DF:
36517 case V2DI_FTYPE_V2DI:
36518 case V2DI_FTYPE_V16QI:
36519 case V2DI_FTYPE_V8HI:
36520 case V2DI_FTYPE_V4SI:
36521 case V2DF_FTYPE_V2DF:
36522 case V2DF_FTYPE_V4SI:
36523 case V2DF_FTYPE_V4DF:
36524 case V2DF_FTYPE_V4SF:
36525 case V2DF_FTYPE_V2SI:
36526 case V2SI_FTYPE_V2SI:
36527 case V2SI_FTYPE_V4SF:
36528 case V2SI_FTYPE_V2SF:
36529 case V2SI_FTYPE_V2DF:
36530 case V2SF_FTYPE_V2SF:
36531 case V2SF_FTYPE_V2SI:
36532 case V32QI_FTYPE_V32QI:
36533 case V32QI_FTYPE_V16QI:
36534 case V16HI_FTYPE_V16HI:
36535 case V16HI_FTYPE_V8HI:
36536 case V8SI_FTYPE_V8SI:
36537 case V16HI_FTYPE_V16QI:
36538 case V8SI_FTYPE_V16QI:
36539 case V4DI_FTYPE_V16QI:
36540 case V8SI_FTYPE_V8HI:
36541 case V4DI_FTYPE_V8HI:
36542 case V4DI_FTYPE_V4SI:
36543 case V4DI_FTYPE_V2DI:
36544 case HI_FTYPE_HI:
36545 case HI_FTYPE_V16QI:
36546 case SI_FTYPE_V32QI:
36547 case DI_FTYPE_V64QI:
36548 case V16QI_FTYPE_HI:
36549 case V32QI_FTYPE_SI:
36550 case V64QI_FTYPE_DI:
36551 case V8HI_FTYPE_QI:
36552 case V16HI_FTYPE_HI:
36553 case V32HI_FTYPE_SI:
36554 case V4SI_FTYPE_QI:
36555 case V8SI_FTYPE_QI:
36556 case V4SI_FTYPE_HI:
36557 case V8SI_FTYPE_HI:
36558 case QI_FTYPE_V8HI:
36559 case HI_FTYPE_V16HI:
36560 case SI_FTYPE_V32HI:
36561 case QI_FTYPE_V4SI:
36562 case QI_FTYPE_V8SI:
36563 case HI_FTYPE_V16SI:
36564 case QI_FTYPE_V2DI:
36565 case QI_FTYPE_V4DI:
36566 case QI_FTYPE_V8DI:
36567 case UINT_FTYPE_V2DF:
36568 case UINT_FTYPE_V4SF:
36569 case UINT64_FTYPE_V2DF:
36570 case UINT64_FTYPE_V4SF:
36571 case V16QI_FTYPE_V8DI:
36572 case V16HI_FTYPE_V16SI:
36573 case V16SI_FTYPE_HI:
36574 case V2DI_FTYPE_QI:
36575 case V4DI_FTYPE_QI:
36576 case V16SI_FTYPE_V16SI:
36577 case V16SI_FTYPE_INT:
36578 case V16SF_FTYPE_FLOAT:
36579 case V16SF_FTYPE_V8SF:
36580 case V16SI_FTYPE_V8SI:
36581 case V16SF_FTYPE_V4SF:
36582 case V16SI_FTYPE_V4SI:
36583 case V16SF_FTYPE_V16SF:
36584 case V8HI_FTYPE_V8DI:
36585 case V8UHI_FTYPE_V8UHI:
36586 case V8SI_FTYPE_V8DI:
36587 case V8SF_FTYPE_V8DF:
36588 case V8DI_FTYPE_QI:
36589 case V8DI_FTYPE_INT64:
36590 case V8DI_FTYPE_V4DI:
36591 case V8DI_FTYPE_V8DI:
36592 case V8DF_FTYPE_DOUBLE:
36593 case V8DF_FTYPE_V4DF:
36594 case V8DF_FTYPE_V2DF:
36595 case V8DF_FTYPE_V8DF:
36596 case V8DF_FTYPE_V8SI:
36597 nargs = 1;
36598 break;
36599 case V4SF_FTYPE_V4SF_VEC_MERGE:
36600 case V2DF_FTYPE_V2DF_VEC_MERGE:
36601 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36602 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36603 case V16QI_FTYPE_V16QI_V16QI:
36604 case V16QI_FTYPE_V8HI_V8HI:
36605 case V16SI_FTYPE_V16SI_V16SI:
36606 case V16SF_FTYPE_V16SF_V16SF:
36607 case V16SF_FTYPE_V16SF_V16SI:
36608 case V8QI_FTYPE_V8QI_V8QI:
36609 case V8QI_FTYPE_V4HI_V4HI:
36610 case V8HI_FTYPE_V8HI_V8HI:
36611 case V8HI_FTYPE_V16QI_V16QI:
36612 case V8HI_FTYPE_V4SI_V4SI:
36613 case V8SF_FTYPE_V8SF_V8SF:
36614 case V8SF_FTYPE_V8SF_V8SI:
36615 case V8DI_FTYPE_V8DI_V8DI:
36616 case V8DF_FTYPE_V8DF_V8DF:
36617 case V8DF_FTYPE_V8DF_V8DI:
36618 case V4SI_FTYPE_V4SI_V4SI:
36619 case V4SI_FTYPE_V8HI_V8HI:
36620 case V4SI_FTYPE_V4SF_V4SF:
36621 case V4SI_FTYPE_V2DF_V2DF:
36622 case V4HI_FTYPE_V4HI_V4HI:
36623 case V4HI_FTYPE_V8QI_V8QI:
36624 case V4HI_FTYPE_V2SI_V2SI:
36625 case V4DF_FTYPE_V4DF_V4DF:
36626 case V4DF_FTYPE_V4DF_V4DI:
36627 case V4SF_FTYPE_V4SF_V4SF:
36628 case V4SF_FTYPE_V4SF_V4SI:
36629 case V4SF_FTYPE_V4SF_V2SI:
36630 case V4SF_FTYPE_V4SF_V2DF:
36631 case V4SF_FTYPE_V4SF_UINT:
36632 case V4SF_FTYPE_V4SF_UINT64:
36633 case V4SF_FTYPE_V4SF_DI:
36634 case V4SF_FTYPE_V4SF_SI:
36635 case V2DI_FTYPE_V2DI_V2DI:
36636 case V2DI_FTYPE_V16QI_V16QI:
36637 case V2DI_FTYPE_V4SI_V4SI:
36638 case V2UDI_FTYPE_V4USI_V4USI:
36639 case V2DI_FTYPE_V2DI_V16QI:
36640 case V2DI_FTYPE_V2DF_V2DF:
36641 case V2SI_FTYPE_V2SI_V2SI:
36642 case V2SI_FTYPE_V4HI_V4HI:
36643 case V2SI_FTYPE_V2SF_V2SF:
36644 case V2DF_FTYPE_V2DF_V2DF:
36645 case V2DF_FTYPE_V2DF_V4SF:
36646 case V2DF_FTYPE_V2DF_V2DI:
36647 case V2DF_FTYPE_V2DF_DI:
36648 case V2DF_FTYPE_V2DF_SI:
36649 case V2DF_FTYPE_V2DF_UINT:
36650 case V2DF_FTYPE_V2DF_UINT64:
36651 case V2SF_FTYPE_V2SF_V2SF:
36652 case V1DI_FTYPE_V1DI_V1DI:
36653 case V1DI_FTYPE_V8QI_V8QI:
36654 case V1DI_FTYPE_V2SI_V2SI:
36655 case V32QI_FTYPE_V16HI_V16HI:
36656 case V16HI_FTYPE_V8SI_V8SI:
36657 case V32QI_FTYPE_V32QI_V32QI:
36658 case V16HI_FTYPE_V32QI_V32QI:
36659 case V16HI_FTYPE_V16HI_V16HI:
36660 case V8SI_FTYPE_V4DF_V4DF:
36661 case V8SI_FTYPE_V8SI_V8SI:
36662 case V8SI_FTYPE_V16HI_V16HI:
36663 case V4DI_FTYPE_V4DI_V4DI:
36664 case V4DI_FTYPE_V8SI_V8SI:
36665 case V4UDI_FTYPE_V8USI_V8USI:
36666 case QI_FTYPE_V8DI_V8DI:
36667 case V8DI_FTYPE_V64QI_V64QI:
36668 case HI_FTYPE_V16SI_V16SI:
36669 if (comparison == UNKNOWN)
36670 return ix86_expand_binop_builtin (icode, exp, target);
36671 nargs = 2;
36672 break;
36673 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36674 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36675 gcc_assert (comparison != UNKNOWN);
36676 nargs = 2;
36677 swap = true;
36678 break;
36679 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36680 case V16HI_FTYPE_V16HI_SI_COUNT:
36681 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36682 case V8SI_FTYPE_V8SI_SI_COUNT:
36683 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36684 case V4DI_FTYPE_V4DI_INT_COUNT:
36685 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36686 case V8HI_FTYPE_V8HI_SI_COUNT:
36687 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36688 case V4SI_FTYPE_V4SI_SI_COUNT:
36689 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36690 case V4HI_FTYPE_V4HI_SI_COUNT:
36691 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36692 case V2DI_FTYPE_V2DI_SI_COUNT:
36693 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36694 case V2SI_FTYPE_V2SI_SI_COUNT:
36695 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36696 case V1DI_FTYPE_V1DI_SI_COUNT:
36697 nargs = 2;
36698 last_arg_count = true;
36699 break;
36700 case UINT64_FTYPE_UINT64_UINT64:
36701 case UINT_FTYPE_UINT_UINT:
36702 case UINT_FTYPE_UINT_USHORT:
36703 case UINT_FTYPE_UINT_UCHAR:
36704 case UINT16_FTYPE_UINT16_INT:
36705 case UINT8_FTYPE_UINT8_INT:
36706 case HI_FTYPE_HI_HI:
36707 case SI_FTYPE_SI_SI:
36708 case DI_FTYPE_DI_DI:
36709 case V16SI_FTYPE_V8DF_V8DF:
36710 nargs = 2;
36711 break;
36712 case V2DI_FTYPE_V2DI_INT_CONVERT:
36713 nargs = 2;
36714 rmode = V1TImode;
36715 nargs_constant = 1;
36716 break;
36717 case V4DI_FTYPE_V4DI_INT_CONVERT:
36718 nargs = 2;
36719 rmode = V2TImode;
36720 nargs_constant = 1;
36721 break;
36722 case V8DI_FTYPE_V8DI_INT_CONVERT:
36723 nargs = 2;
36724 rmode = V4TImode;
36725 nargs_constant = 1;
36726 break;
36727 case V8HI_FTYPE_V8HI_INT:
36728 case V8HI_FTYPE_V8SF_INT:
36729 case V16HI_FTYPE_V16SF_INT:
36730 case V8HI_FTYPE_V4SF_INT:
36731 case V8SF_FTYPE_V8SF_INT:
36732 case V4SF_FTYPE_V16SF_INT:
36733 case V16SF_FTYPE_V16SF_INT:
36734 case V4SI_FTYPE_V4SI_INT:
36735 case V4SI_FTYPE_V8SI_INT:
36736 case V4HI_FTYPE_V4HI_INT:
36737 case V4DF_FTYPE_V4DF_INT:
36738 case V4DF_FTYPE_V8DF_INT:
36739 case V4SF_FTYPE_V4SF_INT:
36740 case V4SF_FTYPE_V8SF_INT:
36741 case V2DI_FTYPE_V2DI_INT:
36742 case V2DF_FTYPE_V2DF_INT:
36743 case V2DF_FTYPE_V4DF_INT:
36744 case V16HI_FTYPE_V16HI_INT:
36745 case V8SI_FTYPE_V8SI_INT:
36746 case V16SI_FTYPE_V16SI_INT:
36747 case V4SI_FTYPE_V16SI_INT:
36748 case V4DI_FTYPE_V4DI_INT:
36749 case V2DI_FTYPE_V4DI_INT:
36750 case V4DI_FTYPE_V8DI_INT:
36751 case HI_FTYPE_HI_INT:
36752 case QI_FTYPE_V4SF_INT:
36753 case QI_FTYPE_V2DF_INT:
36754 nargs = 2;
36755 nargs_constant = 1;
36756 break;
36757 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36758 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36759 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36760 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36761 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36762 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36763 case HI_FTYPE_V16SI_V16SI_HI:
36764 case QI_FTYPE_V8DI_V8DI_QI:
36765 case V16HI_FTYPE_V16SI_V16HI_HI:
36766 case V16QI_FTYPE_V16SI_V16QI_HI:
36767 case V16QI_FTYPE_V8DI_V16QI_QI:
36768 case V16SF_FTYPE_V16SF_V16SF_HI:
36769 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36770 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36771 case V16SF_FTYPE_V16SI_V16SF_HI:
36772 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36773 case V16SF_FTYPE_V4SF_V16SF_HI:
36774 case V16SI_FTYPE_SI_V16SI_HI:
36775 case V16SI_FTYPE_V16HI_V16SI_HI:
36776 case V16SI_FTYPE_V16QI_V16SI_HI:
36777 case V16SI_FTYPE_V16SF_V16SI_HI:
36778 case V8SF_FTYPE_V4SF_V8SF_QI:
36779 case V4DF_FTYPE_V2DF_V4DF_QI:
36780 case V8SI_FTYPE_V4SI_V8SI_QI:
36781 case V8SI_FTYPE_SI_V8SI_QI:
36782 case V4SI_FTYPE_V4SI_V4SI_QI:
36783 case V4SI_FTYPE_SI_V4SI_QI:
36784 case V4DI_FTYPE_V2DI_V4DI_QI:
36785 case V4DI_FTYPE_DI_V4DI_QI:
36786 case V2DI_FTYPE_V2DI_V2DI_QI:
36787 case V2DI_FTYPE_DI_V2DI_QI:
36788 case V64QI_FTYPE_V64QI_V64QI_DI:
36789 case V64QI_FTYPE_V16QI_V64QI_DI:
36790 case V64QI_FTYPE_QI_V64QI_DI:
36791 case V32QI_FTYPE_V32QI_V32QI_SI:
36792 case V32QI_FTYPE_V16QI_V32QI_SI:
36793 case V32QI_FTYPE_QI_V32QI_SI:
36794 case V16QI_FTYPE_V16QI_V16QI_HI:
36795 case V16QI_FTYPE_QI_V16QI_HI:
36796 case V32HI_FTYPE_V8HI_V32HI_SI:
36797 case V32HI_FTYPE_HI_V32HI_SI:
36798 case V16HI_FTYPE_V8HI_V16HI_HI:
36799 case V16HI_FTYPE_HI_V16HI_HI:
36800 case V8HI_FTYPE_V8HI_V8HI_QI:
36801 case V8HI_FTYPE_HI_V8HI_QI:
36802 case V8SF_FTYPE_V8HI_V8SF_QI:
36803 case V4SF_FTYPE_V8HI_V4SF_QI:
36804 case V8SI_FTYPE_V8SF_V8SI_QI:
36805 case V4SI_FTYPE_V4SF_V4SI_QI:
36806 case V8DI_FTYPE_V8SF_V8DI_QI:
36807 case V4DI_FTYPE_V4SF_V4DI_QI:
36808 case V2DI_FTYPE_V4SF_V2DI_QI:
36809 case V8SF_FTYPE_V8DI_V8SF_QI:
36810 case V4SF_FTYPE_V4DI_V4SF_QI:
36811 case V4SF_FTYPE_V2DI_V4SF_QI:
36812 case V8DF_FTYPE_V8DI_V8DF_QI:
36813 case V4DF_FTYPE_V4DI_V4DF_QI:
36814 case V2DF_FTYPE_V2DI_V2DF_QI:
36815 case V16QI_FTYPE_V8HI_V16QI_QI:
36816 case V16QI_FTYPE_V16HI_V16QI_HI:
36817 case V16QI_FTYPE_V4SI_V16QI_QI:
36818 case V16QI_FTYPE_V8SI_V16QI_QI:
36819 case V8HI_FTYPE_V4SI_V8HI_QI:
36820 case V8HI_FTYPE_V8SI_V8HI_QI:
36821 case V16QI_FTYPE_V2DI_V16QI_QI:
36822 case V16QI_FTYPE_V4DI_V16QI_QI:
36823 case V8HI_FTYPE_V2DI_V8HI_QI:
36824 case V8HI_FTYPE_V4DI_V8HI_QI:
36825 case V4SI_FTYPE_V2DI_V4SI_QI:
36826 case V4SI_FTYPE_V4DI_V4SI_QI:
36827 case V32QI_FTYPE_V32HI_V32QI_SI:
36828 case HI_FTYPE_V16QI_V16QI_HI:
36829 case SI_FTYPE_V32QI_V32QI_SI:
36830 case DI_FTYPE_V64QI_V64QI_DI:
36831 case QI_FTYPE_V8HI_V8HI_QI:
36832 case HI_FTYPE_V16HI_V16HI_HI:
36833 case SI_FTYPE_V32HI_V32HI_SI:
36834 case QI_FTYPE_V4SI_V4SI_QI:
36835 case QI_FTYPE_V8SI_V8SI_QI:
36836 case QI_FTYPE_V2DI_V2DI_QI:
36837 case QI_FTYPE_V4DI_V4DI_QI:
36838 case V4SF_FTYPE_V2DF_V4SF_QI:
36839 case V4SF_FTYPE_V4DF_V4SF_QI:
36840 case V16SI_FTYPE_V16SI_V16SI_HI:
36841 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36842 case V16SI_FTYPE_V4SI_V16SI_HI:
36843 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36844 case V2DI_FTYPE_V4SI_V2DI_QI:
36845 case V2DI_FTYPE_V8HI_V2DI_QI:
36846 case V2DI_FTYPE_V16QI_V2DI_QI:
36847 case V4DI_FTYPE_V4DI_V4DI_QI:
36848 case V4DI_FTYPE_V4SI_V4DI_QI:
36849 case V4DI_FTYPE_V8HI_V4DI_QI:
36850 case V4DI_FTYPE_V16QI_V4DI_QI:
36851 case V8DI_FTYPE_V8DF_V8DI_QI:
36852 case V4DI_FTYPE_V4DF_V4DI_QI:
36853 case V2DI_FTYPE_V2DF_V2DI_QI:
36854 case V4SI_FTYPE_V4DF_V4SI_QI:
36855 case V4SI_FTYPE_V2DF_V4SI_QI:
36856 case V4SI_FTYPE_V8HI_V4SI_QI:
36857 case V4SI_FTYPE_V16QI_V4SI_QI:
36858 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36859 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36860 case V8DF_FTYPE_V2DF_V8DF_QI:
36861 case V8DF_FTYPE_V4DF_V8DF_QI:
36862 case V8DF_FTYPE_V8DF_V8DF_QI:
36863 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36864 case V8SF_FTYPE_V8SF_V8SF_QI:
36865 case V8SF_FTYPE_V8SI_V8SF_QI:
36866 case V4DF_FTYPE_V4DF_V4DF_QI:
36867 case V4SF_FTYPE_V4SF_V4SF_QI:
36868 case V2DF_FTYPE_V2DF_V2DF_QI:
36869 case V2DF_FTYPE_V4SF_V2DF_QI:
36870 case V2DF_FTYPE_V4SI_V2DF_QI:
36871 case V4SF_FTYPE_V4SI_V4SF_QI:
36872 case V4DF_FTYPE_V4SF_V4DF_QI:
36873 case V4DF_FTYPE_V4SI_V4DF_QI:
36874 case V8SI_FTYPE_V8SI_V8SI_QI:
36875 case V8SI_FTYPE_V8HI_V8SI_QI:
36876 case V8SI_FTYPE_V16QI_V8SI_QI:
36877 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36878 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36879 case V8DF_FTYPE_V8SF_V8DF_QI:
36880 case V8DF_FTYPE_V8SI_V8DF_QI:
36881 case V8DI_FTYPE_DI_V8DI_QI:
36882 case V16SF_FTYPE_V8SF_V16SF_HI:
36883 case V16SI_FTYPE_V8SI_V16SI_HI:
36884 case V16HI_FTYPE_V16HI_V16HI_HI:
36885 case V8HI_FTYPE_V16QI_V8HI_QI:
36886 case V16HI_FTYPE_V16QI_V16HI_HI:
36887 case V32HI_FTYPE_V32HI_V32HI_SI:
36888 case V32HI_FTYPE_V32QI_V32HI_SI:
36889 case V8DI_FTYPE_V16QI_V8DI_QI:
36890 case V8DI_FTYPE_V2DI_V8DI_QI:
36891 case V8DI_FTYPE_V4DI_V8DI_QI:
36892 case V8DI_FTYPE_V8DI_V8DI_QI:
36893 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36894 case V8DI_FTYPE_V8HI_V8DI_QI:
36895 case V8DI_FTYPE_V8SI_V8DI_QI:
36896 case V8HI_FTYPE_V8DI_V8HI_QI:
36897 case V8SF_FTYPE_V8DF_V8SF_QI:
36898 case V8SI_FTYPE_V8DF_V8SI_QI:
36899 case V8SI_FTYPE_V8DI_V8SI_QI:
36900 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36901 nargs = 3;
36902 break;
36903 case V32QI_FTYPE_V32QI_V32QI_INT:
36904 case V16HI_FTYPE_V16HI_V16HI_INT:
36905 case V16QI_FTYPE_V16QI_V16QI_INT:
36906 case V4DI_FTYPE_V4DI_V4DI_INT:
36907 case V8HI_FTYPE_V8HI_V8HI_INT:
36908 case V8SI_FTYPE_V8SI_V8SI_INT:
36909 case V8SI_FTYPE_V8SI_V4SI_INT:
36910 case V8SF_FTYPE_V8SF_V8SF_INT:
36911 case V8SF_FTYPE_V8SF_V4SF_INT:
36912 case V4SI_FTYPE_V4SI_V4SI_INT:
36913 case V4DF_FTYPE_V4DF_V4DF_INT:
36914 case V16SF_FTYPE_V16SF_V16SF_INT:
36915 case V16SF_FTYPE_V16SF_V4SF_INT:
36916 case V16SI_FTYPE_V16SI_V4SI_INT:
36917 case V4DF_FTYPE_V4DF_V2DF_INT:
36918 case V4SF_FTYPE_V4SF_V4SF_INT:
36919 case V2DI_FTYPE_V2DI_V2DI_INT:
36920 case V4DI_FTYPE_V4DI_V2DI_INT:
36921 case V2DF_FTYPE_V2DF_V2DF_INT:
36922 case QI_FTYPE_V8DI_V8DI_INT:
36923 case QI_FTYPE_V8DF_V8DF_INT:
36924 case QI_FTYPE_V2DF_V2DF_INT:
36925 case QI_FTYPE_V4SF_V4SF_INT:
36926 case HI_FTYPE_V16SI_V16SI_INT:
36927 case HI_FTYPE_V16SF_V16SF_INT:
36928 nargs = 3;
36929 nargs_constant = 1;
36930 break;
36931 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36932 nargs = 3;
36933 rmode = V4DImode;
36934 nargs_constant = 1;
36935 break;
36936 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36937 nargs = 3;
36938 rmode = V2DImode;
36939 nargs_constant = 1;
36940 break;
36941 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36942 nargs = 3;
36943 rmode = DImode;
36944 nargs_constant = 1;
36945 break;
36946 case V2DI_FTYPE_V2DI_UINT_UINT:
36947 nargs = 3;
36948 nargs_constant = 2;
36949 break;
36950 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36951 nargs = 3;
36952 rmode = V8DImode;
36953 nargs_constant = 1;
36954 break;
36955 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36956 nargs = 5;
36957 rmode = V8DImode;
36958 mask_pos = 2;
36959 nargs_constant = 1;
36960 break;
36961 case QI_FTYPE_V8DF_INT_QI:
36962 case QI_FTYPE_V4DF_INT_QI:
36963 case QI_FTYPE_V2DF_INT_QI:
36964 case HI_FTYPE_V16SF_INT_HI:
36965 case QI_FTYPE_V8SF_INT_QI:
36966 case QI_FTYPE_V4SF_INT_QI:
36967 nargs = 3;
36968 mask_pos = 1;
36969 nargs_constant = 1;
36970 break;
36971 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36972 nargs = 5;
36973 rmode = V4DImode;
36974 mask_pos = 2;
36975 nargs_constant = 1;
36976 break;
36977 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36978 nargs = 5;
36979 rmode = V2DImode;
36980 mask_pos = 2;
36981 nargs_constant = 1;
36982 break;
36983 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36984 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36985 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36986 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36987 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36988 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36989 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36990 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36991 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36992 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36993 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36994 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36995 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36996 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36997 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36998 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36999 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37000 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37001 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37002 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37003 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37004 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37005 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37006 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37007 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37008 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37009 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37010 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37011 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37012 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37013 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37014 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37015 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37016 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37017 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37018 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37019 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37020 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37021 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37022 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37023 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37024 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37025 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37026 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37027 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37028 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37029 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37030 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37031 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37032 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37033 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37034 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37035 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37036 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37037 nargs = 4;
37038 break;
37039 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37040 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37041 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37042 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37043 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37044 nargs = 4;
37045 nargs_constant = 1;
37046 break;
37047 case QI_FTYPE_V4DI_V4DI_INT_QI:
37048 case QI_FTYPE_V8SI_V8SI_INT_QI:
37049 case QI_FTYPE_V4DF_V4DF_INT_QI:
37050 case QI_FTYPE_V8SF_V8SF_INT_QI:
37051 case QI_FTYPE_V2DI_V2DI_INT_QI:
37052 case QI_FTYPE_V4SI_V4SI_INT_QI:
37053 case QI_FTYPE_V2DF_V2DF_INT_QI:
37054 case QI_FTYPE_V4SF_V4SF_INT_QI:
37055 case DI_FTYPE_V64QI_V64QI_INT_DI:
37056 case SI_FTYPE_V32QI_V32QI_INT_SI:
37057 case HI_FTYPE_V16QI_V16QI_INT_HI:
37058 case SI_FTYPE_V32HI_V32HI_INT_SI:
37059 case HI_FTYPE_V16HI_V16HI_INT_HI:
37060 case QI_FTYPE_V8HI_V8HI_INT_QI:
37061 nargs = 4;
37062 mask_pos = 1;
37063 nargs_constant = 1;
37064 break;
37065 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37066 nargs = 4;
37067 nargs_constant = 2;
37068 break;
37069 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37070 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37071 nargs = 4;
37072 break;
37073 case QI_FTYPE_V8DI_V8DI_INT_QI:
37074 case HI_FTYPE_V16SI_V16SI_INT_HI:
37075 case QI_FTYPE_V8DF_V8DF_INT_QI:
37076 case HI_FTYPE_V16SF_V16SF_INT_HI:
37077 mask_pos = 1;
37078 nargs = 4;
37079 nargs_constant = 1;
37080 break;
37081 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37082 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37083 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37084 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37085 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37086 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37087 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37088 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37089 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37090 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37091 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37092 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37093 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37094 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37095 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37096 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37097 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37098 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37099 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37100 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37101 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37102 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37103 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37104 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37105 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37106 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37107 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37108 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37109 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37110 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37111 nargs = 4;
37112 mask_pos = 2;
37113 nargs_constant = 1;
37114 break;
37115 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37116 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37117 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37118 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37119 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37120 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37121 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37122 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37123 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37124 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37125 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37126 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37127 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37128 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37129 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37130 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37131 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37132 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37133 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37134 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37135 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37136 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37137 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37138 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37139 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37140 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37141 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37142 nargs = 5;
37143 mask_pos = 2;
37144 nargs_constant = 1;
37145 break;
37146 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37147 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37148 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37149 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37150 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37151 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37152 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37153 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37154 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37155 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37156 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37157 nargs = 5;
37159 mask_pos = 1;
37160 nargs_constant = 1;
37161 break;
37163 default:
37164 gcc_unreachable ();
37167 gcc_assert (nargs <= ARRAY_SIZE (args));
37169 if (comparison != UNKNOWN)
37171 gcc_assert (nargs == 2);
37172 return ix86_expand_sse_compare (d, exp, target, swap);
37175 if (rmode == VOIDmode || rmode == tmode)
37177 if (optimize
37178 || target == 0
37179 || GET_MODE (target) != tmode
37180 || !insn_p->operand[0].predicate (target, tmode))
37181 target = gen_reg_rtx (tmode);
37182 real_target = target;
37184 else
37186 real_target = gen_reg_rtx (tmode);
37187 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37190 for (i = 0; i < nargs; i++)
37192 tree arg = CALL_EXPR_ARG (exp, i);
37193 rtx op = expand_normal (arg);
37194 machine_mode mode = insn_p->operand[i + 1].mode;
37195 bool match = insn_p->operand[i + 1].predicate (op, mode);
37197 if (last_arg_count && (i + 1) == nargs)
37199 /* SIMD shift insns take either an 8-bit immediate or a
37200 register as the count, but the builtin functions take an int as
37201 the count. If the count doesn't match, we put it in a register. */
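/* Illustrative example (an assumption about the usual header mapping, not
   recorded in this file): _mm_slli_epi32 (v, n) reaches this code as one of
   the ..._COUNT builtins; if N is a literal that satisfies the immediate
   predicate it is emitted as an 8-bit immediate, otherwise the int count is
   taken as an SImode subreg here and forced into a register.  */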
37202 if (!match)
37204 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37205 if (!insn_p->operand[i + 1].predicate (op, mode))
37206 op = copy_to_reg (op);
37209 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37210 (!mask_pos && (nargs - i) <= nargs_constant))
37212 if (!match)
37213 switch (icode)
37215 case CODE_FOR_avx_vinsertf128v4di:
37216 case CODE_FOR_avx_vextractf128v4di:
37217 error ("the last argument must be a 1-bit immediate");
37218 return const0_rtx;
37220 case CODE_FOR_avx512f_cmpv8di3_mask:
37221 case CODE_FOR_avx512f_cmpv16si3_mask:
37222 case CODE_FOR_avx512f_ucmpv8di3_mask:
37223 case CODE_FOR_avx512f_ucmpv16si3_mask:
37224 case CODE_FOR_avx512vl_cmpv4di3_mask:
37225 case CODE_FOR_avx512vl_cmpv8si3_mask:
37226 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37227 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37228 case CODE_FOR_avx512vl_cmpv2di3_mask:
37229 case CODE_FOR_avx512vl_cmpv4si3_mask:
37230 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37231 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37232 error ("the last argument must be a 3-bit immediate");
37233 return const0_rtx;
37235 case CODE_FOR_sse4_1_roundsd:
37236 case CODE_FOR_sse4_1_roundss:
37238 case CODE_FOR_sse4_1_roundpd:
37239 case CODE_FOR_sse4_1_roundps:
37240 case CODE_FOR_avx_roundpd256:
37241 case CODE_FOR_avx_roundps256:
37243 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37244 case CODE_FOR_sse4_1_roundps_sfix:
37245 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37246 case CODE_FOR_avx_roundps_sfix256:
37248 case CODE_FOR_sse4_1_blendps:
37249 case CODE_FOR_avx_blendpd256:
37250 case CODE_FOR_avx_vpermilv4df:
37251 case CODE_FOR_avx_vpermilv4df_mask:
37252 case CODE_FOR_avx512f_getmantv8df_mask:
37253 case CODE_FOR_avx512f_getmantv16sf_mask:
37254 case CODE_FOR_avx512vl_getmantv8sf_mask:
37255 case CODE_FOR_avx512vl_getmantv4df_mask:
37256 case CODE_FOR_avx512vl_getmantv4sf_mask:
37257 case CODE_FOR_avx512vl_getmantv2df_mask:
37258 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37259 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37260 case CODE_FOR_avx512dq_rangepv4df_mask:
37261 case CODE_FOR_avx512dq_rangepv8sf_mask:
37262 case CODE_FOR_avx512dq_rangepv2df_mask:
37263 case CODE_FOR_avx512dq_rangepv4sf_mask:
37264 case CODE_FOR_avx_shufpd256_mask:
37265 error ("the last argument must be a 4-bit immediate");
37266 return const0_rtx;
37268 case CODE_FOR_sha1rnds4:
37269 case CODE_FOR_sse4_1_blendpd:
37270 case CODE_FOR_avx_vpermilv2df:
37271 case CODE_FOR_avx_vpermilv2df_mask:
37272 case CODE_FOR_xop_vpermil2v2df3:
37273 case CODE_FOR_xop_vpermil2v4sf3:
37274 case CODE_FOR_xop_vpermil2v4df3:
37275 case CODE_FOR_xop_vpermil2v8sf3:
37276 case CODE_FOR_avx512f_vinsertf32x4_mask:
37277 case CODE_FOR_avx512f_vinserti32x4_mask:
37278 case CODE_FOR_avx512f_vextractf32x4_mask:
37279 case CODE_FOR_avx512f_vextracti32x4_mask:
37280 case CODE_FOR_sse2_shufpd:
37281 case CODE_FOR_sse2_shufpd_mask:
37282 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37283 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37284 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37285 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37286 error ("the last argument must be a 2-bit immediate");
37287 return const0_rtx;
37289 case CODE_FOR_avx_vextractf128v4df:
37290 case CODE_FOR_avx_vextractf128v8sf:
37291 case CODE_FOR_avx_vextractf128v8si:
37292 case CODE_FOR_avx_vinsertf128v4df:
37293 case CODE_FOR_avx_vinsertf128v8sf:
37294 case CODE_FOR_avx_vinsertf128v8si:
37295 case CODE_FOR_avx512f_vinsertf64x4_mask:
37296 case CODE_FOR_avx512f_vinserti64x4_mask:
37297 case CODE_FOR_avx512f_vextractf64x4_mask:
37298 case CODE_FOR_avx512f_vextracti64x4_mask:
37299 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37300 case CODE_FOR_avx512dq_vinserti32x8_mask:
37301 case CODE_FOR_avx512vl_vinsertv4df:
37302 case CODE_FOR_avx512vl_vinsertv4di:
37303 case CODE_FOR_avx512vl_vinsertv8sf:
37304 case CODE_FOR_avx512vl_vinsertv8si:
37305 error ("the last argument must be a 1-bit immediate");
37306 return const0_rtx;
37308 case CODE_FOR_avx_vmcmpv2df3:
37309 case CODE_FOR_avx_vmcmpv4sf3:
37310 case CODE_FOR_avx_cmpv2df3:
37311 case CODE_FOR_avx_cmpv4sf3:
37312 case CODE_FOR_avx_cmpv4df3:
37313 case CODE_FOR_avx_cmpv8sf3:
37314 case CODE_FOR_avx512f_cmpv8df3_mask:
37315 case CODE_FOR_avx512f_cmpv16sf3_mask:
37316 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37317 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37318 error ("the last argument must be a 5-bit immediate");
37319 return const0_rtx;
37321 default:
37322 switch (nargs_constant)
37324 case 2:
37325 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37326 (!mask_pos && (nargs - i) == nargs_constant))
37328 error ("the next to last argument must be an 8-bit immediate");
37329 break;
37331 case 1:
37332 error ("the last argument must be an 8-bit immediate");
37333 break;
37334 default:
37335 gcc_unreachable ();
37337 return const0_rtx;
37340 else
37342 if (VECTOR_MODE_P (mode))
37343 op = safe_vector_operand (op, mode);
37345 /* If we aren't optimizing, only allow one memory operand to
37346 be generated. */
37347 if (memory_operand (op, mode))
37348 num_memory++;
37350 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37352 if (optimize || !match || num_memory > 1)
37353 op = copy_to_mode_reg (mode, op);
37355 else
37357 op = copy_to_reg (op);
37358 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37362 args[i].op = op;
37363 args[i].mode = mode;
37366 switch (nargs)
37368 case 1:
37369 pat = GEN_FCN (icode) (real_target, args[0].op);
37370 break;
37371 case 2:
37372 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37373 break;
37374 case 3:
37375 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37376 args[2].op);
37377 break;
37378 case 4:
37379 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37380 args[2].op, args[3].op);
37381 break;
37382 case 5:
37383 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37384 args[2].op, args[3].op, args[4].op);
break;
37385 case 6:
37386 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37387 args[2].op, args[3].op, args[4].op,
37388 args[5].op);
37389 break;
37390 default:
37391 gcc_unreachable ();
37394 if (! pat)
37395 return 0;
37397 emit_insn (pat);
37398 return target;
37401 /* Transform pattern of following layout:
37402 (parallel [
37403 set (A B)
37404 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37406 into:
37407 (set (A B))
37409 Or:
37410 (parallel [ A B
37411 ...
37412 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37413 ...
37414 ])
37415 into:
37416 (parallel [ A B ... ]) */
37418 static rtx
37419 ix86_erase_embedded_rounding (rtx pat)
37421 if (GET_CODE (pat) == INSN)
37422 pat = PATTERN (pat);
37424 gcc_assert (GET_CODE (pat) == PARALLEL);
37426 if (XVECLEN (pat, 0) == 2)
37428 rtx p0 = XVECEXP (pat, 0, 0);
37429 rtx p1 = XVECEXP (pat, 0, 1);
37431 gcc_assert (GET_CODE (p0) == SET
37432 && GET_CODE (p1) == UNSPEC
37433 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37435 return p0;
37437 else
37439 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37440 int i = 0;
37441 int j = 0;
37443 for (; i < XVECLEN (pat, 0); ++i)
37445 rtx elem = XVECEXP (pat, 0, i);
37446 if (GET_CODE (elem) != UNSPEC
37447 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37448 res [j++] = elem;
37451 /* No more than one occurrence was removed. */
37452 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37454 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
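/* In the expanders below, this helper is invoked from
   ix86_expand_sse_comi_round when the rounding operand is NO_ROUND and from
   ix86_expand_round_builtin when redundant_embed_rnd is set; in both cases
   the UNSPEC_EMBEDDED_ROUNDING marker carries no information and can be
   dropped from the pattern.  */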
37458 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37459 with rounding. */
37460 static rtx
37461 ix86_expand_sse_comi_round (const struct builtin_description *d,
37462 tree exp, rtx target)
37464 rtx pat, set_dst;
37465 tree arg0 = CALL_EXPR_ARG (exp, 0);
37466 tree arg1 = CALL_EXPR_ARG (exp, 1);
37467 tree arg2 = CALL_EXPR_ARG (exp, 2);
37468 tree arg3 = CALL_EXPR_ARG (exp, 3);
37469 rtx op0 = expand_normal (arg0);
37470 rtx op1 = expand_normal (arg1);
37471 rtx op2 = expand_normal (arg2);
37472 rtx op3 = expand_normal (arg3);
37473 enum insn_code icode = d->icode;
37474 const struct insn_data_d *insn_p = &insn_data[icode];
37475 machine_mode mode0 = insn_p->operand[0].mode;
37476 machine_mode mode1 = insn_p->operand[1].mode;
37477 enum rtx_code comparison = UNEQ;
37478 bool need_ucomi = false;
37480 /* See avxintrin.h for values. */
37481 enum rtx_code comi_comparisons[32] =
37483 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37484 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37485 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37487 bool need_ucomi_values[32] =
37489 true, false, false, true, true, false, false, true,
37490 true, false, false, true, true, false, false, true,
37491 false, true, true, false, false, true, true, false,
37492 false, true, true, false, false, true, true, false
37495 if (!CONST_INT_P (op2))
37497 error ("the third argument must be a comparison constant");
37498 return const0_rtx;
37500 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37502 error ("incorrect comparison mode");
37503 return const0_rtx;
37506 if (!insn_p->operand[2].predicate (op3, SImode))
37508 error ("incorrect rounding operand");
37509 return const0_rtx;
37512 comparison = comi_comparisons[INTVAL (op2)];
37513 need_ucomi = need_ucomi_values[INTVAL (op2)];
37515 if (VECTOR_MODE_P (mode0))
37516 op0 = safe_vector_operand (op0, mode0);
37517 if (VECTOR_MODE_P (mode1))
37518 op1 = safe_vector_operand (op1, mode1);
37520 target = gen_reg_rtx (SImode);
37521 emit_move_insn (target, const0_rtx);
37522 target = gen_rtx_SUBREG (QImode, target, 0);
37524 if ((optimize && !register_operand (op0, mode0))
37525 || !insn_p->operand[0].predicate (op0, mode0))
37526 op0 = copy_to_mode_reg (mode0, op0);
37527 if ((optimize && !register_operand (op1, mode1))
37528 || !insn_p->operand[1].predicate (op1, mode1))
37529 op1 = copy_to_mode_reg (mode1, op1);
37531 if (need_ucomi)
37532 icode = icode == CODE_FOR_sse_comi_round
37533 ? CODE_FOR_sse_ucomi_round
37534 : CODE_FOR_sse2_ucomi_round;
37536 pat = GEN_FCN (icode) (op0, op1, op3);
37537 if (! pat)
37538 return 0;
37540 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37541 if (INTVAL (op3) == NO_ROUND)
37543 pat = ix86_erase_embedded_rounding (pat);
37544 if (! pat)
37545 return 0;
37547 set_dst = SET_DEST (pat);
37549 else
37551 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37552 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37555 emit_insn (pat);
37556 emit_insn (gen_rtx_SET (VOIDmode,
37557 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37558 gen_rtx_fmt_ee (comparison, QImode,
37559 set_dst,
37560 const0_rtx)));
37562 return SUBREG_REG (target);
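/* Sketch of the expected use (an assumption about the avx512fintrin.h
   wrappers, not something established here): a call such as
   _mm_comi_round_ss (a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC) supplies the
   predicate as op2, which indexes comi_comparisons[] and
   need_ucomi_values[] above, and the SAE/rounding selector as op3; the
   result is the QImode flag test materialized into an int.  */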
37565 static rtx
37566 ix86_expand_round_builtin (const struct builtin_description *d,
37567 tree exp, rtx target)
37569 rtx pat;
37570 unsigned int i, nargs;
37571 struct
37573 rtx op;
37574 machine_mode mode;
37575 } args[6];
37576 enum insn_code icode = d->icode;
37577 const struct insn_data_d *insn_p = &insn_data[icode];
37578 machine_mode tmode = insn_p->operand[0].mode;
37579 unsigned int nargs_constant = 0;
37580 unsigned int redundant_embed_rnd = 0;
37582 switch ((enum ix86_builtin_func_type) d->flag)
37584 case UINT64_FTYPE_V2DF_INT:
37585 case UINT64_FTYPE_V4SF_INT:
37586 case UINT_FTYPE_V2DF_INT:
37587 case UINT_FTYPE_V4SF_INT:
37588 case INT64_FTYPE_V2DF_INT:
37589 case INT64_FTYPE_V4SF_INT:
37590 case INT_FTYPE_V2DF_INT:
37591 case INT_FTYPE_V4SF_INT:
37592 nargs = 2;
37593 break;
37594 case V4SF_FTYPE_V4SF_UINT_INT:
37595 case V4SF_FTYPE_V4SF_UINT64_INT:
37596 case V2DF_FTYPE_V2DF_UINT64_INT:
37597 case V4SF_FTYPE_V4SF_INT_INT:
37598 case V4SF_FTYPE_V4SF_INT64_INT:
37599 case V2DF_FTYPE_V2DF_INT64_INT:
37600 case V4SF_FTYPE_V4SF_V4SF_INT:
37601 case V2DF_FTYPE_V2DF_V2DF_INT:
37602 case V4SF_FTYPE_V4SF_V2DF_INT:
37603 case V2DF_FTYPE_V2DF_V4SF_INT:
37604 nargs = 3;
37605 break;
37606 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37607 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37608 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37609 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37610 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37611 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37612 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37613 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37614 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37615 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37616 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37617 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37618 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37619 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37620 nargs = 4;
37621 break;
37622 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37623 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37624 nargs_constant = 2;
37625 nargs = 4;
37626 break;
37627 case INT_FTYPE_V4SF_V4SF_INT_INT:
37628 case INT_FTYPE_V2DF_V2DF_INT_INT:
37629 return ix86_expand_sse_comi_round (d, exp, target);
37630 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37631 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37632 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37633 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37634 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37635 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37636 nargs = 5;
37637 break;
37638 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37639 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37640 nargs_constant = 4;
37641 nargs = 5;
37642 break;
37643 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37644 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37645 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37646 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37647 nargs_constant = 3;
37648 nargs = 5;
37649 break;
37650 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37651 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37652 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37653 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37654 nargs = 6;
37655 nargs_constant = 4;
37656 break;
37657 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37658 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37659 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37660 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37661 nargs = 6;
37662 nargs_constant = 3;
37663 break;
37664 default:
37665 gcc_unreachable ();
37667 gcc_assert (nargs <= ARRAY_SIZE (args));
37669 if (optimize
37670 || target == 0
37671 || GET_MODE (target) != tmode
37672 || !insn_p->operand[0].predicate (target, tmode))
37673 target = gen_reg_rtx (tmode);
37675 for (i = 0; i < nargs; i++)
37677 tree arg = CALL_EXPR_ARG (exp, i);
37678 rtx op = expand_normal (arg);
37679 machine_mode mode = insn_p->operand[i + 1].mode;
37680 bool match = insn_p->operand[i + 1].predicate (op, mode);
37682 if (i == nargs - nargs_constant)
37684 if (!match)
37686 switch (icode)
37688 case CODE_FOR_avx512f_getmantv8df_mask_round:
37689 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37690 case CODE_FOR_avx512f_vgetmantv2df_round:
37691 case CODE_FOR_avx512f_vgetmantv4sf_round:
37692 error ("the immediate argument must be a 4-bit immediate");
37693 return const0_rtx;
37694 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37695 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37696 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37697 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37698 error ("the immediate argument must be a 5-bit immediate");
37699 return const0_rtx;
37700 default:
37701 error ("the immediate argument must be an 8-bit immediate");
37702 return const0_rtx;
37706 else if (i == nargs-1)
37708 if (!insn_p->operand[nargs].predicate (op, SImode))
37710 error ("incorrect rounding operand");
37711 return const0_rtx;
37714 /* If there is no rounding, use the normal version of the pattern. */
37715 if (INTVAL (op) == NO_ROUND)
37716 redundant_embed_rnd = 1;
37718 else
37720 if (VECTOR_MODE_P (mode))
37721 op = safe_vector_operand (op, mode);
37723 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37725 if (optimize || !match)
37726 op = copy_to_mode_reg (mode, op);
37728 else
37730 op = copy_to_reg (op);
37731 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37735 args[i].op = op;
37736 args[i].mode = mode;
37739 switch (nargs)
37741 case 1:
37742 pat = GEN_FCN (icode) (target, args[0].op);
37743 break;
37744 case 2:
37745 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37746 break;
37747 case 3:
37748 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37749 args[2].op);
37750 break;
37751 case 4:
37752 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37753 args[2].op, args[3].op);
37754 break;
37755 case 5:
37756 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37757 args[2].op, args[3].op, args[4].op);
break;
37758 case 6:
37759 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37760 args[2].op, args[3].op, args[4].op,
37761 args[5].op);
37762 break;
37763 default:
37764 gcc_unreachable ();
37767 if (!pat)
37768 return 0;
37770 if (redundant_embed_rnd)
37771 pat = ix86_erase_embedded_rounding (pat);
37773 emit_insn (pat);
37774 return target;
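/* Hedged usage sketch (the intrinsic name is an assumption about the
   AVX-512 headers, not recorded here): a call such as
   _mm512_add_round_pd (a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
   passes the rounding selector as the last argument; when the caller uses
   the current-direction value (NO_ROUND here) instead, redundant_embed_rnd
   is set above and the embedded-rounding unspec is erased so the plain
   pattern is emitted.  */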
37777 /* Subroutine of ix86_expand_builtin to take care of special insns
37778 with variable number of operands. */
37780 static rtx
37781 ix86_expand_special_args_builtin (const struct builtin_description *d,
37782 tree exp, rtx target)
37784 tree arg;
37785 rtx pat, op;
37786 unsigned int i, nargs, arg_adjust, memory;
37787 bool aligned_mem = false;
37788 struct
37790 rtx op;
37791 machine_mode mode;
37792 } args[3];
37793 enum insn_code icode = d->icode;
37794 bool last_arg_constant = false;
37795 const struct insn_data_d *insn_p = &insn_data[icode];
37796 machine_mode tmode = insn_p->operand[0].mode;
37797 enum { load, store } klass;
37799 switch ((enum ix86_builtin_func_type) d->flag)
37801 case VOID_FTYPE_VOID:
37802 emit_insn (GEN_FCN (icode) (target));
37803 return 0;
37804 case VOID_FTYPE_UINT64:
37805 case VOID_FTYPE_UNSIGNED:
37806 nargs = 0;
37807 klass = store;
37808 memory = 0;
37809 break;
37811 case INT_FTYPE_VOID:
37812 case USHORT_FTYPE_VOID:
37813 case UINT64_FTYPE_VOID:
37814 case UNSIGNED_FTYPE_VOID:
37815 nargs = 0;
37816 klass = load;
37817 memory = 0;
37818 break;
37819 case UINT64_FTYPE_PUNSIGNED:
37820 case V2DI_FTYPE_PV2DI:
37821 case V4DI_FTYPE_PV4DI:
37822 case V32QI_FTYPE_PCCHAR:
37823 case V16QI_FTYPE_PCCHAR:
37824 case V8SF_FTYPE_PCV4SF:
37825 case V8SF_FTYPE_PCFLOAT:
37826 case V4SF_FTYPE_PCFLOAT:
37827 case V4DF_FTYPE_PCV2DF:
37828 case V4DF_FTYPE_PCDOUBLE:
37829 case V2DF_FTYPE_PCDOUBLE:
37830 case VOID_FTYPE_PVOID:
37831 case V16SI_FTYPE_PV4SI:
37832 case V16SF_FTYPE_PV4SF:
37833 case V8DI_FTYPE_PV4DI:
37834 case V8DI_FTYPE_PV8DI:
37835 case V8DF_FTYPE_PV4DF:
37836 nargs = 1;
37837 klass = load;
37838 memory = 0;
37839 switch (icode)
37841 case CODE_FOR_sse4_1_movntdqa:
37842 case CODE_FOR_avx2_movntdqa:
37843 case CODE_FOR_avx512f_movntdqa:
37844 aligned_mem = true;
37845 break;
37846 default:
37847 break;
37849 break;
37850 case VOID_FTYPE_PV2SF_V4SF:
37851 case VOID_FTYPE_PV8DI_V8DI:
37852 case VOID_FTYPE_PV4DI_V4DI:
37853 case VOID_FTYPE_PV2DI_V2DI:
37854 case VOID_FTYPE_PCHAR_V32QI:
37855 case VOID_FTYPE_PCHAR_V16QI:
37856 case VOID_FTYPE_PFLOAT_V16SF:
37857 case VOID_FTYPE_PFLOAT_V8SF:
37858 case VOID_FTYPE_PFLOAT_V4SF:
37859 case VOID_FTYPE_PDOUBLE_V8DF:
37860 case VOID_FTYPE_PDOUBLE_V4DF:
37861 case VOID_FTYPE_PDOUBLE_V2DF:
37862 case VOID_FTYPE_PLONGLONG_LONGLONG:
37863 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37864 case VOID_FTYPE_PINT_INT:
37865 nargs = 1;
37866 klass = store;
37867 /* Reserve memory operand for target. */
37868 memory = ARRAY_SIZE (args);
37869 switch (icode)
37871 /* These builtins and instructions require the memory
37872 to be properly aligned. */
37873 case CODE_FOR_avx_movntv4di:
37874 case CODE_FOR_sse2_movntv2di:
37875 case CODE_FOR_avx_movntv8sf:
37876 case CODE_FOR_sse_movntv4sf:
37877 case CODE_FOR_sse4a_vmmovntv4sf:
37878 case CODE_FOR_avx_movntv4df:
37879 case CODE_FOR_sse2_movntv2df:
37880 case CODE_FOR_sse4a_vmmovntv2df:
37881 case CODE_FOR_sse2_movntidi:
37882 case CODE_FOR_sse_movntq:
37883 case CODE_FOR_sse2_movntisi:
37884 case CODE_FOR_avx512f_movntv16sf:
37885 case CODE_FOR_avx512f_movntv8df:
37886 case CODE_FOR_avx512f_movntv8di:
37887 aligned_mem = true;
37888 break;
37889 default:
37890 break;
37892 break;
37893 case V4SF_FTYPE_V4SF_PCV2SF:
37894 case V2DF_FTYPE_V2DF_PCDOUBLE:
37895 nargs = 2;
37896 klass = load;
37897 memory = 1;
37898 break;
37899 case V8SF_FTYPE_PCV8SF_V8SI:
37900 case V4DF_FTYPE_PCV4DF_V4DI:
37901 case V4SF_FTYPE_PCV4SF_V4SI:
37902 case V2DF_FTYPE_PCV2DF_V2DI:
37903 case V8SI_FTYPE_PCV8SI_V8SI:
37904 case V4DI_FTYPE_PCV4DI_V4DI:
37905 case V4SI_FTYPE_PCV4SI_V4SI:
37906 case V2DI_FTYPE_PCV2DI_V2DI:
37907 nargs = 2;
37908 klass = load;
37909 memory = 0;
37910 break;
37911 case VOID_FTYPE_PV8DF_V8DF_QI:
37912 case VOID_FTYPE_PV16SF_V16SF_HI:
37913 case VOID_FTYPE_PV8DI_V8DI_QI:
37914 case VOID_FTYPE_PV4DI_V4DI_QI:
37915 case VOID_FTYPE_PV2DI_V2DI_QI:
37916 case VOID_FTYPE_PV16SI_V16SI_HI:
37917 case VOID_FTYPE_PV8SI_V8SI_QI:
37918 case VOID_FTYPE_PV4SI_V4SI_QI:
37919 switch (icode)
37921 /* These builtins and instructions require the memory
37922 to be properly aligned. */
37923 case CODE_FOR_avx512f_storev16sf_mask:
37924 case CODE_FOR_avx512f_storev16si_mask:
37925 case CODE_FOR_avx512f_storev8df_mask:
37926 case CODE_FOR_avx512f_storev8di_mask:
37927 case CODE_FOR_avx512vl_storev8sf_mask:
37928 case CODE_FOR_avx512vl_storev8si_mask:
37929 case CODE_FOR_avx512vl_storev4df_mask:
37930 case CODE_FOR_avx512vl_storev4di_mask:
37931 case CODE_FOR_avx512vl_storev4sf_mask:
37932 case CODE_FOR_avx512vl_storev4si_mask:
37933 case CODE_FOR_avx512vl_storev2df_mask:
37934 case CODE_FOR_avx512vl_storev2di_mask:
37935 aligned_mem = true;
37936 break;
37937 default:
37938 break;
37940 /* FALLTHRU */
37941 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37942 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37943 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37944 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37945 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37946 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37947 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37948 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37949 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37950 case VOID_FTYPE_PFLOAT_V4SF_QI:
37951 case VOID_FTYPE_PV8SI_V8DI_QI:
37952 case VOID_FTYPE_PV8HI_V8DI_QI:
37953 case VOID_FTYPE_PV16HI_V16SI_HI:
37954 case VOID_FTYPE_PV16QI_V8DI_QI:
37955 case VOID_FTYPE_PV16QI_V16SI_HI:
37956 case VOID_FTYPE_PV4SI_V4DI_QI:
37957 case VOID_FTYPE_PV4SI_V2DI_QI:
37958 case VOID_FTYPE_PV8HI_V4DI_QI:
37959 case VOID_FTYPE_PV8HI_V2DI_QI:
37960 case VOID_FTYPE_PV8HI_V8SI_QI:
37961 case VOID_FTYPE_PV8HI_V4SI_QI:
37962 case VOID_FTYPE_PV16QI_V4DI_QI:
37963 case VOID_FTYPE_PV16QI_V2DI_QI:
37964 case VOID_FTYPE_PV16QI_V8SI_QI:
37965 case VOID_FTYPE_PV16QI_V4SI_QI:
37966 case VOID_FTYPE_PV8HI_V8HI_QI:
37967 case VOID_FTYPE_PV16HI_V16HI_HI:
37968 case VOID_FTYPE_PV32HI_V32HI_SI:
37969 case VOID_FTYPE_PV16QI_V16QI_HI:
37970 case VOID_FTYPE_PV32QI_V32QI_SI:
37971 case VOID_FTYPE_PV64QI_V64QI_DI:
37972 case VOID_FTYPE_PV4DF_V4DF_QI:
37973 case VOID_FTYPE_PV2DF_V2DF_QI:
37974 case VOID_FTYPE_PV8SF_V8SF_QI:
37975 case VOID_FTYPE_PV4SF_V4SF_QI:
37976 nargs = 2;
37977 klass = store;
37978 /* Reserve memory operand for target. */
37979 memory = ARRAY_SIZE (args);
37980 break;
37981 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37982 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37983 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37984 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37985 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37986 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37987 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37988 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37989 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37990 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37991 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37992 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37993 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37994 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37995 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37996 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37997 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37998 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37999 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38000 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38001 nargs = 3;
38002 klass = load;
38003 memory = 0;
38004 switch (icode)
38006 /* These builtins and instructions require the memory
38007 to be properly aligned. */
38008 case CODE_FOR_avx512f_loadv16sf_mask:
38009 case CODE_FOR_avx512f_loadv16si_mask:
38010 case CODE_FOR_avx512f_loadv8df_mask:
38011 case CODE_FOR_avx512f_loadv8di_mask:
38012 case CODE_FOR_avx512vl_loadv8sf_mask:
38013 case CODE_FOR_avx512vl_loadv8si_mask:
38014 case CODE_FOR_avx512vl_loadv4df_mask:
38015 case CODE_FOR_avx512vl_loadv4di_mask:
38016 case CODE_FOR_avx512vl_loadv4sf_mask:
38017 case CODE_FOR_avx512vl_loadv4si_mask:
38018 case CODE_FOR_avx512vl_loadv2df_mask:
38019 case CODE_FOR_avx512vl_loadv2di_mask:
38020 case CODE_FOR_avx512bw_loadv64qi_mask:
38021 case CODE_FOR_avx512vl_loadv32qi_mask:
38022 case CODE_FOR_avx512vl_loadv16qi_mask:
38023 case CODE_FOR_avx512bw_loadv32hi_mask:
38024 case CODE_FOR_avx512vl_loadv16hi_mask:
38025 case CODE_FOR_avx512vl_loadv8hi_mask:
38026 aligned_mem = true;
38027 break;
38028 default:
38029 break;
38031 break;
38032 case VOID_FTYPE_UINT_UINT_UINT:
38033 case VOID_FTYPE_UINT64_UINT_UINT:
38034 case UCHAR_FTYPE_UINT_UINT_UINT:
38035 case UCHAR_FTYPE_UINT64_UINT_UINT:
38036 nargs = 3;
38037 klass = load;
38038 memory = ARRAY_SIZE (args);
38039 last_arg_constant = true;
38040 break;
38041 default:
38042 gcc_unreachable ();
38045 gcc_assert (nargs <= ARRAY_SIZE (args));
38047 if (klass == store)
38049 arg = CALL_EXPR_ARG (exp, 0);
38050 op = expand_normal (arg);
38051 gcc_assert (target == 0);
38052 if (memory)
38054 op = ix86_zero_extend_to_Pmode (op);
38055 target = gen_rtx_MEM (tmode, op);
38056 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38057 on it. Try to improve it using get_pointer_alignment,
38058 and if the special builtin is one that requires strict
38059 mode alignment, also from its GET_MODE_ALIGNMENT.
38060 Failure to do so could lead to ix86_legitimate_combined_insn
38061 rejecting all changes to such insns. */
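/* For instance (illustrative only): for CODE_FOR_sse2_movntv2di, a
   non-temporal 128-bit store, tmode is V2DImode, so aligned_mem raises the
   recorded MEM_ALIGN to GET_MODE_ALIGNMENT (V2DImode) whenever the pointer
   argument by itself guarantees less than that.  */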
38062 unsigned int align = get_pointer_alignment (arg);
38063 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38064 align = GET_MODE_ALIGNMENT (tmode);
38065 if (MEM_ALIGN (target) < align)
38066 set_mem_align (target, align);
38068 else
38069 target = force_reg (tmode, op);
38070 arg_adjust = 1;
38072 else
38074 arg_adjust = 0;
38075 if (optimize
38076 || target == 0
38077 || !register_operand (target, tmode)
38078 || GET_MODE (target) != tmode)
38079 target = gen_reg_rtx (tmode);
38082 for (i = 0; i < nargs; i++)
38084 machine_mode mode = insn_p->operand[i + 1].mode;
38085 bool match;
38087 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38088 op = expand_normal (arg);
38089 match = insn_p->operand[i + 1].predicate (op, mode);
38091 if (last_arg_constant && (i + 1) == nargs)
38093 if (!match)
38095 if (icode == CODE_FOR_lwp_lwpvalsi3
38096 || icode == CODE_FOR_lwp_lwpinssi3
38097 || icode == CODE_FOR_lwp_lwpvaldi3
38098 || icode == CODE_FOR_lwp_lwpinsdi3)
38099 error ("the last argument must be a 32-bit immediate");
38100 else
38101 error ("the last argument must be an 8-bit immediate");
38102 return const0_rtx;
38105 else
38107 if (i == memory)
38109 /* This must be the memory operand. */
38110 op = ix86_zero_extend_to_Pmode (op);
38111 op = gen_rtx_MEM (mode, op);
38112 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38113 on it. Try to improve it using get_pointer_alignment,
38114 and if the special builtin is one that requires strict
38115 mode alignment, also from its GET_MODE_ALIGNMENT.
38116 Failure to do so could lead to ix86_legitimate_combined_insn
38117 rejecting all changes to such insns. */
38118 unsigned int align = get_pointer_alignment (arg);
38119 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38120 align = GET_MODE_ALIGNMENT (mode);
38121 if (MEM_ALIGN (op) < align)
38122 set_mem_align (op, align);
38124 else
38126 /* This must be a register operand. */
38127 if (VECTOR_MODE_P (mode))
38128 op = safe_vector_operand (op, mode);
38130 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38131 op = copy_to_mode_reg (mode, op);
38132 else
38134 op = copy_to_reg (op);
38135 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38140 args[i].op = op;
38141 args[i].mode = mode;
38144 switch (nargs)
38146 case 0:
38147 pat = GEN_FCN (icode) (target);
38148 break;
38149 case 1:
38150 pat = GEN_FCN (icode) (target, args[0].op);
38151 break;
38152 case 2:
38153 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38154 break;
38155 case 3:
38156 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38157 break;
38158 default:
38159 gcc_unreachable ();
38162 if (! pat)
38163 return 0;
38164 emit_insn (pat);
38165 return klass == store ? 0 : target;
38168 /* Return the integer constant in ARG. Constrain it to be in the range
38169 of the subparts of VEC_TYPE; issue an error if not. */
38171 static int
38172 get_element_number (tree vec_type, tree arg)
38174 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38176 if (!tree_fits_uhwi_p (arg)
38177 || (elt = tree_to_uhwi (arg), elt > max))
38179 error ("selector must be an integer constant in the range 0..%wi", max);
38180 return 0;
38183 return elt;
38186 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38187 ix86_expand_vector_init. We DO have language-level syntax for this, in
38188 the form of (type){ init-list }. Except that since we can't place emms
38189 instructions from inside the compiler, we can't allow the use of MMX
38190 registers unless the user explicitly asks for it. So we do *not* define
38191 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38192 we have builtins invoked by mmintrin.h that give us license to emit
38193 these sorts of instructions. */
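/* A hedged usage sketch, assuming the usual mmintrin.h wrappers; the
   exact wrapper names and element ordering come from the intrinsic
   headers, not from this file:

     #include <mmintrin.h>
     __m64 two_ints (int a, int b)
     {
       return _mm_set_pi32 (a, b);
     }

   Such a call reaches ix86_expand_vec_init_builtin below through
   IX86_BUILTIN_VEC_INIT_V2SI, with one CALL_EXPR argument per vector
   element.  */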
38195 static rtx
38196 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38198 machine_mode tmode = TYPE_MODE (type);
38199 machine_mode inner_mode = GET_MODE_INNER (tmode);
38200 int i, n_elt = GET_MODE_NUNITS (tmode);
38201 rtvec v = rtvec_alloc (n_elt);
38203 gcc_assert (VECTOR_MODE_P (tmode));
38204 gcc_assert (call_expr_nargs (exp) == n_elt);
38206 for (i = 0; i < n_elt; ++i)
38208 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38209 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38212 if (!target || !register_operand (target, tmode))
38213 target = gen_reg_rtx (tmode);
38215 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38216 return target;
38219 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38220 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38221 had a language-level syntax for referencing vector elements. */
38223 static rtx
38224 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38226 machine_mode tmode, mode0;
38227 tree arg0, arg1;
38228 int elt;
38229 rtx op0;
38231 arg0 = CALL_EXPR_ARG (exp, 0);
38232 arg1 = CALL_EXPR_ARG (exp, 1);
38234 op0 = expand_normal (arg0);
38235 elt = get_element_number (TREE_TYPE (arg0), arg1);
38237 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38238 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38239 gcc_assert (VECTOR_MODE_P (mode0));
38241 op0 = force_reg (mode0, op0);
38243 if (optimize || !target || !register_operand (target, tmode))
38244 target = gen_reg_rtx (tmode);
38246 ix86_expand_vector_extract (true, target, op0, elt);
38248 return target;
38251 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38252 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38253 a language-level syntax for referencing vector elements. */
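/* Roughly, and only as an illustration, the expansion below behaves
   like the scalar sequence

     tmp = vec;        copy the source vector
     tmp[elt] = val;   overwrite a single element
     return tmp;

   so the original vector operand is left unmodified.  */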
38255 static rtx
38256 ix86_expand_vec_set_builtin (tree exp)
38258 machine_mode tmode, mode1;
38259 tree arg0, arg1, arg2;
38260 int elt;
38261 rtx op0, op1, target;
38263 arg0 = CALL_EXPR_ARG (exp, 0);
38264 arg1 = CALL_EXPR_ARG (exp, 1);
38265 arg2 = CALL_EXPR_ARG (exp, 2);
38267 tmode = TYPE_MODE (TREE_TYPE (arg0));
38268 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38269 gcc_assert (VECTOR_MODE_P (tmode));
38271 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38272 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38273 elt = get_element_number (TREE_TYPE (arg0), arg2);
38275 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38276 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38278 op0 = force_reg (tmode, op0);
38279 op1 = force_reg (mode1, op1);
38281 /* OP0 is the source of these builtin functions and shouldn't be
38282 modified. Create a copy, use it and return it as target. */
38283 target = gen_reg_rtx (tmode);
38284 emit_move_insn (target, op0);
38285 ix86_expand_vector_set (true, target, op1, elt);
38287 return target;
38290 /* Emit conditional move of SRC to DST with condition
38291 OP1 CODE OP2. */
38292 static void
38293 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38295 rtx t;
38297 if (TARGET_CMOVE)
38299 t = ix86_expand_compare (code, op1, op2);
38300 emit_insn (gen_rtx_SET (VOIDmode, dst,
38301 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38302 src, dst)));
38304 else
38306 rtx nomove = gen_label_rtx ();
38307 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38308 const0_rtx, GET_MODE (op1), 1, nomove);
38309 emit_move_insn (dst, src);
38310 emit_label (nomove);
38315 /* Choose the max of DST and SRC and store it in DST. */
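/* Illustratively, this is the unsigned-compare equivalent of

     if (dst < src)
       dst = src;

   implemented with a conditional move when the target supports it.  */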
38315 static void
38316 ix86_emit_move_max (rtx dst, rtx src)
38318 ix86_emit_cmove (dst, src, LTU, dst, src);
38321 /* Expand an expression EXP that calls a built-in function,
38322 with result going to TARGET if that's convenient
38323 (and in mode MODE if that's convenient).
38324 SUBTARGET may be used as the target for computing one of EXP's operands.
38325 IGNORE is nonzero if the value is to be ignored. */
38327 static rtx
38328 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38329 machine_mode mode, int ignore)
38331 const struct builtin_description *d;
38332 size_t i;
38333 enum insn_code icode;
38334 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38335 tree arg0, arg1, arg2, arg3, arg4;
38336 rtx op0, op1, op2, op3, op4, pat, insn;
38337 machine_mode mode0, mode1, mode2, mode3, mode4;
38338 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38340 /* For CPU builtins that can be folded, fold first and expand the fold. */
38341 switch (fcode)
38343 case IX86_BUILTIN_CPU_INIT:
38345 /* Make it call __cpu_indicator_init in libgcc. */
38346 tree call_expr, fndecl, type;
38347 type = build_function_type_list (integer_type_node, NULL_TREE);
38348 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38349 call_expr = build_call_expr (fndecl, 0);
38350 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38352 case IX86_BUILTIN_CPU_IS:
38353 case IX86_BUILTIN_CPU_SUPPORTS:
38355 tree arg0 = CALL_EXPR_ARG (exp, 0);
38356 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38357 gcc_assert (fold_expr != NULL_TREE);
38358 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38362 /* Determine whether the builtin function is available under the current ISA.
38363 Originally the builtin was not created if it wasn't applicable to the
38364 current ISA based on the command line switches. With function specific
38365 options, we need to check in the context of the function making the call
38366 whether it is supported. */
38367 if (ix86_builtins_isa[fcode].isa
38368 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38370 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38371 NULL, (enum fpmath_unit) 0, false);
38373 if (!opts)
38374 error ("%qE needs unknown isa option", fndecl);
38375 else
38377 gcc_assert (opts != NULL);
38378 error ("%qE needs isa option %s", fndecl, opts);
38379 free (opts);
38381 return const0_rtx;
38384 switch (fcode)
38386 case IX86_BUILTIN_BNDMK:
38387 if (!target
38388 || GET_MODE (target) != BNDmode
38389 || !register_operand (target, BNDmode))
38390 target = gen_reg_rtx (BNDmode);
38392 arg0 = CALL_EXPR_ARG (exp, 0);
38393 arg1 = CALL_EXPR_ARG (exp, 1);
38395 op0 = expand_normal (arg0);
38396 op1 = expand_normal (arg1);
38398 if (!register_operand (op0, Pmode))
38399 op0 = ix86_zero_extend_to_Pmode (op0);
38400 if (!register_operand (op1, Pmode))
38401 op1 = ix86_zero_extend_to_Pmode (op1);
38403 /* Builtin arg1 is the size of the block, but the instruction's
38404 op1 should be (size - 1). */
38405 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38406 NULL_RTX, 1, OPTAB_DIRECT);
38408 emit_insn (BNDmode == BND64mode
38409 ? gen_bnd64_mk (target, op0, op1)
38410 : gen_bnd32_mk (target, op0, op1));
38411 return target;
38413 case IX86_BUILTIN_BNDSTX:
38414 arg0 = CALL_EXPR_ARG (exp, 0);
38415 arg1 = CALL_EXPR_ARG (exp, 1);
38416 arg2 = CALL_EXPR_ARG (exp, 2);
38418 op0 = expand_normal (arg0);
38419 op1 = expand_normal (arg1);
38420 op2 = expand_normal (arg2);
38422 if (!register_operand (op0, Pmode))
38423 op0 = ix86_zero_extend_to_Pmode (op0);
38424 if (!register_operand (op1, BNDmode))
38425 op1 = copy_to_mode_reg (BNDmode, op1);
38426 if (!register_operand (op2, Pmode))
38427 op2 = ix86_zero_extend_to_Pmode (op2);
38429 emit_insn (BNDmode == BND64mode
38430 ? gen_bnd64_stx (op2, op0, op1)
38431 : gen_bnd32_stx (op2, op0, op1));
38432 return 0;
38434 case IX86_BUILTIN_BNDLDX:
38435 if (!target
38436 || GET_MODE (target) != BNDmode
38437 || !register_operand (target, BNDmode))
38438 target = gen_reg_rtx (BNDmode);
38440 arg0 = CALL_EXPR_ARG (exp, 0);
38441 arg1 = CALL_EXPR_ARG (exp, 1);
38443 op0 = expand_normal (arg0);
38444 op1 = expand_normal (arg1);
38446 if (!register_operand (op0, Pmode))
38447 op0 = ix86_zero_extend_to_Pmode (op0);
38448 if (!register_operand (op1, Pmode))
38449 op1 = ix86_zero_extend_to_Pmode (op1);
38451 emit_insn (BNDmode == BND64mode
38452 ? gen_bnd64_ldx (target, op0, op1)
38453 : gen_bnd32_ldx (target, op0, op1));
38454 return target;
38456 case IX86_BUILTIN_BNDCL:
38457 arg0 = CALL_EXPR_ARG (exp, 0);
38458 arg1 = CALL_EXPR_ARG (exp, 1);
38460 op0 = expand_normal (arg0);
38461 op1 = expand_normal (arg1);
38463 if (!register_operand (op0, Pmode))
38464 op0 = ix86_zero_extend_to_Pmode (op0);
38465 if (!register_operand (op1, BNDmode))
38466 op1 = copy_to_mode_reg (BNDmode, op1);
38468 emit_insn (BNDmode == BND64mode
38469 ? gen_bnd64_cl (op1, op0)
38470 : gen_bnd32_cl (op1, op0));
38471 return 0;
38473 case IX86_BUILTIN_BNDCU:
38474 arg0 = CALL_EXPR_ARG (exp, 0);
38475 arg1 = CALL_EXPR_ARG (exp, 1);
38477 op0 = expand_normal (arg0);
38478 op1 = expand_normal (arg1);
38480 if (!register_operand (op0, Pmode))
38481 op0 = ix86_zero_extend_to_Pmode (op0);
38482 if (!register_operand (op1, BNDmode))
38483 op1 = copy_to_mode_reg (BNDmode, op1);
38485 emit_insn (BNDmode == BND64mode
38486 ? gen_bnd64_cu (op1, op0)
38487 : gen_bnd32_cu (op1, op0));
38488 return 0;
38490 case IX86_BUILTIN_BNDRET:
38491 arg0 = CALL_EXPR_ARG (exp, 0);
38492 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38493 target = chkp_get_rtl_bounds (arg0);
38495 /* If no bounds were specified for the returned value,
38496 then use INIT bounds. This usually happens when
38497 some built-in function is expanded. */
38498 if (!target)
38500 rtx t1 = gen_reg_rtx (Pmode);
38501 rtx t2 = gen_reg_rtx (Pmode);
38502 target = gen_reg_rtx (BNDmode);
38503 emit_move_insn (t1, const0_rtx);
38504 emit_move_insn (t2, constm1_rtx);
38505 emit_insn (BNDmode == BND64mode
38506 ? gen_bnd64_mk (target, t1, t2)
38507 : gen_bnd32_mk (target, t1, t2));
38510 gcc_assert (target && REG_P (target));
38511 return target;
38513 case IX86_BUILTIN_BNDNARROW:
38515 rtx m1, m1h1, m1h2, lb, ub, t1;
38517 /* Return value and lb. */
38518 arg0 = CALL_EXPR_ARG (exp, 0);
38519 /* Bounds. */
38520 arg1 = CALL_EXPR_ARG (exp, 1);
38521 /* Size. */
38522 arg2 = CALL_EXPR_ARG (exp, 2);
38524 lb = expand_normal (arg0);
38525 op1 = expand_normal (arg1);
38526 op2 = expand_normal (arg2);
38528 /* Size was passed but we need to use (size - 1) as for bndmk. */
38529 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38530 NULL_RTX, 1, OPTAB_DIRECT);
38532 /* Add LB to the size and invert to get UB. */
38533 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38534 op2, 1, OPTAB_DIRECT);
38535 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38537 if (!register_operand (lb, Pmode))
38538 lb = ix86_zero_extend_to_Pmode (lb);
38539 if (!register_operand (ub, Pmode))
38540 ub = ix86_zero_extend_to_Pmode (ub);
38542 /* We need to move bounds to memory before any computations. */
38543 if (MEM_P (op1))
38544 m1 = op1;
38545 else
38547 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38548 emit_move_insn (m1, op1);
38551 /* Generate mem expression to be used for access to LB and UB. */
38552 m1h1 = adjust_address (m1, Pmode, 0);
38553 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38555 t1 = gen_reg_rtx (Pmode);
38557 /* Compute LB. */
38558 emit_move_insn (t1, m1h1);
38559 ix86_emit_move_max (t1, lb);
38560 emit_move_insn (m1h1, t1);
38562 /* Compute UB. UB is stored in 1's complement form. Therefore
38563 we also use max here. */
38564 emit_move_insn (t1, m1h2);
38565 ix86_emit_move_max (t1, ub);
38566 emit_move_insn (m1h2, t1);
38568 op2 = gen_reg_rtx (BNDmode);
38569 emit_move_insn (op2, m1);
38571 return chkp_join_splitted_slot (lb, op2);
38574 case IX86_BUILTIN_BNDINT:
38576 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38578 if (!target
38579 || GET_MODE (target) != BNDmode
38580 || !register_operand (target, BNDmode))
38581 target = gen_reg_rtx (BNDmode);
38583 arg0 = CALL_EXPR_ARG (exp, 0);
38584 arg1 = CALL_EXPR_ARG (exp, 1);
38586 op0 = expand_normal (arg0);
38587 op1 = expand_normal (arg1);
38589 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38590 rh1 = adjust_address (res, Pmode, 0);
38591 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38593 /* Put first bounds to temporaries. */
38594 lb1 = gen_reg_rtx (Pmode);
38595 ub1 = gen_reg_rtx (Pmode);
38596 if (MEM_P (op0))
38598 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38599 emit_move_insn (ub1, adjust_address (op0, Pmode,
38600 GET_MODE_SIZE (Pmode)));
38602 else
38604 emit_move_insn (res, op0);
38605 emit_move_insn (lb1, rh1);
38606 emit_move_insn (ub1, rh2);
38609 /* Put second bounds to temporaries. */
38610 lb2 = gen_reg_rtx (Pmode);
38611 ub2 = gen_reg_rtx (Pmode);
38612 if (MEM_P (op1))
38614 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38615 emit_move_insn (ub2, adjust_address (op1, Pmode,
38616 GET_MODE_SIZE (Pmode)));
38618 else
38620 emit_move_insn (res, op1);
38621 emit_move_insn (lb2, rh1);
38622 emit_move_insn (ub2, rh2);
38625 /* Compute LB. */
38626 ix86_emit_move_max (lb1, lb2);
38627 emit_move_insn (rh1, lb1);
38629 /* Compute UB. UB is stored in 1's complement form. Therefore
38630 we also use max here. */
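/* Worked example (illustrative only): if the decoded upper bounds are
   100 and 50, the stored values are ~100 and ~50.  In an unsigned
   comparison ~50 > ~100, so taking the maximum of the stored values
   selects ~50, which decodes back to min (100, 50) == 50, the tighter
   bound.  */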
38631 ix86_emit_move_max (ub1, ub2);
38632 emit_move_insn (rh2, ub1);
38634 emit_move_insn (target, res);
38636 return target;
38639 case IX86_BUILTIN_SIZEOF:
38641 tree name;
38642 rtx symbol;
38644 if (!target
38645 || GET_MODE (target) != Pmode
38646 || !register_operand (target, Pmode))
38647 target = gen_reg_rtx (Pmode);
38649 arg0 = CALL_EXPR_ARG (exp, 0);
38650 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38652 name = DECL_ASSEMBLER_NAME (arg0);
38653 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38655 emit_insn (Pmode == SImode
38656 ? gen_move_size_reloc_si (target, symbol)
38657 : gen_move_size_reloc_di (target, symbol));
38659 return target;
38662 case IX86_BUILTIN_BNDLOWER:
38664 rtx mem, hmem;
38666 if (!target
38667 || GET_MODE (target) != Pmode
38668 || !register_operand (target, Pmode))
38669 target = gen_reg_rtx (Pmode);
38671 arg0 = CALL_EXPR_ARG (exp, 0);
38672 op0 = expand_normal (arg0);
38674 /* We need to move bounds to memory first. */
38675 if (MEM_P (op0))
38676 mem = op0;
38677 else
38679 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38680 emit_move_insn (mem, op0);
38683 /* Generate mem expression to access LB and load it. */
38684 hmem = adjust_address (mem, Pmode, 0);
38685 emit_move_insn (target, hmem);
38687 return target;
38690 case IX86_BUILTIN_BNDUPPER:
38692 rtx mem, hmem, res;
38694 if (!target
38695 || GET_MODE (target) != Pmode
38696 || !register_operand (target, Pmode))
38697 target = gen_reg_rtx (Pmode);
38699 arg0 = CALL_EXPR_ARG (exp, 0);
38700 op0 = expand_normal (arg0);
38702 /* We need to move bounds to memory first. */
38703 if (MEM_P (op0))
38704 mem = op0;
38705 else
38707 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38708 emit_move_insn (mem, op0);
38711 /* Generate mem expression to access UB. */
38712 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38714 /* We need to invert all bits of UB. */
38715 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38717 if (res != target)
38718 emit_move_insn (target, res);
38720 return target;
38723 case IX86_BUILTIN_MASKMOVQ:
38724 case IX86_BUILTIN_MASKMOVDQU:
38725 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38726 ? CODE_FOR_mmx_maskmovq
38727 : CODE_FOR_sse2_maskmovdqu);
38728 /* Note the arg order is different from the operand order. */
38729 arg1 = CALL_EXPR_ARG (exp, 0);
38730 arg2 = CALL_EXPR_ARG (exp, 1);
38731 arg0 = CALL_EXPR_ARG (exp, 2);
38732 op0 = expand_normal (arg0);
38733 op1 = expand_normal (arg1);
38734 op2 = expand_normal (arg2);
38735 mode0 = insn_data[icode].operand[0].mode;
38736 mode1 = insn_data[icode].operand[1].mode;
38737 mode2 = insn_data[icode].operand[2].mode;
38739 op0 = ix86_zero_extend_to_Pmode (op0);
38740 op0 = gen_rtx_MEM (mode1, op0);
38742 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38743 op0 = copy_to_mode_reg (mode0, op0);
38744 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38745 op1 = copy_to_mode_reg (mode1, op1);
38746 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38747 op2 = copy_to_mode_reg (mode2, op2);
38748 pat = GEN_FCN (icode) (op0, op1, op2);
38749 if (! pat)
38750 return 0;
38751 emit_insn (pat);
38752 return 0;
38754 case IX86_BUILTIN_LDMXCSR:
38755 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38756 target = assign_386_stack_local (SImode, SLOT_TEMP);
38757 emit_move_insn (target, op0);
38758 emit_insn (gen_sse_ldmxcsr (target));
38759 return 0;
38761 case IX86_BUILTIN_STMXCSR:
38762 target = assign_386_stack_local (SImode, SLOT_TEMP);
38763 emit_insn (gen_sse_stmxcsr (target));
38764 return copy_to_mode_reg (SImode, target);
38766 case IX86_BUILTIN_CLFLUSH:
38767 arg0 = CALL_EXPR_ARG (exp, 0);
38768 op0 = expand_normal (arg0);
38769 icode = CODE_FOR_sse2_clflush;
38770 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38771 op0 = ix86_zero_extend_to_Pmode (op0);
38773 emit_insn (gen_sse2_clflush (op0));
38774 return 0;
38776 case IX86_BUILTIN_CLWB:
38777 arg0 = CALL_EXPR_ARG (exp, 0);
38778 op0 = expand_normal (arg0);
38779 icode = CODE_FOR_clwb;
38780 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38781 op0 = ix86_zero_extend_to_Pmode (op0);
38783 emit_insn (gen_clwb (op0));
38784 return 0;
38786 case IX86_BUILTIN_CLFLUSHOPT:
38787 arg0 = CALL_EXPR_ARG (exp, 0);
38788 op0 = expand_normal (arg0);
38789 icode = CODE_FOR_clflushopt;
38790 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38791 op0 = ix86_zero_extend_to_Pmode (op0);
38793 emit_insn (gen_clflushopt (op0));
38794 return 0;
38796 case IX86_BUILTIN_MONITOR:
38797 arg0 = CALL_EXPR_ARG (exp, 0);
38798 arg1 = CALL_EXPR_ARG (exp, 1);
38799 arg2 = CALL_EXPR_ARG (exp, 2);
38800 op0 = expand_normal (arg0);
38801 op1 = expand_normal (arg1);
38802 op2 = expand_normal (arg2);
38803 if (!REG_P (op0))
38804 op0 = ix86_zero_extend_to_Pmode (op0);
38805 if (!REG_P (op1))
38806 op1 = copy_to_mode_reg (SImode, op1);
38807 if (!REG_P (op2))
38808 op2 = copy_to_mode_reg (SImode, op2);
38809 emit_insn (ix86_gen_monitor (op0, op1, op2));
38810 return 0;
38812 case IX86_BUILTIN_MWAIT:
38813 arg0 = CALL_EXPR_ARG (exp, 0);
38814 arg1 = CALL_EXPR_ARG (exp, 1);
38815 op0 = expand_normal (arg0);
38816 op1 = expand_normal (arg1);
38817 if (!REG_P (op0))
38818 op0 = copy_to_mode_reg (SImode, op0);
38819 if (!REG_P (op1))
38820 op1 = copy_to_mode_reg (SImode, op1);
38821 emit_insn (gen_sse3_mwait (op0, op1));
38822 return 0;
38824 case IX86_BUILTIN_VEC_INIT_V2SI:
38825 case IX86_BUILTIN_VEC_INIT_V4HI:
38826 case IX86_BUILTIN_VEC_INIT_V8QI:
38827 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38829 case IX86_BUILTIN_VEC_EXT_V2DF:
38830 case IX86_BUILTIN_VEC_EXT_V2DI:
38831 case IX86_BUILTIN_VEC_EXT_V4SF:
38832 case IX86_BUILTIN_VEC_EXT_V4SI:
38833 case IX86_BUILTIN_VEC_EXT_V8HI:
38834 case IX86_BUILTIN_VEC_EXT_V2SI:
38835 case IX86_BUILTIN_VEC_EXT_V4HI:
38836 case IX86_BUILTIN_VEC_EXT_V16QI:
38837 return ix86_expand_vec_ext_builtin (exp, target);
38839 case IX86_BUILTIN_VEC_SET_V2DI:
38840 case IX86_BUILTIN_VEC_SET_V4SF:
38841 case IX86_BUILTIN_VEC_SET_V4SI:
38842 case IX86_BUILTIN_VEC_SET_V8HI:
38843 case IX86_BUILTIN_VEC_SET_V4HI:
38844 case IX86_BUILTIN_VEC_SET_V16QI:
38845 return ix86_expand_vec_set_builtin (exp);
38847 case IX86_BUILTIN_INFQ:
38848 case IX86_BUILTIN_HUGE_VALQ:
38850 REAL_VALUE_TYPE inf;
38851 rtx tmp;
38853 real_inf (&inf);
38854 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38856 tmp = validize_mem (force_const_mem (mode, tmp));
38858 if (target == 0)
38859 target = gen_reg_rtx (mode);
38861 emit_move_insn (target, tmp);
38862 return target;
38865 case IX86_BUILTIN_RDPMC:
38866 case IX86_BUILTIN_RDTSC:
38867 case IX86_BUILTIN_RDTSCP:
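/* These builtins read a 64-bit counter.  On 64-bit targets the value
   arrives as two 32-bit halves which are recombined below; in plain
   arithmetic (illustrative only):

     result = (high << 32) | low;  */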
38869 op0 = gen_reg_rtx (DImode);
38870 op1 = gen_reg_rtx (DImode);
38872 if (fcode == IX86_BUILTIN_RDPMC)
38874 arg0 = CALL_EXPR_ARG (exp, 0);
38875 op2 = expand_normal (arg0);
38876 if (!register_operand (op2, SImode))
38877 op2 = copy_to_mode_reg (SImode, op2);
38879 insn = (TARGET_64BIT
38880 ? gen_rdpmc_rex64 (op0, op1, op2)
38881 : gen_rdpmc (op0, op2));
38882 emit_insn (insn);
38884 else if (fcode == IX86_BUILTIN_RDTSC)
38886 insn = (TARGET_64BIT
38887 ? gen_rdtsc_rex64 (op0, op1)
38888 : gen_rdtsc (op0));
38889 emit_insn (insn);
38891 else
38893 op2 = gen_reg_rtx (SImode);
38895 insn = (TARGET_64BIT
38896 ? gen_rdtscp_rex64 (op0, op1, op2)
38897 : gen_rdtscp (op0, op2));
38898 emit_insn (insn);
38900 arg0 = CALL_EXPR_ARG (exp, 0);
38901 op4 = expand_normal (arg0);
38902 if (!address_operand (op4, VOIDmode))
38904 op4 = convert_memory_address (Pmode, op4);
38905 op4 = copy_addr_to_reg (op4);
38907 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38910 if (target == 0)
38912 /* mode is VOIDmode if __builtin_rd* has been called
38913 without lhs. */
38914 if (mode == VOIDmode)
38915 return target;
38916 target = gen_reg_rtx (mode);
38919 if (TARGET_64BIT)
38921 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38922 op1, 1, OPTAB_DIRECT);
38923 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38924 op0, 1, OPTAB_DIRECT);
38927 emit_move_insn (target, op0);
38928 return target;
38930 case IX86_BUILTIN_FXSAVE:
38931 case IX86_BUILTIN_FXRSTOR:
38932 case IX86_BUILTIN_FXSAVE64:
38933 case IX86_BUILTIN_FXRSTOR64:
38934 case IX86_BUILTIN_FNSTENV:
38935 case IX86_BUILTIN_FLDENV:
38936 mode0 = BLKmode;
38937 switch (fcode)
38939 case IX86_BUILTIN_FXSAVE:
38940 icode = CODE_FOR_fxsave;
38941 break;
38942 case IX86_BUILTIN_FXRSTOR:
38943 icode = CODE_FOR_fxrstor;
38944 break;
38945 case IX86_BUILTIN_FXSAVE64:
38946 icode = CODE_FOR_fxsave64;
38947 break;
38948 case IX86_BUILTIN_FXRSTOR64:
38949 icode = CODE_FOR_fxrstor64;
38950 break;
38951 case IX86_BUILTIN_FNSTENV:
38952 icode = CODE_FOR_fnstenv;
38953 break;
38954 case IX86_BUILTIN_FLDENV:
38955 icode = CODE_FOR_fldenv;
38956 break;
38957 default:
38958 gcc_unreachable ();
38961 arg0 = CALL_EXPR_ARG (exp, 0);
38962 op0 = expand_normal (arg0);
38964 if (!address_operand (op0, VOIDmode))
38966 op0 = convert_memory_address (Pmode, op0);
38967 op0 = copy_addr_to_reg (op0);
38969 op0 = gen_rtx_MEM (mode0, op0);
38971 pat = GEN_FCN (icode) (op0);
38972 if (pat)
38973 emit_insn (pat);
38974 return 0;
38976 case IX86_BUILTIN_XSAVE:
38977 case IX86_BUILTIN_XRSTOR:
38978 case IX86_BUILTIN_XSAVE64:
38979 case IX86_BUILTIN_XRSTOR64:
38980 case IX86_BUILTIN_XSAVEOPT:
38981 case IX86_BUILTIN_XSAVEOPT64:
38982 case IX86_BUILTIN_XSAVES:
38983 case IX86_BUILTIN_XRSTORS:
38984 case IX86_BUILTIN_XSAVES64:
38985 case IX86_BUILTIN_XRSTORS64:
38986 case IX86_BUILTIN_XSAVEC:
38987 case IX86_BUILTIN_XSAVEC64:
38988 arg0 = CALL_EXPR_ARG (exp, 0);
38989 arg1 = CALL_EXPR_ARG (exp, 1);
38990 op0 = expand_normal (arg0);
38991 op1 = expand_normal (arg1);
38993 if (!address_operand (op0, VOIDmode))
38995 op0 = convert_memory_address (Pmode, op0);
38996 op0 = copy_addr_to_reg (op0);
38998 op0 = gen_rtx_MEM (BLKmode, op0);
39000 op1 = force_reg (DImode, op1);
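/* The requested state-component bitmap is consumed by the xsave/xrstor
   family through EDX:EAX, so on 64-bit targets the DImode mask is split
   below into its two 32-bit halves; illustratively:

     lo = mask & 0xffffffff;
     hi = mask >> 32;  */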
39002 if (TARGET_64BIT)
39004 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39005 NULL, 1, OPTAB_DIRECT);
39006 switch (fcode)
39008 case IX86_BUILTIN_XSAVE:
39009 icode = CODE_FOR_xsave_rex64;
39010 break;
39011 case IX86_BUILTIN_XRSTOR:
39012 icode = CODE_FOR_xrstor_rex64;
39013 break;
39014 case IX86_BUILTIN_XSAVE64:
39015 icode = CODE_FOR_xsave64;
39016 break;
39017 case IX86_BUILTIN_XRSTOR64:
39018 icode = CODE_FOR_xrstor64;
39019 break;
39020 case IX86_BUILTIN_XSAVEOPT:
39021 icode = CODE_FOR_xsaveopt_rex64;
39022 break;
39023 case IX86_BUILTIN_XSAVEOPT64:
39024 icode = CODE_FOR_xsaveopt64;
39025 break;
39026 case IX86_BUILTIN_XSAVES:
39027 icode = CODE_FOR_xsaves_rex64;
39028 break;
39029 case IX86_BUILTIN_XRSTORS:
39030 icode = CODE_FOR_xrstors_rex64;
39031 break;
39032 case IX86_BUILTIN_XSAVES64:
39033 icode = CODE_FOR_xsaves64;
39034 break;
39035 case IX86_BUILTIN_XRSTORS64:
39036 icode = CODE_FOR_xrstors64;
39037 break;
39038 case IX86_BUILTIN_XSAVEC:
39039 icode = CODE_FOR_xsavec_rex64;
39040 break;
39041 case IX86_BUILTIN_XSAVEC64:
39042 icode = CODE_FOR_xsavec64;
39043 break;
39044 default:
39045 gcc_unreachable ();
39048 op2 = gen_lowpart (SImode, op2);
39049 op1 = gen_lowpart (SImode, op1);
39050 pat = GEN_FCN (icode) (op0, op1, op2);
39052 else
39054 switch (fcode)
39056 case IX86_BUILTIN_XSAVE:
39057 icode = CODE_FOR_xsave;
39058 break;
39059 case IX86_BUILTIN_XRSTOR:
39060 icode = CODE_FOR_xrstor;
39061 break;
39062 case IX86_BUILTIN_XSAVEOPT:
39063 icode = CODE_FOR_xsaveopt;
39064 break;
39065 case IX86_BUILTIN_XSAVES:
39066 icode = CODE_FOR_xsaves;
39067 break;
39068 case IX86_BUILTIN_XRSTORS:
39069 icode = CODE_FOR_xrstors;
39070 break;
39071 case IX86_BUILTIN_XSAVEC:
39072 icode = CODE_FOR_xsavec;
39073 break;
39074 default:
39075 gcc_unreachable ();
39077 pat = GEN_FCN (icode) (op0, op1);
39080 if (pat)
39081 emit_insn (pat);
39082 return 0;
39084 case IX86_BUILTIN_LLWPCB:
39085 arg0 = CALL_EXPR_ARG (exp, 0);
39086 op0 = expand_normal (arg0);
39087 icode = CODE_FOR_lwp_llwpcb;
39088 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39089 op0 = ix86_zero_extend_to_Pmode (op0);
39090 emit_insn (gen_lwp_llwpcb (op0));
39091 return 0;
39093 case IX86_BUILTIN_SLWPCB:
39094 icode = CODE_FOR_lwp_slwpcb;
39095 if (!target
39096 || !insn_data[icode].operand[0].predicate (target, Pmode))
39097 target = gen_reg_rtx (Pmode);
39098 emit_insn (gen_lwp_slwpcb (target));
39099 return target;
39101 case IX86_BUILTIN_BEXTRI32:
39102 case IX86_BUILTIN_BEXTRI64:
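/* The TBM bextri immediate packs the extraction into two byte fields:
   bits [7:0] give the starting (least significant) bit index and bits
   [15:8] give the number of bits to extract.  As an assumed example,
   an immediate of 0x0804 extracts 8 bits starting at bit 4.  */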
39103 arg0 = CALL_EXPR_ARG (exp, 0);
39104 arg1 = CALL_EXPR_ARG (exp, 1);
39105 op0 = expand_normal (arg0);
39106 op1 = expand_normal (arg1);
39107 icode = (fcode == IX86_BUILTIN_BEXTRI32
39108 ? CODE_FOR_tbm_bextri_si
39109 : CODE_FOR_tbm_bextri_di);
39110 if (!CONST_INT_P (op1))
39112 error ("last argument must be an immediate");
39113 return const0_rtx;
39115 else
39117 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39118 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39119 op1 = GEN_INT (length);
39120 op2 = GEN_INT (lsb_index);
39121 pat = GEN_FCN (icode) (target, op0, op1, op2);
39122 if (pat)
39123 emit_insn (pat);
39124 return target;
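/* A hedged usage sketch of the builtins expanded by the rdrand cases
   below (the user-visible wrappers live in immintrin.h):

     unsigned int val;
     if (__builtin_ia32_rdrand32_step (&val))
       ...  val holds a hardware random number  ...

   The expansion stores the generated value through the pointer
   argument and returns a success flag derived from the carry flag;
   the hardware zeroes the destination when no random number was
   available.  */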
39127 case IX86_BUILTIN_RDRAND16_STEP:
39128 icode = CODE_FOR_rdrandhi_1;
39129 mode0 = HImode;
39130 goto rdrand_step;
39132 case IX86_BUILTIN_RDRAND32_STEP:
39133 icode = CODE_FOR_rdrandsi_1;
39134 mode0 = SImode;
39135 goto rdrand_step;
39137 case IX86_BUILTIN_RDRAND64_STEP:
39138 icode = CODE_FOR_rdranddi_1;
39139 mode0 = DImode;
39141 rdrand_step:
39142 op0 = gen_reg_rtx (mode0);
39143 emit_insn (GEN_FCN (icode) (op0));
39145 arg0 = CALL_EXPR_ARG (exp, 0);
39146 op1 = expand_normal (arg0);
39147 if (!address_operand (op1, VOIDmode))
39149 op1 = convert_memory_address (Pmode, op1);
39150 op1 = copy_addr_to_reg (op1);
39152 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39154 op1 = gen_reg_rtx (SImode);
39155 emit_move_insn (op1, CONST1_RTX (SImode));
39157 /* Emit SImode conditional move. */
39158 if (mode0 == HImode)
39160 op2 = gen_reg_rtx (SImode);
39161 emit_insn (gen_zero_extendhisi2 (op2, op0));
39163 else if (mode0 == SImode)
39164 op2 = op0;
39165 else
39166 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39168 if (target == 0
39169 || !register_operand (target, SImode))
39170 target = gen_reg_rtx (SImode);
39172 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39173 const0_rtx);
39174 emit_insn (gen_rtx_SET (VOIDmode, target,
39175 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39176 return target;
39178 case IX86_BUILTIN_RDSEED16_STEP:
39179 icode = CODE_FOR_rdseedhi_1;
39180 mode0 = HImode;
39181 goto rdseed_step;
39183 case IX86_BUILTIN_RDSEED32_STEP:
39184 icode = CODE_FOR_rdseedsi_1;
39185 mode0 = SImode;
39186 goto rdseed_step;
39188 case IX86_BUILTIN_RDSEED64_STEP:
39189 icode = CODE_FOR_rdseeddi_1;
39190 mode0 = DImode;
39192 rdseed_step:
39193 op0 = gen_reg_rtx (mode0);
39194 emit_insn (GEN_FCN (icode) (op0));
39196 arg0 = CALL_EXPR_ARG (exp, 0);
39197 op1 = expand_normal (arg0);
39198 if (!address_operand (op1, VOIDmode))
39200 op1 = convert_memory_address (Pmode, op1);
39201 op1 = copy_addr_to_reg (op1);
39203 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39205 op2 = gen_reg_rtx (QImode);
39207 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39208 const0_rtx);
39209 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39211 if (target == 0
39212 || !register_operand (target, SImode))
39213 target = gen_reg_rtx (SImode);
39215 emit_insn (gen_zero_extendqisi2 (target, op2));
39216 return target;
39218 case IX86_BUILTIN_SBB32:
39219 icode = CODE_FOR_subsi3_carry;
39220 mode0 = SImode;
39221 goto addcarryx;
39223 case IX86_BUILTIN_SBB64:
39224 icode = CODE_FOR_subdi3_carry;
39225 mode0 = DImode;
39226 goto addcarryx;
39228 case IX86_BUILTIN_ADDCARRYX32:
39229 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39230 mode0 = SImode;
39231 goto addcarryx;
39233 case IX86_BUILTIN_ADDCARRYX64:
39234 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39235 mode0 = DImode;
39237 addcarryx:
39238 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39239 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39240 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39241 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39243 op0 = gen_reg_rtx (QImode);
39245 /* Generate CF from input operand. */
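/* Illustrative arithmetic (not from the sources): adding the all-ones
   byte 0xFF to the QImode carry-in value c carries out of 8 bits
   exactly when c is nonzero (c + 255 >= 256 iff c >= 1), so the
   addqi3_cc below reloads CF from the incoming carry argument.  */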
39246 op1 = expand_normal (arg0);
39247 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39248 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39250 /* Gen ADCX instruction to compute X+Y+CF. */
39251 op2 = expand_normal (arg1);
39252 op3 = expand_normal (arg2);
39254 if (!REG_P (op2))
39255 op2 = copy_to_mode_reg (mode0, op2);
39256 if (!REG_P (op3))
39257 op3 = copy_to_mode_reg (mode0, op3);
39259 op0 = gen_reg_rtx (mode0);
39261 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39262 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39263 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39265 /* Store the result. */
39266 op4 = expand_normal (arg3);
39267 if (!address_operand (op4, VOIDmode))
39269 op4 = convert_memory_address (Pmode, op4);
39270 op4 = copy_addr_to_reg (op4);
39272 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39274 /* Return current CF value. */
39275 if (target == 0)
39276 target = gen_reg_rtx (QImode);
39278 PUT_MODE (pat, QImode);
39279 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39280 return target;
39282 case IX86_BUILTIN_READ_FLAGS:
39283 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39285 if (optimize
39286 || target == NULL_RTX
39287 || !nonimmediate_operand (target, word_mode)
39288 || GET_MODE (target) != word_mode)
39289 target = gen_reg_rtx (word_mode);
39291 emit_insn (gen_pop (target));
39292 return target;
39294 case IX86_BUILTIN_WRITE_FLAGS:
39296 arg0 = CALL_EXPR_ARG (exp, 0);
39297 op0 = expand_normal (arg0);
39298 if (!general_no_elim_operand (op0, word_mode))
39299 op0 = copy_to_mode_reg (word_mode, op0);
39301 emit_insn (gen_push (op0));
39302 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39303 return 0;
39305 case IX86_BUILTIN_KORTESTC16:
39306 icode = CODE_FOR_kortestchi;
39307 mode0 = HImode;
39308 mode1 = CCCmode;
39309 goto kortest;
39311 case IX86_BUILTIN_KORTESTZ16:
39312 icode = CODE_FOR_kortestzhi;
39313 mode0 = HImode;
39314 mode1 = CCZmode;
39316 kortest:
39317 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39318 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39319 op0 = expand_normal (arg0);
39320 op1 = expand_normal (arg1);
39322 op0 = copy_to_reg (op0);
39323 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39324 op1 = copy_to_reg (op1);
39325 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39327 target = gen_reg_rtx (QImode);
39328 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39330 /* Emit kortest. */
39331 emit_insn (GEN_FCN (icode) (op0, op1));
39332 /* And use setcc to return result from flags. */
39333 ix86_expand_setcc (target, EQ,
39334 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39335 return target;
39337 case IX86_BUILTIN_GATHERSIV2DF:
39338 icode = CODE_FOR_avx2_gathersiv2df;
39339 goto gather_gen;
39340 case IX86_BUILTIN_GATHERSIV4DF:
39341 icode = CODE_FOR_avx2_gathersiv4df;
39342 goto gather_gen;
39343 case IX86_BUILTIN_GATHERDIV2DF:
39344 icode = CODE_FOR_avx2_gatherdiv2df;
39345 goto gather_gen;
39346 case IX86_BUILTIN_GATHERDIV4DF:
39347 icode = CODE_FOR_avx2_gatherdiv4df;
39348 goto gather_gen;
39349 case IX86_BUILTIN_GATHERSIV4SF:
39350 icode = CODE_FOR_avx2_gathersiv4sf;
39351 goto gather_gen;
39352 case IX86_BUILTIN_GATHERSIV8SF:
39353 icode = CODE_FOR_avx2_gathersiv8sf;
39354 goto gather_gen;
39355 case IX86_BUILTIN_GATHERDIV4SF:
39356 icode = CODE_FOR_avx2_gatherdiv4sf;
39357 goto gather_gen;
39358 case IX86_BUILTIN_GATHERDIV8SF:
39359 icode = CODE_FOR_avx2_gatherdiv8sf;
39360 goto gather_gen;
39361 case IX86_BUILTIN_GATHERSIV2DI:
39362 icode = CODE_FOR_avx2_gathersiv2di;
39363 goto gather_gen;
39364 case IX86_BUILTIN_GATHERSIV4DI:
39365 icode = CODE_FOR_avx2_gathersiv4di;
39366 goto gather_gen;
39367 case IX86_BUILTIN_GATHERDIV2DI:
39368 icode = CODE_FOR_avx2_gatherdiv2di;
39369 goto gather_gen;
39370 case IX86_BUILTIN_GATHERDIV4DI:
39371 icode = CODE_FOR_avx2_gatherdiv4di;
39372 goto gather_gen;
39373 case IX86_BUILTIN_GATHERSIV4SI:
39374 icode = CODE_FOR_avx2_gathersiv4si;
39375 goto gather_gen;
39376 case IX86_BUILTIN_GATHERSIV8SI:
39377 icode = CODE_FOR_avx2_gathersiv8si;
39378 goto gather_gen;
39379 case IX86_BUILTIN_GATHERDIV4SI:
39380 icode = CODE_FOR_avx2_gatherdiv4si;
39381 goto gather_gen;
39382 case IX86_BUILTIN_GATHERDIV8SI:
39383 icode = CODE_FOR_avx2_gatherdiv8si;
39384 goto gather_gen;
39385 case IX86_BUILTIN_GATHERALTSIV4DF:
39386 icode = CODE_FOR_avx2_gathersiv4df;
39387 goto gather_gen;
39388 case IX86_BUILTIN_GATHERALTDIV8SF:
39389 icode = CODE_FOR_avx2_gatherdiv8sf;
39390 goto gather_gen;
39391 case IX86_BUILTIN_GATHERALTSIV4DI:
39392 icode = CODE_FOR_avx2_gathersiv4di;
39393 goto gather_gen;
39394 case IX86_BUILTIN_GATHERALTDIV8SI:
39395 icode = CODE_FOR_avx2_gatherdiv8si;
39396 goto gather_gen;
39397 case IX86_BUILTIN_GATHER3SIV16SF:
39398 icode = CODE_FOR_avx512f_gathersiv16sf;
39399 goto gather_gen;
39400 case IX86_BUILTIN_GATHER3SIV8DF:
39401 icode = CODE_FOR_avx512f_gathersiv8df;
39402 goto gather_gen;
39403 case IX86_BUILTIN_GATHER3DIV16SF:
39404 icode = CODE_FOR_avx512f_gatherdiv16sf;
39405 goto gather_gen;
39406 case IX86_BUILTIN_GATHER3DIV8DF:
39407 icode = CODE_FOR_avx512f_gatherdiv8df;
39408 goto gather_gen;
39409 case IX86_BUILTIN_GATHER3SIV16SI:
39410 icode = CODE_FOR_avx512f_gathersiv16si;
39411 goto gather_gen;
39412 case IX86_BUILTIN_GATHER3SIV8DI:
39413 icode = CODE_FOR_avx512f_gathersiv8di;
39414 goto gather_gen;
39415 case IX86_BUILTIN_GATHER3DIV16SI:
39416 icode = CODE_FOR_avx512f_gatherdiv16si;
39417 goto gather_gen;
39418 case IX86_BUILTIN_GATHER3DIV8DI:
39419 icode = CODE_FOR_avx512f_gatherdiv8di;
39420 goto gather_gen;
39421 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39422 icode = CODE_FOR_avx512f_gathersiv8df;
39423 goto gather_gen;
39424 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39425 icode = CODE_FOR_avx512f_gatherdiv16sf;
39426 goto gather_gen;
39427 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39428 icode = CODE_FOR_avx512f_gathersiv8di;
39429 goto gather_gen;
39430 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39431 icode = CODE_FOR_avx512f_gatherdiv16si;
39432 goto gather_gen;
39433 case IX86_BUILTIN_GATHER3SIV2DF:
39434 icode = CODE_FOR_avx512vl_gathersiv2df;
39435 goto gather_gen;
39436 case IX86_BUILTIN_GATHER3SIV4DF:
39437 icode = CODE_FOR_avx512vl_gathersiv4df;
39438 goto gather_gen;
39439 case IX86_BUILTIN_GATHER3DIV2DF:
39440 icode = CODE_FOR_avx512vl_gatherdiv2df;
39441 goto gather_gen;
39442 case IX86_BUILTIN_GATHER3DIV4DF:
39443 icode = CODE_FOR_avx512vl_gatherdiv4df;
39444 goto gather_gen;
39445 case IX86_BUILTIN_GATHER3SIV4SF:
39446 icode = CODE_FOR_avx512vl_gathersiv4sf;
39447 goto gather_gen;
39448 case IX86_BUILTIN_GATHER3SIV8SF:
39449 icode = CODE_FOR_avx512vl_gathersiv8sf;
39450 goto gather_gen;
39451 case IX86_BUILTIN_GATHER3DIV4SF:
39452 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39453 goto gather_gen;
39454 case IX86_BUILTIN_GATHER3DIV8SF:
39455 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39456 goto gather_gen;
39457 case IX86_BUILTIN_GATHER3SIV2DI:
39458 icode = CODE_FOR_avx512vl_gathersiv2di;
39459 goto gather_gen;
39460 case IX86_BUILTIN_GATHER3SIV4DI:
39461 icode = CODE_FOR_avx512vl_gathersiv4di;
39462 goto gather_gen;
39463 case IX86_BUILTIN_GATHER3DIV2DI:
39464 icode = CODE_FOR_avx512vl_gatherdiv2di;
39465 goto gather_gen;
39466 case IX86_BUILTIN_GATHER3DIV4DI:
39467 icode = CODE_FOR_avx512vl_gatherdiv4di;
39468 goto gather_gen;
39469 case IX86_BUILTIN_GATHER3SIV4SI:
39470 icode = CODE_FOR_avx512vl_gathersiv4si;
39471 goto gather_gen;
39472 case IX86_BUILTIN_GATHER3SIV8SI:
39473 icode = CODE_FOR_avx512vl_gathersiv8si;
39474 goto gather_gen;
39475 case IX86_BUILTIN_GATHER3DIV4SI:
39476 icode = CODE_FOR_avx512vl_gatherdiv4si;
39477 goto gather_gen;
39478 case IX86_BUILTIN_GATHER3DIV8SI:
39479 icode = CODE_FOR_avx512vl_gatherdiv8si;
39480 goto gather_gen;
39481 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39482 icode = CODE_FOR_avx512vl_gathersiv4df;
39483 goto gather_gen;
39484 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39485 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39486 goto gather_gen;
39487 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39488 icode = CODE_FOR_avx512vl_gathersiv4di;
39489 goto gather_gen;
39490 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39491 icode = CODE_FOR_avx512vl_gatherdiv8si;
39492 goto gather_gen;
39493 case IX86_BUILTIN_SCATTERSIV16SF:
39494 icode = CODE_FOR_avx512f_scattersiv16sf;
39495 goto scatter_gen;
39496 case IX86_BUILTIN_SCATTERSIV8DF:
39497 icode = CODE_FOR_avx512f_scattersiv8df;
39498 goto scatter_gen;
39499 case IX86_BUILTIN_SCATTERDIV16SF:
39500 icode = CODE_FOR_avx512f_scatterdiv16sf;
39501 goto scatter_gen;
39502 case IX86_BUILTIN_SCATTERDIV8DF:
39503 icode = CODE_FOR_avx512f_scatterdiv8df;
39504 goto scatter_gen;
39505 case IX86_BUILTIN_SCATTERSIV16SI:
39506 icode = CODE_FOR_avx512f_scattersiv16si;
39507 goto scatter_gen;
39508 case IX86_BUILTIN_SCATTERSIV8DI:
39509 icode = CODE_FOR_avx512f_scattersiv8di;
39510 goto scatter_gen;
39511 case IX86_BUILTIN_SCATTERDIV16SI:
39512 icode = CODE_FOR_avx512f_scatterdiv16si;
39513 goto scatter_gen;
39514 case IX86_BUILTIN_SCATTERDIV8DI:
39515 icode = CODE_FOR_avx512f_scatterdiv8di;
39516 goto scatter_gen;
39517 case IX86_BUILTIN_SCATTERSIV8SF:
39518 icode = CODE_FOR_avx512vl_scattersiv8sf;
39519 goto scatter_gen;
39520 case IX86_BUILTIN_SCATTERSIV4SF:
39521 icode = CODE_FOR_avx512vl_scattersiv4sf;
39522 goto scatter_gen;
39523 case IX86_BUILTIN_SCATTERSIV4DF:
39524 icode = CODE_FOR_avx512vl_scattersiv4df;
39525 goto scatter_gen;
39526 case IX86_BUILTIN_SCATTERSIV2DF:
39527 icode = CODE_FOR_avx512vl_scattersiv2df;
39528 goto scatter_gen;
39529 case IX86_BUILTIN_SCATTERDIV8SF:
39530 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39531 goto scatter_gen;
39532 case IX86_BUILTIN_SCATTERDIV4SF:
39533 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39534 goto scatter_gen;
39535 case IX86_BUILTIN_SCATTERDIV4DF:
39536 icode = CODE_FOR_avx512vl_scatterdiv4df;
39537 goto scatter_gen;
39538 case IX86_BUILTIN_SCATTERDIV2DF:
39539 icode = CODE_FOR_avx512vl_scatterdiv2df;
39540 goto scatter_gen;
39541 case IX86_BUILTIN_SCATTERSIV8SI:
39542 icode = CODE_FOR_avx512vl_scattersiv8si;
39543 goto scatter_gen;
39544 case IX86_BUILTIN_SCATTERSIV4SI:
39545 icode = CODE_FOR_avx512vl_scattersiv4si;
39546 goto scatter_gen;
39547 case IX86_BUILTIN_SCATTERSIV4DI:
39548 icode = CODE_FOR_avx512vl_scattersiv4di;
39549 goto scatter_gen;
39550 case IX86_BUILTIN_SCATTERSIV2DI:
39551 icode = CODE_FOR_avx512vl_scattersiv2di;
39552 goto scatter_gen;
39553 case IX86_BUILTIN_SCATTERDIV8SI:
39554 icode = CODE_FOR_avx512vl_scatterdiv8si;
39555 goto scatter_gen;
39556 case IX86_BUILTIN_SCATTERDIV4SI:
39557 icode = CODE_FOR_avx512vl_scatterdiv4si;
39558 goto scatter_gen;
39559 case IX86_BUILTIN_SCATTERDIV4DI:
39560 icode = CODE_FOR_avx512vl_scatterdiv4di;
39561 goto scatter_gen;
39562 case IX86_BUILTIN_SCATTERDIV2DI:
39563 icode = CODE_FOR_avx512vl_scatterdiv2di;
39564 goto scatter_gen;
39565 case IX86_BUILTIN_GATHERPFDPD:
39566 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39567 goto vec_prefetch_gen;
39568 case IX86_BUILTIN_GATHERPFDPS:
39569 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39570 goto vec_prefetch_gen;
39571 case IX86_BUILTIN_GATHERPFQPD:
39572 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39573 goto vec_prefetch_gen;
39574 case IX86_BUILTIN_GATHERPFQPS:
39575 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39576 goto vec_prefetch_gen;
39577 case IX86_BUILTIN_SCATTERPFDPD:
39578 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39579 goto vec_prefetch_gen;
39580 case IX86_BUILTIN_SCATTERPFDPS:
39581 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39582 goto vec_prefetch_gen;
39583 case IX86_BUILTIN_SCATTERPFQPD:
39584 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39585 goto vec_prefetch_gen;
39586 case IX86_BUILTIN_SCATTERPFQPS:
39587 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39588 goto vec_prefetch_gen;
39590 gather_gen:
39591 rtx half;
39592 rtx (*gen) (rtx, rtx);
39594 arg0 = CALL_EXPR_ARG (exp, 0);
39595 arg1 = CALL_EXPR_ARG (exp, 1);
39596 arg2 = CALL_EXPR_ARG (exp, 2);
39597 arg3 = CALL_EXPR_ARG (exp, 3);
39598 arg4 = CALL_EXPR_ARG (exp, 4);
39599 op0 = expand_normal (arg0);
39600 op1 = expand_normal (arg1);
39601 op2 = expand_normal (arg2);
39602 op3 = expand_normal (arg3);
39603 op4 = expand_normal (arg4);
39604 /* Note the arg order is different from the operand order. */
39605 mode0 = insn_data[icode].operand[1].mode;
39606 mode2 = insn_data[icode].operand[3].mode;
39607 mode3 = insn_data[icode].operand[4].mode;
39608 mode4 = insn_data[icode].operand[5].mode;
39610 if (target == NULL_RTX
39611 || GET_MODE (target) != insn_data[icode].operand[0].mode
39612 || !insn_data[icode].operand[0].predicate (target,
39613 GET_MODE (target)))
39614 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39615 else
39616 subtarget = target;
39618 switch (fcode)
39620 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39621 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39622 half = gen_reg_rtx (V8SImode);
39623 if (!nonimmediate_operand (op2, V16SImode))
39624 op2 = copy_to_mode_reg (V16SImode, op2);
39625 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39626 op2 = half;
39627 break;
39628 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39629 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39630 case IX86_BUILTIN_GATHERALTSIV4DF:
39631 case IX86_BUILTIN_GATHERALTSIV4DI:
39632 half = gen_reg_rtx (V4SImode);
39633 if (!nonimmediate_operand (op2, V8SImode))
39634 op2 = copy_to_mode_reg (V8SImode, op2);
39635 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39636 op2 = half;
39637 break;
39638 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39639 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39640 half = gen_reg_rtx (mode0);
39641 if (mode0 == V8SFmode)
39642 gen = gen_vec_extract_lo_v16sf;
39643 else
39644 gen = gen_vec_extract_lo_v16si;
39645 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39646 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39647 emit_insn (gen (half, op0));
39648 op0 = half;
39649 if (GET_MODE (op3) != VOIDmode)
39651 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39652 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39653 emit_insn (gen (half, op3));
39654 op3 = half;
39656 break;
39657 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39658 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39659 case IX86_BUILTIN_GATHERALTDIV8SF:
39660 case IX86_BUILTIN_GATHERALTDIV8SI:
39661 half = gen_reg_rtx (mode0);
39662 if (mode0 == V4SFmode)
39663 gen = gen_vec_extract_lo_v8sf;
39664 else
39665 gen = gen_vec_extract_lo_v8si;
39666 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39667 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39668 emit_insn (gen (half, op0));
39669 op0 = half;
39670 if (GET_MODE (op3) != VOIDmode)
39672 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39673 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39674 emit_insn (gen (half, op3));
39675 op3 = half;
39677 break;
39678 default:
39679 break;
39682 /* Force memory operand only with base register here. But we
39683 don't want to do it on memory operand for other builtin
39684 functions. */
39685 op1 = ix86_zero_extend_to_Pmode (op1);
39687 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39688 op0 = copy_to_mode_reg (mode0, op0);
39689 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39690 op1 = copy_to_mode_reg (Pmode, op1);
39691 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39692 op2 = copy_to_mode_reg (mode2, op2);
39693 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39695 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39696 op3 = copy_to_mode_reg (mode3, op3);
39698 else
39700 op3 = copy_to_reg (op3);
39701 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39703 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39705 error ("the last argument must be scale 1, 2, 4, 8");
39706 return const0_rtx;
39709 /* Optimize. If mask is known to have all high bits set,
39710 replace op0 with pc_rtx to signal that the instruction
39711 overwrites the whole destination and doesn't use its
39712 previous contents. */
39713 if (optimize)
39715 if (TREE_CODE (arg3) == INTEGER_CST)
39717 if (integer_all_onesp (arg3))
39718 op0 = pc_rtx;
39720 else if (TREE_CODE (arg3) == VECTOR_CST)
39722 unsigned int negative = 0;
39723 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39725 tree cst = VECTOR_CST_ELT (arg3, i);
39726 if (TREE_CODE (cst) == INTEGER_CST
39727 && tree_int_cst_sign_bit (cst))
39728 negative++;
39729 else if (TREE_CODE (cst) == REAL_CST
39730 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39731 negative++;
39733 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39734 op0 = pc_rtx;
39736 else if (TREE_CODE (arg3) == SSA_NAME
39737 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39739 /* Recognize also when mask is like:
39740 __v2df src = _mm_setzero_pd ();
39741 __v2df mask = _mm_cmpeq_pd (src, src);
39743 __v8sf src = _mm256_setzero_ps ();
39744 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39745 as that is a cheaper way to load all ones into
39746 a register than having to load a constant from
39747 memory. */
39748 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39749 if (is_gimple_call (def_stmt))
39751 tree fndecl = gimple_call_fndecl (def_stmt);
39752 if (fndecl
39753 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39754 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39756 case IX86_BUILTIN_CMPPD:
39757 case IX86_BUILTIN_CMPPS:
39758 case IX86_BUILTIN_CMPPD256:
39759 case IX86_BUILTIN_CMPPS256:
39760 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39761 break;
39762 /* FALLTHRU */
39763 case IX86_BUILTIN_CMPEQPD:
39764 case IX86_BUILTIN_CMPEQPS:
39765 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39766 && initializer_zerop (gimple_call_arg (def_stmt,
39767 1)))
39768 op0 = pc_rtx;
39769 break;
39770 default:
39771 break;
39777 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39778 if (! pat)
39779 return const0_rtx;
39780 emit_insn (pat);
39782 switch (fcode)
39784 case IX86_BUILTIN_GATHER3DIV16SF:
39785 if (target == NULL_RTX)
39786 target = gen_reg_rtx (V8SFmode);
39787 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39788 break;
39789 case IX86_BUILTIN_GATHER3DIV16SI:
39790 if (target == NULL_RTX)
39791 target = gen_reg_rtx (V8SImode);
39792 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39793 break;
39794 case IX86_BUILTIN_GATHER3DIV8SF:
39795 case IX86_BUILTIN_GATHERDIV8SF:
39796 if (target == NULL_RTX)
39797 target = gen_reg_rtx (V4SFmode);
39798 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39799 break;
39800 case IX86_BUILTIN_GATHER3DIV8SI:
39801 case IX86_BUILTIN_GATHERDIV8SI:
39802 if (target == NULL_RTX)
39803 target = gen_reg_rtx (V4SImode);
39804 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39805 break;
39806 default:
39807 target = subtarget;
39808 break;
39810 return target;
39812 scatter_gen:
39813 arg0 = CALL_EXPR_ARG (exp, 0);
39814 arg1 = CALL_EXPR_ARG (exp, 1);
39815 arg2 = CALL_EXPR_ARG (exp, 2);
39816 arg3 = CALL_EXPR_ARG (exp, 3);
39817 arg4 = CALL_EXPR_ARG (exp, 4);
39818 op0 = expand_normal (arg0);
39819 op1 = expand_normal (arg1);
39820 op2 = expand_normal (arg2);
39821 op3 = expand_normal (arg3);
39822 op4 = expand_normal (arg4);
39823 mode1 = insn_data[icode].operand[1].mode;
39824 mode2 = insn_data[icode].operand[2].mode;
39825 mode3 = insn_data[icode].operand[3].mode;
39826 mode4 = insn_data[icode].operand[4].mode;
39828 /* Force memory operand only with base register here. But we
39829 don't want to do it on memory operand for other builtin
39830 functions. */
39831 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39833 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39834 op0 = copy_to_mode_reg (Pmode, op0);
39836 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39838 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39839 op1 = copy_to_mode_reg (mode1, op1);
39841 else
39843 op1 = copy_to_reg (op1);
39844 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39847 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39848 op2 = copy_to_mode_reg (mode2, op2);
39850 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39851 op3 = copy_to_mode_reg (mode3, op3);
39853 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39855 error ("the last argument must be scale 1, 2, 4, 8");
39856 return const0_rtx;
39859 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39860 if (! pat)
39861 return const0_rtx;
39863 emit_insn (pat);
39864 return 0;
39866 vec_prefetch_gen:
39867 arg0 = CALL_EXPR_ARG (exp, 0);
39868 arg1 = CALL_EXPR_ARG (exp, 1);
39869 arg2 = CALL_EXPR_ARG (exp, 2);
39870 arg3 = CALL_EXPR_ARG (exp, 3);
39871 arg4 = CALL_EXPR_ARG (exp, 4);
39872 op0 = expand_normal (arg0);
39873 op1 = expand_normal (arg1);
39874 op2 = expand_normal (arg2);
39875 op3 = expand_normal (arg3);
39876 op4 = expand_normal (arg4);
39877 mode0 = insn_data[icode].operand[0].mode;
39878 mode1 = insn_data[icode].operand[1].mode;
39879 mode3 = insn_data[icode].operand[3].mode;
39880 mode4 = insn_data[icode].operand[4].mode;
39882 if (GET_MODE (op0) == mode0
39883 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39885 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39886 op0 = copy_to_mode_reg (mode0, op0);
39888 else if (op0 != constm1_rtx)
39890 op0 = copy_to_reg (op0);
39891 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39894 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39895 op1 = copy_to_mode_reg (mode1, op1);
39897 /* Force memory operand only with base register here. But we
39898 don't want to do it on memory operand for other builtin
39899 functions. */
39900 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39902 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39903 op2 = copy_to_mode_reg (Pmode, op2);
39905 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39907 error ("the forth argument must be scale 1, 2, 4, 8");
39908 return const0_rtx;
39911 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39913 error ("incorrect hint operand");
39914 return const0_rtx;
39917 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39918 if (! pat)
39919 return const0_rtx;
39921 emit_insn (pat);
39923 return 0;
39925 case IX86_BUILTIN_XABORT:
39926 icode = CODE_FOR_xabort;
39927 arg0 = CALL_EXPR_ARG (exp, 0);
39928 op0 = expand_normal (arg0);
39929 mode0 = insn_data[icode].operand[0].mode;
39930 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39932 error ("the xabort's argument must be an 8-bit immediate");
39933 return const0_rtx;
39935 emit_insn (gen_xabort (op0));
39936 return 0;
39938 default:
39939 break;
39942 for (i = 0, d = bdesc_special_args;
39943 i < ARRAY_SIZE (bdesc_special_args);
39944 i++, d++)
39945 if (d->code == fcode)
39946 return ix86_expand_special_args_builtin (d, exp, target);
39948 for (i = 0, d = bdesc_args;
39949 i < ARRAY_SIZE (bdesc_args);
39950 i++, d++)
39951 if (d->code == fcode)
39952 switch (fcode)
39954 case IX86_BUILTIN_FABSQ:
39955 case IX86_BUILTIN_COPYSIGNQ:
39956 if (!TARGET_SSE)
39957 /* Emit a normal call if SSE isn't available. */
39958 return expand_call (exp, target, ignore);
39959 default:
39960 return ix86_expand_args_builtin (d, exp, target);
39963 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39964 if (d->code == fcode)
39965 return ix86_expand_sse_comi (d, exp, target);
39967 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39968 if (d->code == fcode)
39969 return ix86_expand_round_builtin (d, exp, target);
39971 for (i = 0, d = bdesc_pcmpestr;
39972 i < ARRAY_SIZE (bdesc_pcmpestr);
39973 i++, d++)
39974 if (d->code == fcode)
39975 return ix86_expand_sse_pcmpestr (d, exp, target);
39977 for (i = 0, d = bdesc_pcmpistr;
39978 i < ARRAY_SIZE (bdesc_pcmpistr);
39979 i++, d++)
39980 if (d->code == fcode)
39981 return ix86_expand_sse_pcmpistr (d, exp, target);
39983 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39984 if (d->code == fcode)
39985 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39986 (enum ix86_builtin_func_type)
39987 d->flag, d->comparison);
39989 gcc_unreachable ();
39992 /* This returns the target-specific builtin with code CODE if
39993 current_function_decl has visibility on this builtin, which is checked
39994 using isa flags. Returns NULL_TREE otherwise. */
39996 static tree ix86_get_builtin (enum ix86_builtins code)
39998 struct cl_target_option *opts;
39999 tree target_tree = NULL_TREE;
40001 /* Determine the isa flags of current_function_decl. */
40003 if (current_function_decl)
40004 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40006 if (target_tree == NULL)
40007 target_tree = target_option_default_node;
40009 opts = TREE_TARGET_OPTION (target_tree);
40011 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40012 return ix86_builtin_decl (code, true);
40013 else
40014 return NULL_TREE;
40017 /* Return the function decl for the target-specific builtin
40018 corresponding to the given MPX builtin passed in FCODE. */
40019 static tree
40020 ix86_builtin_mpx_function (unsigned fcode)
40022 switch (fcode)
40024 case BUILT_IN_CHKP_BNDMK:
40025 return ix86_builtins[IX86_BUILTIN_BNDMK];
40027 case BUILT_IN_CHKP_BNDSTX:
40028 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40030 case BUILT_IN_CHKP_BNDLDX:
40031 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40033 case BUILT_IN_CHKP_BNDCL:
40034 return ix86_builtins[IX86_BUILTIN_BNDCL];
40036 case BUILT_IN_CHKP_BNDCU:
40037 return ix86_builtins[IX86_BUILTIN_BNDCU];
40039 case BUILT_IN_CHKP_BNDRET:
40040 return ix86_builtins[IX86_BUILTIN_BNDRET];
40042 case BUILT_IN_CHKP_INTERSECT:
40043 return ix86_builtins[IX86_BUILTIN_BNDINT];
40045 case BUILT_IN_CHKP_NARROW:
40046 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40048 case BUILT_IN_CHKP_SIZEOF:
40049 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40051 case BUILT_IN_CHKP_EXTRACT_LOWER:
40052 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40054 case BUILT_IN_CHKP_EXTRACT_UPPER:
40055 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40057 default:
40058 return NULL_TREE;
40061 gcc_unreachable ();
40064 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40066 Return an address to be used to load/store bounds for pointer
40067 passed in SLOT.
40069 SLOT_NO is an integer constant holding number of a target
40070 dependent special slot to be used in case SLOT is not a memory.
40072 SPECIAL_BASE is a pointer to be used as a base of fake address
40073 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40074 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
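/* As a worked example, assuming 64-bit pointers so that
   GET_MODE_SIZE (Pmode) == 8: SLOT_NO == 0 yields SPECIAL_BASE - 8 and
   SLOT_NO == 1 yields SPECIAL_BASE - 16, i.e. SPECIAL_BASE[-1] and
   SPECIAL_BASE[-2] when viewed as a pointer array.  */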
40076 static rtx
40077 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40079 rtx addr = NULL;
40081 /* A NULL slot means we pass bounds for a pointer not passed to the
40082 function at all. A register slot means we pass the pointer in a
40083 register. In both these cases bounds are passed via the Bounds
40084 Table. Since we do not have the actual pointer stored in memory,
40085 we have to use fake addresses to access the Bounds Table. We
40086 start with (special_base - sizeof (void *)) and decrease this
40087 address by the pointer size to get addresses for other slots. */
40088 if (!slot || REG_P (slot))
40090 gcc_assert (CONST_INT_P (slot_no));
40091 addr = plus_constant (Pmode, special_base,
40092 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40094 /* If the pointer is passed in memory then its address is used to
40095 access the Bounds Table. */
40096 else if (MEM_P (slot))
40098 addr = XEXP (slot, 0);
40099 if (!register_operand (addr, Pmode))
40100 addr = copy_addr_to_reg (addr);
40102 else
40103 gcc_unreachable ();
40105 return addr;
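/* Illustrative sketch (not part of GCC): the fake-address arithmetic the
   helper above performs for special slots, written as standalone C with
   GET_MODE_SIZE (Pmode) approximated by sizeof (void *).  Slot 0 lives one
   pointer below SPECIAL_BASE, slot 1 two pointers below, and so on.  */
#include <stdint.h>

static uintptr_t
special_slot_address (uintptr_t special_base, long slot_no)
{
  /* Mirrors plus_constant (Pmode, special_base,
     -(slot_no + 1) * GET_MODE_SIZE (Pmode)) used above.  */
  return special_base - (uintptr_t) (slot_no + 1) * sizeof (void *);
}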
40108 /* Expand pass uses this hook to load bounds for function parameter
40109 PTR passed in SLOT in case its bounds are not passed in a register.
40111 If SLOT is a memory, then bounds are loaded as for a regular pointer
40112 loaded from memory. PTR may be NULL in case SLOT is a memory.
40113 In such a case the value of PTR (if required) may be loaded from SLOT.
40115 If SLOT is NULL or a register then SLOT_NO is an integer constant
40116 holding the number of the target-dependent special slot which should
40117 be used to obtain bounds.
40119 Return loaded bounds. */
40121 static rtx
40122 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40124 rtx reg = gen_reg_rtx (BNDmode);
40125 rtx addr;
40127 /* Get address to be used to access Bounds Table. Special slots start
40128 at the location of return address of the current function. */
40129 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40131 /* Load pointer value from a memory if we don't have it. */
40132 if (!ptr)
40134 gcc_assert (MEM_P (slot));
40135 ptr = copy_addr_to_reg (slot);
40138 emit_insn (BNDmode == BND64mode
40139 ? gen_bnd64_ldx (reg, addr, ptr)
40140 : gen_bnd32_ldx (reg, addr, ptr));
40142 return reg;
40145 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40146 passed in SLOT in case BOUNDS are not passed in a register.
40148 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
40149 stored in memory. PTR may be NULL in case SLOT is a memory.
40150 In such a case the value of PTR (if required) may be loaded from SLOT.
40152 If SLOT is NULL or a register then SLOT_NO is an integer constant
40153 holding the number of the target-dependent special slot which should
40154 be used to store BOUNDS. */
40156 static void
40157 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40159 rtx addr;
40161 /* Get address to be used to access Bounds Table. Special slots start
40162 at the location of return address of a called function. */
40163 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40165 /* Load pointer value from a memory if we don't have it. */
40166 if (!ptr)
40168 gcc_assert (MEM_P (slot));
40169 ptr = copy_addr_to_reg (slot);
40172 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40173 if (!register_operand (bounds, BNDmode))
40174 bounds = copy_to_mode_reg (BNDmode, bounds);
40176 emit_insn (BNDmode == BND64mode
40177 ? gen_bnd64_stx (addr, ptr, bounds)
40178 : gen_bnd32_stx (addr, ptr, bounds));
40181 /* Load and return bounds returned by function in SLOT. */
40183 static rtx
40184 ix86_load_returned_bounds (rtx slot)
40186 rtx res;
40188 gcc_assert (REG_P (slot));
40189 res = gen_reg_rtx (BNDmode);
40190 emit_move_insn (res, slot);
40192 return res;
40195 /* Store BOUNDS returned by function into SLOT. */
40197 static void
40198 ix86_store_returned_bounds (rtx slot, rtx bounds)
40200 gcc_assert (REG_P (slot));
40201 emit_move_insn (slot, bounds);
40204 /* Returns a function decl for a vectorized version of the builtin function
40205 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40206 if it is not available. */
40208 static tree
40209 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40210 tree type_in)
40212 machine_mode in_mode, out_mode;
40213 int in_n, out_n;
40214 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40216 if (TREE_CODE (type_out) != VECTOR_TYPE
40217 || TREE_CODE (type_in) != VECTOR_TYPE
40218 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40219 return NULL_TREE;
40221 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40222 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40223 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40224 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40226 switch (fn)
40228 case BUILT_IN_SQRT:
40229 if (out_mode == DFmode && in_mode == DFmode)
40231 if (out_n == 2 && in_n == 2)
40232 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40233 else if (out_n == 4 && in_n == 4)
40234 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40235 else if (out_n == 8 && in_n == 8)
40236 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40238 break;
40240 case BUILT_IN_EXP2F:
40241 if (out_mode == SFmode && in_mode == SFmode)
40243 if (out_n == 16 && in_n == 16)
40244 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40246 break;
40248 case BUILT_IN_SQRTF:
40249 if (out_mode == SFmode && in_mode == SFmode)
40251 if (out_n == 4 && in_n == 4)
40252 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40253 else if (out_n == 8 && in_n == 8)
40254 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40255 else if (out_n == 16 && in_n == 16)
40256 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40258 break;
40260 case BUILT_IN_IFLOOR:
40261 case BUILT_IN_LFLOOR:
40262 case BUILT_IN_LLFLOOR:
40263 /* The round insn does not trap on denormals. */
40264 if (flag_trapping_math || !TARGET_ROUND)
40265 break;
40267 if (out_mode == SImode && in_mode == DFmode)
40269 if (out_n == 4 && in_n == 2)
40270 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40271 else if (out_n == 8 && in_n == 4)
40272 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40273 else if (out_n == 16 && in_n == 8)
40274 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40276 break;
40278 case BUILT_IN_IFLOORF:
40279 case BUILT_IN_LFLOORF:
40280 case BUILT_IN_LLFLOORF:
40281 /* The round insn does not trap on denormals. */
40282 if (flag_trapping_math || !TARGET_ROUND)
40283 break;
40285 if (out_mode == SImode && in_mode == SFmode)
40287 if (out_n == 4 && in_n == 4)
40288 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40289 else if (out_n == 8 && in_n == 8)
40290 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40292 break;
40294 case BUILT_IN_ICEIL:
40295 case BUILT_IN_LCEIL:
40296 case BUILT_IN_LLCEIL:
40297 /* The round insn does not trap on denormals. */
40298 if (flag_trapping_math || !TARGET_ROUND)
40299 break;
40301 if (out_mode == SImode && in_mode == DFmode)
40303 if (out_n == 4 && in_n == 2)
40304 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40305 else if (out_n == 8 && in_n == 4)
40306 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40307 else if (out_n == 16 && in_n == 8)
40308 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40310 break;
40312 case BUILT_IN_ICEILF:
40313 case BUILT_IN_LCEILF:
40314 case BUILT_IN_LLCEILF:
40315 /* The round insn does not trap on denormals. */
40316 if (flag_trapping_math || !TARGET_ROUND)
40317 break;
40319 if (out_mode == SImode && in_mode == SFmode)
40321 if (out_n == 4 && in_n == 4)
40322 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40323 else if (out_n == 8 && in_n == 8)
40324 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40326 break;
40328 case BUILT_IN_IRINT:
40329 case BUILT_IN_LRINT:
40330 case BUILT_IN_LLRINT:
40331 if (out_mode == SImode && in_mode == DFmode)
40333 if (out_n == 4 && in_n == 2)
40334 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40335 else if (out_n == 8 && in_n == 4)
40336 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40338 break;
40340 case BUILT_IN_IRINTF:
40341 case BUILT_IN_LRINTF:
40342 case BUILT_IN_LLRINTF:
40343 if (out_mode == SImode && in_mode == SFmode)
40345 if (out_n == 4 && in_n == 4)
40346 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40347 else if (out_n == 8 && in_n == 8)
40348 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40350 break;
40352 case BUILT_IN_IROUND:
40353 case BUILT_IN_LROUND:
40354 case BUILT_IN_LLROUND:
40355 /* The round insn does not trap on denormals. */
40356 if (flag_trapping_math || !TARGET_ROUND)
40357 break;
40359 if (out_mode == SImode && in_mode == DFmode)
40361 if (out_n == 4 && in_n == 2)
40362 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40363 else if (out_n == 8 && in_n == 4)
40364 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40365 else if (out_n == 16 && in_n == 8)
40366 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40368 break;
40370 case BUILT_IN_IROUNDF:
40371 case BUILT_IN_LROUNDF:
40372 case BUILT_IN_LLROUNDF:
40373 /* The round insn does not trap on denormals. */
40374 if (flag_trapping_math || !TARGET_ROUND)
40375 break;
40377 if (out_mode == SImode && in_mode == SFmode)
40379 if (out_n == 4 && in_n == 4)
40380 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40381 else if (out_n == 8 && in_n == 8)
40382 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40384 break;
40386 case BUILT_IN_COPYSIGN:
40387 if (out_mode == DFmode && in_mode == DFmode)
40389 if (out_n == 2 && in_n == 2)
40390 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40391 else if (out_n == 4 && in_n == 4)
40392 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40393 else if (out_n == 8 && in_n == 8)
40394 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40396 break;
40398 case BUILT_IN_COPYSIGNF:
40399 if (out_mode == SFmode && in_mode == SFmode)
40401 if (out_n == 4 && in_n == 4)
40402 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40403 else if (out_n == 8 && in_n == 8)
40404 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40405 else if (out_n == 16 && in_n == 16)
40406 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40408 break;
40410 case BUILT_IN_FLOOR:
40411 /* The round insn does not trap on denormals. */
40412 if (flag_trapping_math || !TARGET_ROUND)
40413 break;
40415 if (out_mode == DFmode && in_mode == DFmode)
40417 if (out_n == 2 && in_n == 2)
40418 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40419 else if (out_n == 4 && in_n == 4)
40420 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40422 break;
40424 case BUILT_IN_FLOORF:
40425 /* The round insn does not trap on denormals. */
40426 if (flag_trapping_math || !TARGET_ROUND)
40427 break;
40429 if (out_mode == SFmode && in_mode == SFmode)
40431 if (out_n == 4 && in_n == 4)
40432 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40433 else if (out_n == 8 && in_n == 8)
40434 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40436 break;
40438 case BUILT_IN_CEIL:
40439 /* The round insn does not trap on denormals. */
40440 if (flag_trapping_math || !TARGET_ROUND)
40441 break;
40443 if (out_mode == DFmode && in_mode == DFmode)
40445 if (out_n == 2 && in_n == 2)
40446 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40447 else if (out_n == 4 && in_n == 4)
40448 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40450 break;
40452 case BUILT_IN_CEILF:
40453 /* The round insn does not trap on denormals. */
40454 if (flag_trapping_math || !TARGET_ROUND)
40455 break;
40457 if (out_mode == SFmode && in_mode == SFmode)
40459 if (out_n == 4 && in_n == 4)
40460 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40461 else if (out_n == 8 && in_n == 8)
40462 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40464 break;
40466 case BUILT_IN_TRUNC:
40467 /* The round insn does not trap on denormals. */
40468 if (flag_trapping_math || !TARGET_ROUND)
40469 break;
40471 if (out_mode == DFmode && in_mode == DFmode)
40473 if (out_n == 2 && in_n == 2)
40474 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40475 else if (out_n == 4 && in_n == 4)
40476 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40478 break;
40480 case BUILT_IN_TRUNCF:
40481 /* The round insn does not trap on denormals. */
40482 if (flag_trapping_math || !TARGET_ROUND)
40483 break;
40485 if (out_mode == SFmode && in_mode == SFmode)
40487 if (out_n == 4 && in_n == 4)
40488 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40489 else if (out_n == 8 && in_n == 8)
40490 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40492 break;
40494 case BUILT_IN_RINT:
40495 /* The round insn does not trap on denormals. */
40496 if (flag_trapping_math || !TARGET_ROUND)
40497 break;
40499 if (out_mode == DFmode && in_mode == DFmode)
40501 if (out_n == 2 && in_n == 2)
40502 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40503 else if (out_n == 4 && in_n == 4)
40504 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40506 break;
40508 case BUILT_IN_RINTF:
40509 /* The round insn does not trap on denormals. */
40510 if (flag_trapping_math || !TARGET_ROUND)
40511 break;
40513 if (out_mode == SFmode && in_mode == SFmode)
40515 if (out_n == 4 && in_n == 4)
40516 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40517 else if (out_n == 8 && in_n == 8)
40518 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40520 break;
40522 case BUILT_IN_ROUND:
40523 /* The round insn does not trap on denormals. */
40524 if (flag_trapping_math || !TARGET_ROUND)
40525 break;
40527 if (out_mode == DFmode && in_mode == DFmode)
40529 if (out_n == 2 && in_n == 2)
40530 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40531 else if (out_n == 4 && in_n == 4)
40532 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40534 break;
40536 case BUILT_IN_ROUNDF:
40537 /* The round insn does not trap on denormals. */
40538 if (flag_trapping_math || !TARGET_ROUND)
40539 break;
40541 if (out_mode == SFmode && in_mode == SFmode)
40543 if (out_n == 4 && in_n == 4)
40544 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40545 else if (out_n == 8 && in_n == 8)
40546 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40548 break;
40550 case BUILT_IN_FMA:
40551 if (out_mode == DFmode && in_mode == DFmode)
40553 if (out_n == 2 && in_n == 2)
40554 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40555 if (out_n == 4 && in_n == 4)
40556 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40558 break;
40560 case BUILT_IN_FMAF:
40561 if (out_mode == SFmode && in_mode == SFmode)
40563 if (out_n == 4 && in_n == 4)
40564 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40565 if (out_n == 8 && in_n == 8)
40566 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40568 break;
40570 default:
40571 break;
40574 /* Dispatch to a handler for a vectorization library. */
40575 if (ix86_veclib_handler)
40576 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40577 type_in);
40579 return NULL_TREE;
40582 /* Handler for an SVML-style interface to
40583 a library with vectorized intrinsics. */
40585 static tree
40586 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40588 char name[20];
40589 tree fntype, new_fndecl, args;
40590 unsigned arity;
40591 const char *bname;
40592 machine_mode el_mode, in_mode;
40593 int n, in_n;
40595 /* The SVML library is suitable for unsafe math only. */
40596 if (!flag_unsafe_math_optimizations)
40597 return NULL_TREE;
40599 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40600 n = TYPE_VECTOR_SUBPARTS (type_out);
40601 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40602 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40603 if (el_mode != in_mode
40604 || n != in_n)
40605 return NULL_TREE;
40607 switch (fn)
40609 case BUILT_IN_EXP:
40610 case BUILT_IN_LOG:
40611 case BUILT_IN_LOG10:
40612 case BUILT_IN_POW:
40613 case BUILT_IN_TANH:
40614 case BUILT_IN_TAN:
40615 case BUILT_IN_ATAN:
40616 case BUILT_IN_ATAN2:
40617 case BUILT_IN_ATANH:
40618 case BUILT_IN_CBRT:
40619 case BUILT_IN_SINH:
40620 case BUILT_IN_SIN:
40621 case BUILT_IN_ASINH:
40622 case BUILT_IN_ASIN:
40623 case BUILT_IN_COSH:
40624 case BUILT_IN_COS:
40625 case BUILT_IN_ACOSH:
40626 case BUILT_IN_ACOS:
40627 if (el_mode != DFmode || n != 2)
40628 return NULL_TREE;
40629 break;
40631 case BUILT_IN_EXPF:
40632 case BUILT_IN_LOGF:
40633 case BUILT_IN_LOG10F:
40634 case BUILT_IN_POWF:
40635 case BUILT_IN_TANHF:
40636 case BUILT_IN_TANF:
40637 case BUILT_IN_ATANF:
40638 case BUILT_IN_ATAN2F:
40639 case BUILT_IN_ATANHF:
40640 case BUILT_IN_CBRTF:
40641 case BUILT_IN_SINHF:
40642 case BUILT_IN_SINF:
40643 case BUILT_IN_ASINHF:
40644 case BUILT_IN_ASINF:
40645 case BUILT_IN_COSHF:
40646 case BUILT_IN_COSF:
40647 case BUILT_IN_ACOSHF:
40648 case BUILT_IN_ACOSF:
40649 if (el_mode != SFmode || n != 4)
40650 return NULL_TREE;
40651 break;
40653 default:
40654 return NULL_TREE;
40657 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40659 if (fn == BUILT_IN_LOGF)
40660 strcpy (name, "vmlsLn4");
40661 else if (fn == BUILT_IN_LOG)
40662 strcpy (name, "vmldLn2");
40663 else if (n == 4)
40665 sprintf (name, "vmls%s", bname+10);
40666 name[strlen (name)-1] = '4';
40668 else
40669 sprintf (name, "vmld%s2", bname+10);
40671 /* Convert to uppercase. */
40672 name[4] &= ~0x20;
40674 arity = 0;
40675 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40676 args;
40677 args = TREE_CHAIN (args))
40678 arity++;
40680 if (arity == 1)
40681 fntype = build_function_type_list (type_out, type_in, NULL);
40682 else
40683 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40685 /* Build a function declaration for the vectorized function. */
40686 new_fndecl = build_decl (BUILTINS_LOCATION,
40687 FUNCTION_DECL, get_identifier (name), fntype);
40688 TREE_PUBLIC (new_fndecl) = 1;
40689 DECL_EXTERNAL (new_fndecl) = 1;
40690 DECL_IS_NOVOPS (new_fndecl) = 1;
40691 TREE_READONLY (new_fndecl) = 1;
40693 return new_fndecl;
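/* Illustrative sketch (not part of GCC): the SVML name mangling performed
   above, as a standalone helper.  BNAME is the scalar builtin name
   including its "__builtin_" prefix (10 characters); FLOATP selects the
   4 x float form, otherwise the 2 x double form is built.  log/logf map
   to the Ln* entry points.  For example "__builtin_sinf" -> "vmlsSin4"
   and "__builtin_sin" -> "vmldSin2".  */
#include <stdio.h>
#include <string.h>
#include <ctype.h>

static void
svml_mangle (const char *bname, int floatp, char name[20])
{
  if (floatp && strcmp (bname, "__builtin_logf") == 0)
    strcpy (name, "vmlsLn4");
  else if (!floatp && strcmp (bname, "__builtin_log") == 0)
    strcpy (name, "vmldLn2");
  else if (floatp)
    {
      snprintf (name, 20, "vmls%s", bname + 10);
      name[strlen (name) - 1] = '4';	/* The trailing 'f' becomes '4'.  */
    }
  else
    snprintf (name, 20, "vmld%s2", bname + 10);
  name[4] = toupper ((unsigned char) name[4]);	/* Capitalize the name.  */
}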
40696 /* Handler for an ACML-style interface to
40697 a library with vectorized intrinsics. */
40699 static tree
40700 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40702 char name[20] = "__vr.._";
40703 tree fntype, new_fndecl, args;
40704 unsigned arity;
40705 const char *bname;
40706 machine_mode el_mode, in_mode;
40707 int n, in_n;
40709 /* The ACML is 64-bit only and suitable for unsafe math only, as
40710 it does not correctly support parts of IEEE arithmetic with the
40711 required precision, such as denormals. */
40712 if (!TARGET_64BIT
40713 || !flag_unsafe_math_optimizations)
40714 return NULL_TREE;
40716 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40717 n = TYPE_VECTOR_SUBPARTS (type_out);
40718 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40719 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40720 if (el_mode != in_mode
40721 || n != in_n)
40722 return NULL_TREE;
40724 switch (fn)
40726 case BUILT_IN_SIN:
40727 case BUILT_IN_COS:
40728 case BUILT_IN_EXP:
40729 case BUILT_IN_LOG:
40730 case BUILT_IN_LOG2:
40731 case BUILT_IN_LOG10:
40732 name[4] = 'd';
40733 name[5] = '2';
40734 if (el_mode != DFmode
40735 || n != 2)
40736 return NULL_TREE;
40737 break;
40739 case BUILT_IN_SINF:
40740 case BUILT_IN_COSF:
40741 case BUILT_IN_EXPF:
40742 case BUILT_IN_POWF:
40743 case BUILT_IN_LOGF:
40744 case BUILT_IN_LOG2F:
40745 case BUILT_IN_LOG10F:
40746 name[4] = 's';
40747 name[5] = '4';
40748 if (el_mode != SFmode
40749 || n != 4)
40750 return NULL_TREE;
40751 break;
40753 default:
40754 return NULL_TREE;
40757 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40758 sprintf (name + 7, "%s", bname+10);
40760 arity = 0;
40761 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40762 args;
40763 args = TREE_CHAIN (args))
40764 arity++;
40766 if (arity == 1)
40767 fntype = build_function_type_list (type_out, type_in, NULL);
40768 else
40769 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40771 /* Build a function declaration for the vectorized function. */
40772 new_fndecl = build_decl (BUILTINS_LOCATION,
40773 FUNCTION_DECL, get_identifier (name), fntype);
40774 TREE_PUBLIC (new_fndecl) = 1;
40775 DECL_EXTERNAL (new_fndecl) = 1;
40776 DECL_IS_NOVOPS (new_fndecl) = 1;
40777 TREE_READONLY (new_fndecl) = 1;
40779 return new_fndecl;
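/* Illustrative sketch (not part of GCC): the ACML naming scheme used
   above.  The "__vr.._" template gets 'd'/'2' for the 2 x double forms
   and 's'/'4' for the 4 x float forms, followed by the scalar builtin
   name with its "__builtin_" prefix dropped, e.g.
   "__builtin_sin" -> "__vrd2_sin" and "__builtin_sinf" -> "__vrs4_sinf".  */
#include <stdio.h>

static void
acml_mangle (const char *bname, int floatp, char name[20])
{
  snprintf (name, 20, "__vr%c%c_%s",
	    floatp ? 's' : 'd', floatp ? '4' : '2', bname + 10);
}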
40782 /* Returns a decl of a function that implements a gather load with
40783 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
40784 Return NULL_TREE if it is not available. */
40786 static tree
40787 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40788 const_tree index_type, int scale)
40790 bool si;
40791 enum ix86_builtins code;
40793 if (! TARGET_AVX2)
40794 return NULL_TREE;
40796 if ((TREE_CODE (index_type) != INTEGER_TYPE
40797 && !POINTER_TYPE_P (index_type))
40798 || (TYPE_MODE (index_type) != SImode
40799 && TYPE_MODE (index_type) != DImode))
40800 return NULL_TREE;
40802 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40803 return NULL_TREE;
40805 /* v*gather* insn sign extends index to pointer mode. */
40806 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40807 && TYPE_UNSIGNED (index_type))
40808 return NULL_TREE;
40810 if (scale <= 0
40811 || scale > 8
40812 || (scale & (scale - 1)) != 0)
40813 return NULL_TREE;
40815 si = TYPE_MODE (index_type) == SImode;
40816 switch (TYPE_MODE (mem_vectype))
40818 case V2DFmode:
40819 if (TARGET_AVX512VL)
40820 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40821 else
40822 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40823 break;
40824 case V4DFmode:
40825 if (TARGET_AVX512VL)
40826 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40827 else
40828 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40829 break;
40830 case V2DImode:
40831 if (TARGET_AVX512VL)
40832 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40833 else
40834 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40835 break;
40836 case V4DImode:
40837 if (TARGET_AVX512VL)
40838 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40839 else
40840 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40841 break;
40842 case V4SFmode:
40843 if (TARGET_AVX512VL)
40844 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40845 else
40846 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40847 break;
40848 case V8SFmode:
40849 if (TARGET_AVX512VL)
40850 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40851 else
40852 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40853 break;
40854 case V4SImode:
40855 if (TARGET_AVX512VL)
40856 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40857 else
40858 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40859 break;
40860 case V8SImode:
40861 if (TARGET_AVX512VL)
40862 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40863 else
40864 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40865 break;
40866 case V8DFmode:
40867 if (TARGET_AVX512F)
40868 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40869 else
40870 return NULL_TREE;
40871 break;
40872 case V8DImode:
40873 if (TARGET_AVX512F)
40874 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40875 else
40876 return NULL_TREE;
40877 break;
40878 case V16SFmode:
40879 if (TARGET_AVX512F)
40880 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40881 else
40882 return NULL_TREE;
40883 break;
40884 case V16SImode:
40885 if (TARGET_AVX512F)
40886 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40887 else
40888 return NULL_TREE;
40889 break;
40890 default:
40891 return NULL_TREE;
40894 return ix86_get_builtin (code);
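/* Illustrative sketch (not part of GCC): the legality test applied to the
   gather SCALE operand above, written as plain C.  A scale is accepted
   only when it is a power of two in the range 1..8 (i.e. 1, 2, 4 or 8).  */
static int
gather_scale_ok (int scale)
{
  return scale > 0 && scale <= 8 && (scale & (scale - 1)) == 0;
}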
40897 /* Returns a decl for a target-specific builtin that implements the
40898 reciprocal of the function, or NULL_TREE if not available. */
40900 static tree
40901 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40903 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40904 && flag_finite_math_only && !flag_trapping_math
40905 && flag_unsafe_math_optimizations))
40906 return NULL_TREE;
40908 if (md_fn)
40909 /* Machine dependent builtins. */
40910 switch (fn)
40912 /* Vectorized version of sqrt to rsqrt conversion. */
40913 case IX86_BUILTIN_SQRTPS_NR:
40914 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40916 case IX86_BUILTIN_SQRTPS_NR256:
40917 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40919 default:
40920 return NULL_TREE;
40922 else
40923 /* Normal builtins. */
40924 switch (fn)
40926 /* Sqrt to rsqrt conversion. */
40927 case BUILT_IN_SQRTF:
40928 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40930 default:
40931 return NULL_TREE;
40935 /* Helper for avx_vpermilps256_operand et al. This is also used by
40936 the expansion functions to turn the parallel back into a mask.
40937 The return value is 0 for no match and the imm8+1 for a match. */
40940 avx_vpermilp_parallel (rtx par, machine_mode mode)
40942 unsigned i, nelt = GET_MODE_NUNITS (mode);
40943 unsigned mask = 0;
40944 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40946 if (XVECLEN (par, 0) != (int) nelt)
40947 return 0;
40949 /* Validate that all of the elements are constants, and not totally
40950 out of range. Copy the data into an integral array to make the
40951 subsequent checks easier. */
40952 for (i = 0; i < nelt; ++i)
40954 rtx er = XVECEXP (par, 0, i);
40955 unsigned HOST_WIDE_INT ei;
40957 if (!CONST_INT_P (er))
40958 return 0;
40959 ei = INTVAL (er);
40960 if (ei >= nelt)
40961 return 0;
40962 ipar[i] = ei;
40965 switch (mode)
40967 case V8DFmode:
40968 /* In the 512-bit DFmode case, we can only move elements within
40969 a 128-bit lane. First fill the second part of the mask,
40970 then fallthru. */
40971 for (i = 4; i < 6; ++i)
40973 if (ipar[i] < 4 || ipar[i] >= 6)
40974 return 0;
40975 mask |= (ipar[i] - 4) << i;
40977 for (i = 6; i < 8; ++i)
40979 if (ipar[i] < 6)
40980 return 0;
40981 mask |= (ipar[i] - 6) << i;
40983 /* FALLTHRU */
40985 case V4DFmode:
40986 /* In the 256-bit DFmode case, we can only move elements within
40987 a 128-bit lane. */
40988 for (i = 0; i < 2; ++i)
40990 if (ipar[i] >= 2)
40991 return 0;
40992 mask |= ipar[i] << i;
40994 for (i = 2; i < 4; ++i)
40996 if (ipar[i] < 2)
40997 return 0;
40998 mask |= (ipar[i] - 2) << i;
41000 break;
41002 case V16SFmode:
41003 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41004 must mirror the permutation in the lower 256 bits. */
41005 for (i = 0; i < 8; ++i)
41006 if (ipar[i] + 8 != ipar[i + 8])
41007 return 0;
41008 /* FALLTHRU */
41010 case V8SFmode:
41011 /* In the 256-bit SFmode case, we have full freedom of
41012 movement within the low 128-bit lane, but the high 128-bit
41013 lane must mirror the exact same pattern. */
41014 for (i = 0; i < 4; ++i)
41015 if (ipar[i] + 4 != ipar[i + 4])
41016 return 0;
41017 nelt = 4;
41018 /* FALLTHRU */
41020 case V2DFmode:
41021 case V4SFmode:
41022 /* In the 128-bit case, we have full freedom in the placement of
41023 the elements from the source operand. */
41024 for (i = 0; i < nelt; ++i)
41025 mask |= ipar[i] << (i * (nelt / 2));
41026 break;
41028 default:
41029 gcc_unreachable ();
41032 /* Make sure success has a non-zero value by adding one. */
41033 return mask + 1;
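/* Illustrative sketch (not part of GCC): for the 128-bit V4SF case the
   loop above packs each element selector into two bits of the immediate,
   imm8 = sel[0] | sel[1]<<2 | sel[2]<<4 | sel[3]<<6, so the permutation
   {2,1,0,3} encodes as 0xc6.  */
static unsigned
vpermilps_imm8 (const unsigned char sel[4])
{
  unsigned mask = 0;
  unsigned i;
  for (i = 0; i < 4; ++i)
    mask |= (unsigned) (sel[i] & 3) << (i * 2);
  return mask;
}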
41036 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41037 the expansion functions to turn the parallel back into a mask.
41038 The return value is 0 for no match and the imm8+1 for a match. */
41041 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41043 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41044 unsigned mask = 0;
41045 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41047 if (XVECLEN (par, 0) != (int) nelt)
41048 return 0;
41050 /* Validate that all of the elements are constants, and not totally
41051 out of range. Copy the data into an integral array to make the
41052 subsequent checks easier. */
41053 for (i = 0; i < nelt; ++i)
41055 rtx er = XVECEXP (par, 0, i);
41056 unsigned HOST_WIDE_INT ei;
41058 if (!CONST_INT_P (er))
41059 return 0;
41060 ei = INTVAL (er);
41061 if (ei >= 2 * nelt)
41062 return 0;
41063 ipar[i] = ei;
41066 /* Validate that each half of the permute selects consecutive elements. */
41067 for (i = 0; i < nelt2 - 1; ++i)
41068 if (ipar[i] + 1 != ipar[i + 1])
41069 return 0;
41070 for (i = nelt2; i < nelt - 1; ++i)
41071 if (ipar[i] + 1 != ipar[i + 1])
41072 return 0;
41074 /* Reconstruct the mask. */
41075 for (i = 0; i < 2; ++i)
41077 unsigned e = ipar[i * nelt2];
41078 if (e % nelt2)
41079 return 0;
41080 e /= nelt2;
41081 mask |= e << (i * 4);
41084 /* Make sure success has a non-zero value by adding one. */
41085 return mask + 1;
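/* Illustrative sketch (not part of GCC): the mask reconstruction above
   for V4DF in plain C.  LANE0 and LANE1 are the 128-bit lane indices
   (0-3 within the two concatenated operands) selected for the low and
   high half of the result; each lands in one nibble of the immediate.
   Swapping the two halves of operand 0 (lanes {1,0}) encodes as 0x01.  */
static unsigned
vperm2f128_imm8 (unsigned lane0, unsigned lane1)
{
  return (lane0 & 3) | ((lane1 & 3) << 4);
}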
41088 /* Return a register priority for hard reg REGNO. */
41089 static int
41090 ix86_register_priority (int hard_regno)
41092 /* ebp and r13 as the base always want a displacement, and r12 as the
41093 base always wants an index. So discourage their use in an
41094 address. */
41095 if (hard_regno == R12_REG || hard_regno == R13_REG)
41096 return 0;
41097 if (hard_regno == BP_REG)
41098 return 1;
41099 /* New x86-64 int registers result in bigger code size. Discourage
41100 them. */
41101 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41102 return 2;
41103 /* New x86-64 SSE registers result in bigger code size. Discourage
41104 them. */
41105 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41106 return 2;
41107 /* Usage of AX register results in smaller code. Prefer it. */
41108 if (hard_regno == 0)
41109 return 4;
41110 return 3;
41113 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41115 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41116 QImode must go into class Q_REGS.
41117 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41118 movdf to do mem-to-mem moves through integer regs. */
41120 static reg_class_t
41121 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41123 machine_mode mode = GET_MODE (x);
41125 /* We're only allowed to return a subclass of CLASS. Many of the
41126 following checks fail for NO_REGS, so eliminate that early. */
41127 if (regclass == NO_REGS)
41128 return NO_REGS;
41130 /* All classes can load zeros. */
41131 if (x == CONST0_RTX (mode))
41132 return regclass;
41134 /* Force constants into memory if we are loading a (nonzero) constant into
41135 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41136 instructions to load from a constant. */
41137 if (CONSTANT_P (x)
41138 && (MAYBE_MMX_CLASS_P (regclass)
41139 || MAYBE_SSE_CLASS_P (regclass)
41140 || MAYBE_MASK_CLASS_P (regclass)))
41141 return NO_REGS;
41143 /* Prefer SSE regs only, if we can use them for math. */
41144 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41145 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41147 /* Floating-point constants need more complex checks. */
41148 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41150 /* General regs can load everything. */
41151 if (reg_class_subset_p (regclass, GENERAL_REGS))
41152 return regclass;
41154 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41155 zero above. We only want to wind up preferring 80387 registers if
41156 we plan on doing computation with them. */
41157 if (TARGET_80387
41158 && standard_80387_constant_p (x) > 0)
41160 /* Limit class to non-sse. */
41161 if (regclass == FLOAT_SSE_REGS)
41162 return FLOAT_REGS;
41163 if (regclass == FP_TOP_SSE_REGS)
41164 return FP_TOP_REG;
41165 if (regclass == FP_SECOND_SSE_REGS)
41166 return FP_SECOND_REG;
41167 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41168 return regclass;
41171 return NO_REGS;
41174 /* Generally when we see PLUS here, it's the function invariant
41175 (plus soft-fp const_int), which can only be computed into general
41176 regs. */
41177 if (GET_CODE (x) == PLUS)
41178 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41180 /* QImode constants are easy to load, but non-constant QImode data
41181 must go into Q_REGS. */
41182 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41184 if (reg_class_subset_p (regclass, Q_REGS))
41185 return regclass;
41186 if (reg_class_subset_p (Q_REGS, regclass))
41187 return Q_REGS;
41188 return NO_REGS;
41191 return regclass;
41194 /* Discourage putting floating-point values in SSE registers unless
41195 SSE math is being used, and likewise for the 387 registers. */
41196 static reg_class_t
41197 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41199 machine_mode mode = GET_MODE (x);
41201 /* Restrict the output reload class to the register bank that we are doing
41202 math on. If we would like not to return a subset of CLASS, reject this
41203 alternative: if reload cannot do this, it will still use its choice. */
41204 mode = GET_MODE (x);
41205 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41206 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41208 if (X87_FLOAT_MODE_P (mode))
41210 if (regclass == FP_TOP_SSE_REGS)
41211 return FP_TOP_REG;
41212 else if (regclass == FP_SECOND_SSE_REGS)
41213 return FP_SECOND_REG;
41214 else
41215 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41218 return regclass;
41221 static reg_class_t
41222 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41223 machine_mode mode, secondary_reload_info *sri)
41225 /* Double-word spills from general registers to non-offsettable memory
41226 references (zero-extended addresses) require special handling. */
41227 if (TARGET_64BIT
41228 && MEM_P (x)
41229 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41230 && INTEGER_CLASS_P (rclass)
41231 && !offsettable_memref_p (x))
41233 sri->icode = (in_p
41234 ? CODE_FOR_reload_noff_load
41235 : CODE_FOR_reload_noff_store);
41236 /* Add the cost of moving address to a temporary. */
41237 sri->extra_cost = 1;
41239 return NO_REGS;
41242 /* QImode spills from non-QI registers require an
41243 intermediate register on 32-bit targets. */
41244 if (mode == QImode
41245 && (MAYBE_MASK_CLASS_P (rclass)
41246 || (!TARGET_64BIT && !in_p
41247 && INTEGER_CLASS_P (rclass)
41248 && MAYBE_NON_Q_CLASS_P (rclass))))
41250 int regno;
41252 if (REG_P (x))
41253 regno = REGNO (x);
41254 else
41255 regno = -1;
41257 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41258 regno = true_regnum (x);
41260 /* Return Q_REGS if the operand is in memory. */
41261 if (regno == -1)
41262 return Q_REGS;
41265 /* This condition handles corner case where an expression involving
41266 pointers gets vectorized. We're trying to use the address of a
41267 stack slot as a vector initializer.
41269 (set (reg:V2DI 74 [ vect_cst_.2 ])
41270 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41272 Eventually frame gets turned into sp+offset like this:
41274 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41275 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41276 (const_int 392 [0x188]))))
41278 That later gets turned into:
41280 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41281 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41282 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41284 We'll have the following reload recorded:
41286 Reload 0: reload_in (DI) =
41287 (plus:DI (reg/f:DI 7 sp)
41288 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41289 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41290 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41291 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41292 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41293 reload_reg_rtx: (reg:V2DI 22 xmm1)
41295 This isn't going to work since SSE instructions can't handle scalar
41296 additions. Returning GENERAL_REGS forces the addition into an integer
41297 register, and reload can handle subsequent reloads without problems. */
41299 if (in_p && GET_CODE (x) == PLUS
41300 && SSE_CLASS_P (rclass)
41301 && SCALAR_INT_MODE_P (mode))
41302 return GENERAL_REGS;
41304 return NO_REGS;
41307 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41309 static bool
41310 ix86_class_likely_spilled_p (reg_class_t rclass)
41312 switch (rclass)
41314 case AREG:
41315 case DREG:
41316 case CREG:
41317 case BREG:
41318 case AD_REGS:
41319 case SIREG:
41320 case DIREG:
41321 case SSE_FIRST_REG:
41322 case FP_TOP_REG:
41323 case FP_SECOND_REG:
41324 case BND_REGS:
41325 return true;
41327 default:
41328 break;
41331 return false;
41334 /* If we are copying between general and FP registers, we need a memory
41335 location. The same is true for SSE and MMX registers.
41337 To optimize register_move_cost performance, allow inline variant.
41339 The macro can't work reliably when one of the CLASSES is a class containing
41340 registers from multiple units (SSE, MMX, integer). We avoid this by never
41341 combining those units in a single alternative in the machine description.
41342 Ensure that this constraint holds to avoid unexpected surprises.
41344 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41345 enforce these sanity checks. */
41347 static inline bool
41348 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41349 machine_mode mode, int strict)
41351 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41352 return false;
41353 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41354 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41355 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41356 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41357 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41358 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41360 gcc_assert (!strict || lra_in_progress);
41361 return true;
41364 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41365 return true;
41367 /* Between mask and general, we have moves no larger than word size. */
41368 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41369 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41370 return true;
41372 /* ??? This is a lie. We do have moves between mmx/general, and for
41373 mmx/sse2. But by saying we need secondary memory we discourage the
41374 register allocator from using the mmx registers unless needed. */
41375 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41376 return true;
41378 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41380 /* SSE1 doesn't have any direct moves from other classes. */
41381 if (!TARGET_SSE2)
41382 return true;
41384 /* If the target says that inter-unit moves are more expensive
41385 than moving through memory, then don't generate them. */
41386 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41387 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41388 return true;
41390 /* Between SSE and general, we have moves no larger than word size. */
41391 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41392 return true;
41395 return false;
41398 bool
41399 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41400 machine_mode mode, int strict)
41402 return inline_secondary_memory_needed (class1, class2, mode, strict);
41405 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41407 On the 80386, this is the size of MODE in words,
41408 except in the FP regs, where a single reg is always enough. */
41410 static unsigned char
41411 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41413 if (MAYBE_INTEGER_CLASS_P (rclass))
41415 if (mode == XFmode)
41416 return (TARGET_64BIT ? 2 : 3);
41417 else if (mode == XCmode)
41418 return (TARGET_64BIT ? 4 : 6);
41419 else
41420 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41422 else
41424 if (COMPLEX_MODE_P (mode))
41425 return 2;
41426 else
41427 return 1;
41431 /* Return true if the registers in CLASS cannot represent the change from
41432 modes FROM to TO. */
41434 bool
41435 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41436 enum reg_class regclass)
41438 if (from == to)
41439 return false;
41441 /* x87 registers can't do subreg at all, as all values are reformatted
41442 to extended precision. */
41443 if (MAYBE_FLOAT_CLASS_P (regclass))
41444 return true;
41446 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41448 /* Vector registers do not support QI or HImode loads. If we don't
41449 disallow a change to these modes, reload will assume it's ok to
41450 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41451 the vec_dupv4hi pattern. */
41452 if (GET_MODE_SIZE (from) < 4)
41453 return true;
41456 return false;
41459 /* Return the cost of moving data of mode M between a
41460 register and memory. A value of 2 is the default; this cost is
41461 relative to those in `REGISTER_MOVE_COST'.
41463 This function is used extensively by register_move_cost, which is used to
41464 build tables at startup. Make it inline in this case.
41465 When IN is 2, return the maximum of the in and out move costs.
41467 If moving between registers and memory is more expensive than
41468 between two registers, you should define this macro to express the
41469 relative cost.
41471 Also model the increased moving costs of QImode registers in non
41472 Q_REGS classes.  */
41474 static inline int
41475 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41476 int in)
41478 int cost;
41479 if (FLOAT_CLASS_P (regclass))
41481 int index;
41482 switch (mode)
41484 case SFmode:
41485 index = 0;
41486 break;
41487 case DFmode:
41488 index = 1;
41489 break;
41490 case XFmode:
41491 index = 2;
41492 break;
41493 default:
41494 return 100;
41496 if (in == 2)
41497 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41498 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41500 if (SSE_CLASS_P (regclass))
41502 int index;
41503 switch (GET_MODE_SIZE (mode))
41505 case 4:
41506 index = 0;
41507 break;
41508 case 8:
41509 index = 1;
41510 break;
41511 case 16:
41512 index = 2;
41513 break;
41514 default:
41515 return 100;
41517 if (in == 2)
41518 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41519 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41521 if (MMX_CLASS_P (regclass))
41523 int index;
41524 switch (GET_MODE_SIZE (mode))
41526 case 4:
41527 index = 0;
41528 break;
41529 case 8:
41530 index = 1;
41531 break;
41532 default:
41533 return 100;
41535 if (in == 2)
41536 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41537 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41539 switch (GET_MODE_SIZE (mode))
41541 case 1:
41542 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41544 if (!in)
41545 return ix86_cost->int_store[0];
41546 if (TARGET_PARTIAL_REG_DEPENDENCY
41547 && optimize_function_for_speed_p (cfun))
41548 cost = ix86_cost->movzbl_load;
41549 else
41550 cost = ix86_cost->int_load[0];
41551 if (in == 2)
41552 return MAX (cost, ix86_cost->int_store[0]);
41553 return cost;
41555 else
41557 if (in == 2)
41558 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41559 if (in)
41560 return ix86_cost->movzbl_load;
41561 else
41562 return ix86_cost->int_store[0] + 4;
41564 break;
41565 case 2:
41566 if (in == 2)
41567 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41568 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41569 default:
41570 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41571 if (mode == TFmode)
41572 mode = XFmode;
41573 if (in == 2)
41574 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41575 else if (in)
41576 cost = ix86_cost->int_load[2];
41577 else
41578 cost = ix86_cost->int_store[2];
41579 return (cost * (((int) GET_MODE_SIZE (mode)
41580 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41584 static int
41585 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41586 bool in)
41588 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41592 /* Return the cost of moving data from a register in class CLASS1 to
41593 one in class CLASS2.
41595 It is not required that the cost always equal 2 when FROM is the same as TO;
41596 on some machines it is expensive to move between registers if they are not
41597 general registers. */
41599 static int
41600 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41601 reg_class_t class2_i)
41603 enum reg_class class1 = (enum reg_class) class1_i;
41604 enum reg_class class2 = (enum reg_class) class2_i;
41606 /* In case we require secondary memory, compute cost of the store followed
41607 by load. In order to avoid bad register allocation choices, we need
41608 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41610 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41612 int cost = 1;
41614 cost += inline_memory_move_cost (mode, class1, 2);
41615 cost += inline_memory_move_cost (mode, class2, 2);
41617 /* In case of copying from a general purpose register we may emit multiple
41618 stores followed by a single load, causing a memory size mismatch stall.
41619 Count this as an arbitrarily high cost of 20. */
41620 if (targetm.class_max_nregs (class1, mode)
41621 > targetm.class_max_nregs (class2, mode))
41622 cost += 20;
41624 /* In the case of FP/MMX moves, the registers actually overlap, and we
41625 have to switch modes in order to treat them differently. */
41626 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41627 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41628 cost += 20;
41630 return cost;
41633 /* Moves between SSE/MMX and integer unit are expensive. */
41634 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41635 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41637 /* ??? By keeping the returned value relatively high, we limit the number
41638 of moves between integer and MMX/SSE registers for all targets.
41639 Additionally, a high value prevents problems with x86_modes_tieable_p(),
41640 where integer modes in MMX/SSE registers are not tieable
41641 because of missing QImode and HImode moves to, from or between
41642 MMX/SSE registers. */
41643 return MAX (8, ix86_cost->mmxsse_to_integer);
41645 if (MAYBE_FLOAT_CLASS_P (class1))
41646 return ix86_cost->fp_move;
41647 if (MAYBE_SSE_CLASS_P (class1))
41648 return ix86_cost->sse_move;
41649 if (MAYBE_MMX_CLASS_P (class1))
41650 return ix86_cost->mmx_move;
41651 return 2;
41654 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41655 MODE. */
41657 bool
41658 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41660 /* Flags can hold only CCmode values, and only flags can hold them. */
41661 if (CC_REGNO_P (regno))
41662 return GET_MODE_CLASS (mode) == MODE_CC;
41663 if (GET_MODE_CLASS (mode) == MODE_CC
41664 || GET_MODE_CLASS (mode) == MODE_RANDOM
41665 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41666 return false;
41667 if (STACK_REGNO_P (regno))
41668 return VALID_FP_MODE_P (mode);
41669 if (MASK_REGNO_P (regno))
41670 return (VALID_MASK_REG_MODE (mode)
41671 || ((TARGET_AVX512BW || TARGET_AVX512VBMI)
41672 && VALID_MASK_AVX512BW_MODE (mode)));
41673 if (BND_REGNO_P (regno))
41674 return VALID_BND_REG_MODE (mode);
41675 if (SSE_REGNO_P (regno))
41677 /* We implement the move patterns for all vector modes into and
41678 out of SSE registers, even when no operation instructions
41679 are available. */
41681 /* For AVX-512 we allow, regardless of regno:
41682 - XI mode
41683 - any 512-bit wide vector mode
41684 - any scalar mode. */
41685 if (TARGET_AVX512F
41686 && (mode == XImode
41687 || VALID_AVX512F_REG_MODE (mode)
41688 || VALID_AVX512F_SCALAR_MODE (mode)))
41689 return true;
41691 /* TODO check for QI/HI scalars. */
41692 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41693 if (TARGET_AVX512VL
41694 && (mode == OImode
41695 || mode == TImode
41696 || VALID_AVX256_REG_MODE (mode)
41697 || VALID_AVX512VL_128_REG_MODE (mode)))
41698 return true;
41700 /* xmm16-xmm31 are only available for AVX-512. */
41701 if (EXT_REX_SSE_REGNO_P (regno))
41702 return false;
41704 /* OImode and AVX modes are available only when AVX is enabled. */
41705 return ((TARGET_AVX
41706 && VALID_AVX256_REG_OR_OI_MODE (mode))
41707 || VALID_SSE_REG_MODE (mode)
41708 || VALID_SSE2_REG_MODE (mode)
41709 || VALID_MMX_REG_MODE (mode)
41710 || VALID_MMX_REG_MODE_3DNOW (mode));
41712 if (MMX_REGNO_P (regno))
41714 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41715 so if the register is available at all, then we can move data of
41716 the given mode into or out of it. */
41717 return (VALID_MMX_REG_MODE (mode)
41718 || VALID_MMX_REG_MODE_3DNOW (mode));
41721 if (mode == QImode)
41723 /* Take care for QImode values - they can be in non-QI regs,
41724 but then they do cause partial register stalls. */
41725 if (ANY_QI_REGNO_P (regno))
41726 return true;
41727 if (!TARGET_PARTIAL_REG_STALL)
41728 return true;
41729 /* LRA checks if the hard register is OK for the given mode.
41730 QImode values can live in non-QI regs, so we allow all
41731 registers here. */
41732 if (lra_in_progress)
41733 return true;
41734 return !can_create_pseudo_p ();
41736 /* We handle both integer and floats in the general purpose registers. */
41737 else if (VALID_INT_MODE_P (mode))
41738 return true;
41739 else if (VALID_FP_MODE_P (mode))
41740 return true;
41741 else if (VALID_DFP_MODE_P (mode))
41742 return true;
41743 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41744 on to use that value in smaller contexts, this can easily force a
41745 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41746 supporting DImode, allow it. */
41747 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41748 return true;
41750 return false;
41753 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41754 tieable integer mode. */
41756 static bool
41757 ix86_tieable_integer_mode_p (machine_mode mode)
41759 switch (mode)
41761 case HImode:
41762 case SImode:
41763 return true;
41765 case QImode:
41766 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41768 case DImode:
41769 return TARGET_64BIT;
41771 default:
41772 return false;
41776 /* Return true if MODE1 is accessible in a register that can hold MODE2
41777 without copying. That is, all register classes that can hold MODE2
41778 can also hold MODE1. */
41780 bool
41781 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41783 if (mode1 == mode2)
41784 return true;
41786 if (ix86_tieable_integer_mode_p (mode1)
41787 && ix86_tieable_integer_mode_p (mode2))
41788 return true;
41790 /* MODE2 being XFmode implies fp stack or general regs, which means we
41791 can tie any smaller floating point modes to it. Note that we do not
41792 tie this with TFmode. */
41793 if (mode2 == XFmode)
41794 return mode1 == SFmode || mode1 == DFmode;
41796 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41797 that we can tie it with SFmode. */
41798 if (mode2 == DFmode)
41799 return mode1 == SFmode;
41801 /* If MODE2 is only appropriate for an SSE register, then tie with
41802 any other mode acceptable to SSE registers. */
41803 if (GET_MODE_SIZE (mode2) == 32
41804 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41805 return (GET_MODE_SIZE (mode1) == 32
41806 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41807 if (GET_MODE_SIZE (mode2) == 16
41808 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41809 return (GET_MODE_SIZE (mode1) == 16
41810 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41812 /* If MODE2 is appropriate for an MMX register, then tie
41813 with any other mode acceptable to MMX registers. */
41814 if (GET_MODE_SIZE (mode2) == 8
41815 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41816 return (GET_MODE_SIZE (mode1) == 8
41817 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41819 return false;
41822 /* Return the cost of moving between two registers of mode MODE. */
41824 static int
41825 ix86_set_reg_reg_cost (machine_mode mode)
41827 unsigned int units = UNITS_PER_WORD;
41829 switch (GET_MODE_CLASS (mode))
41831 default:
41832 break;
41834 case MODE_CC:
41835 units = GET_MODE_SIZE (CCmode);
41836 break;
41838 case MODE_FLOAT:
41839 if ((TARGET_SSE && mode == TFmode)
41840 || (TARGET_80387 && mode == XFmode)
41841 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41842 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41843 units = GET_MODE_SIZE (mode);
41844 break;
41846 case MODE_COMPLEX_FLOAT:
41847 if ((TARGET_SSE && mode == TCmode)
41848 || (TARGET_80387 && mode == XCmode)
41849 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41850 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41851 units = GET_MODE_SIZE (mode);
41852 break;
41854 case MODE_VECTOR_INT:
41855 case MODE_VECTOR_FLOAT:
41856 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41857 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41858 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41859 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41860 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41861 units = GET_MODE_SIZE (mode);
41864 /* Return the cost of moving between two registers of mode MODE,
41865 assuming that the move will be in pieces of at most UNITS bytes. */
41866 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
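/* Illustrative sketch (not part of GCC): the ceiling division used just
   above to turn a mode size and a maximum piece size into a move count.
   For example a 32-byte value moved in 16-byte pieces takes 2 moves, and
   a 12-byte value moved in 4-byte pieces takes 3.  */
static int
move_pieces_needed (int mode_size, int unit_size)
{
  return (mode_size + unit_size - 1) / unit_size;
}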
41869 /* Compute a (partial) cost for rtx X. Return true if the complete
41870 cost has been computed, and false if subexpressions should be
41871 scanned. In either case, *TOTAL contains the cost result. */
41873 static bool
41874 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41875 bool speed)
41877 rtx mask;
41878 enum rtx_code code = (enum rtx_code) code_i;
41879 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41880 machine_mode mode = GET_MODE (x);
41881 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41883 switch (code)
41885 case SET:
41886 if (register_operand (SET_DEST (x), VOIDmode)
41887 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41889 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41890 return true;
41892 return false;
41894 case CONST_INT:
41895 case CONST:
41896 case LABEL_REF:
41897 case SYMBOL_REF:
41898 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41899 *total = 3;
41900 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41901 *total = 2;
41902 else if (flag_pic && SYMBOLIC_CONST (x)
41903 && !(TARGET_64BIT
41904 && (GET_CODE (x) == LABEL_REF
41905 || (GET_CODE (x) == SYMBOL_REF
41906 && SYMBOL_REF_LOCAL_P (x)))))
41907 *total = 1;
41908 else
41909 *total = 0;
41910 return true;
41912 case CONST_DOUBLE:
41913 if (mode == VOIDmode)
41915 *total = 0;
41916 return true;
41918 switch (standard_80387_constant_p (x))
41920 case 1: /* 0.0 */
41921 *total = 1;
41922 return true;
41923 default: /* Other constants */
41924 *total = 2;
41925 return true;
41926 case 0:
41927 case -1:
41928 break;
41930 if (SSE_FLOAT_MODE_P (mode))
41932 case CONST_VECTOR:
41933 switch (standard_sse_constant_p (x))
41935 case 0:
41936 break;
41937 case 1: /* 0: xor eliminates false dependency */
41938 *total = 0;
41939 return true;
41940 default: /* -1: cmp contains false dependency */
41941 *total = 1;
41942 return true;
41945 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41946 it'll probably end up. Add a penalty for size. */
41947 *total = (COSTS_N_INSNS (1)
41948 + (flag_pic != 0 && !TARGET_64BIT)
41949 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41950 return true;
41952 case ZERO_EXTEND:
41953 /* The zero extension is often completely free on x86_64, so make
41954 it as cheap as possible. */
41955 if (TARGET_64BIT && mode == DImode
41956 && GET_MODE (XEXP (x, 0)) == SImode)
41957 *total = 1;
41958 else if (TARGET_ZERO_EXTEND_WITH_AND)
41959 *total = cost->add;
41960 else
41961 *total = cost->movzx;
41962 return false;
41964 case SIGN_EXTEND:
41965 *total = cost->movsx;
41966 return false;
41968 case ASHIFT:
41969 if (SCALAR_INT_MODE_P (mode)
41970 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41971 && CONST_INT_P (XEXP (x, 1)))
41973 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41974 if (value == 1)
41976 *total = cost->add;
41977 return false;
41979 if ((value == 2 || value == 3)
41980 && cost->lea <= cost->shift_const)
41982 *total = cost->lea;
41983 return false;
41986 /* FALLTHRU */
41988 case ROTATE:
41989 case ASHIFTRT:
41990 case LSHIFTRT:
41991 case ROTATERT:
41992 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41994 /* ??? Should be SSE vector operation cost. */
41995 /* At least for published AMD latencies, this really is the same
41996 as the latency for a simple fpu operation like fabs. */
41997 /* V*QImode is emulated with 1-11 insns. */
41998 if (mode == V16QImode || mode == V32QImode)
42000 int count = 11;
42001 if (TARGET_XOP && mode == V16QImode)
42003 /* For XOP we use vpshab, which requires a broadcast of the
42004 value to the variable shift insn. For constants this
42005 means a V16Q const in mem; even when we can perform the
42006 shift with one insn, set the cost to prefer paddb. */
42007 if (CONSTANT_P (XEXP (x, 1)))
42009 *total = (cost->fabs
42010 + rtx_cost (XEXP (x, 0), code, 0, speed)
42011 + (speed ? 2 : COSTS_N_BYTES (16)));
42012 return true;
42014 count = 3;
42016 else if (TARGET_SSSE3)
42017 count = 7;
42018 *total = cost->fabs * count;
42020 else
42021 *total = cost->fabs;
42023 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42025 if (CONST_INT_P (XEXP (x, 1)))
42027 if (INTVAL (XEXP (x, 1)) > 32)
42028 *total = cost->shift_const + COSTS_N_INSNS (2);
42029 else
42030 *total = cost->shift_const * 2;
42032 else
42034 if (GET_CODE (XEXP (x, 1)) == AND)
42035 *total = cost->shift_var * 2;
42036 else
42037 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42040 else
42042 if (CONST_INT_P (XEXP (x, 1)))
42043 *total = cost->shift_const;
42044 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42045 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42047 /* Return the cost after shift-and truncation. */
42048 *total = cost->shift_var;
42049 return true;
42051 else
42052 *total = cost->shift_var;
42054 return false;
42056 case FMA:
42058 rtx sub;
42060 gcc_assert (FLOAT_MODE_P (mode));
42061 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42063 /* ??? SSE scalar/vector cost should be used here. */
42064 /* ??? Bald assumption that fma has the same cost as fmul. */
42065 *total = cost->fmul;
42066 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42068 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42069 sub = XEXP (x, 0);
42070 if (GET_CODE (sub) == NEG)
42071 sub = XEXP (sub, 0);
42072 *total += rtx_cost (sub, FMA, 0, speed);
42074 sub = XEXP (x, 2);
42075 if (GET_CODE (sub) == NEG)
42076 sub = XEXP (sub, 0);
42077 *total += rtx_cost (sub, FMA, 2, speed);
42078 return true;
42081 case MULT:
42082 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42084 /* ??? SSE scalar cost should be used here. */
42085 *total = cost->fmul;
42086 return false;
42088 else if (X87_FLOAT_MODE_P (mode))
42090 *total = cost->fmul;
42091 return false;
42093 else if (FLOAT_MODE_P (mode))
42095 /* ??? SSE vector cost should be used here. */
42096 *total = cost->fmul;
42097 return false;
42099 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42101 /* V*QImode is emulated with 7-13 insns. */
42102 if (mode == V16QImode || mode == V32QImode)
42104 int extra = 11;
42105 if (TARGET_XOP && mode == V16QImode)
42106 extra = 5;
42107 else if (TARGET_SSSE3)
42108 extra = 6;
42109 *total = cost->fmul * 2 + cost->fabs * extra;
42111 /* V*DImode is emulated with 5-8 insns. */
42112 else if (mode == V2DImode || mode == V4DImode)
42114 if (TARGET_XOP && mode == V2DImode)
42115 *total = cost->fmul * 2 + cost->fabs * 3;
42116 else
42117 *total = cost->fmul * 3 + cost->fabs * 5;
42119 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42120 insns, including two PMULUDQ. */
42121 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42122 *total = cost->fmul * 2 + cost->fabs * 5;
42123 else
42124 *total = cost->fmul;
42125 return false;
42127 else
42129 rtx op0 = XEXP (x, 0);
42130 rtx op1 = XEXP (x, 1);
42131 int nbits;
42132 if (CONST_INT_P (XEXP (x, 1)))
42134 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42135 for (nbits = 0; value != 0; value &= value - 1)
42136 nbits++;
42138 else
42139 /* This is arbitrary. */
42140 nbits = 7;
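/* Worked example (added): multiplying by the constant 10 (binary 1010)
   gives nbits == 2, so the cost computed below is
   mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the costs of the two
   operands.  */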
42142 /* Compute costs correctly for widening multiplication. */
42143 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42144 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42145 == GET_MODE_SIZE (mode))
42147 int is_mulwiden = 0;
42148 machine_mode inner_mode = GET_MODE (op0);
42150 if (GET_CODE (op0) == GET_CODE (op1))
42151 is_mulwiden = 1, op1 = XEXP (op1, 0);
42152 else if (CONST_INT_P (op1))
42154 if (GET_CODE (op0) == SIGN_EXTEND)
42155 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42156 == INTVAL (op1);
42157 else
42158 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42161 if (is_mulwiden)
42162 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42165 *total = (cost->mult_init[MODE_INDEX (mode)]
42166 + nbits * cost->mult_bit
42167 + rtx_cost (op0, outer_code, opno, speed)
42168 + rtx_cost (op1, outer_code, opno, speed));
42170 return true;
42173 case DIV:
42174 case UDIV:
42175 case MOD:
42176 case UMOD:
42177 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42178 /* ??? SSE cost should be used here. */
42179 *total = cost->fdiv;
42180 else if (X87_FLOAT_MODE_P (mode))
42181 *total = cost->fdiv;
42182 else if (FLOAT_MODE_P (mode))
42183 /* ??? SSE vector cost should be used here. */
42184 *total = cost->fdiv;
42185 else
42186 *total = cost->divide[MODE_INDEX (mode)];
42187 return false;
42189 case PLUS:
42190 if (GET_MODE_CLASS (mode) == MODE_INT
42191 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42193 if (GET_CODE (XEXP (x, 0)) == PLUS
42194 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42195 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42196 && CONSTANT_P (XEXP (x, 1)))
42198 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42199 if (val == 2 || val == 4 || val == 8)
42201 *total = cost->lea;
42202 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42203 outer_code, opno, speed);
42204 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42205 outer_code, opno, speed);
42206 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42207 return true;
42210 else if (GET_CODE (XEXP (x, 0)) == MULT
42211 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42213 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42214 if (val == 2 || val == 4 || val == 8)
42216 *total = cost->lea;
42217 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42218 outer_code, opno, speed);
42219 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42220 return true;
42223 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42225 *total = cost->lea;
42226 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42227 outer_code, opno, speed);
42228 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42229 outer_code, opno, speed);
42230 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42231 return true;
42234 /* FALLTHRU */
42236 case MINUS:
42237 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42239 /* ??? SSE cost should be used here. */
42240 *total = cost->fadd;
42241 return false;
42243 else if (X87_FLOAT_MODE_P (mode))
42245 *total = cost->fadd;
42246 return false;
42248 else if (FLOAT_MODE_P (mode))
42250 /* ??? SSE vector cost should be used here. */
42251 *total = cost->fadd;
42252 return false;
42254 /* FALLTHRU */
42256 case AND:
42257 case IOR:
42258 case XOR:
42259 if (GET_MODE_CLASS (mode) == MODE_INT
42260 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42262 *total = (cost->add * 2
42263 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42264 << (GET_MODE (XEXP (x, 0)) != DImode))
42265 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42266 << (GET_MODE (XEXP (x, 1)) != DImode)));
42267 return true;
42269 /* FALLTHRU */
42271 case NEG:
42272 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42274 /* ??? SSE cost should be used here. */
42275 *total = cost->fchs;
42276 return false;
42278 else if (X87_FLOAT_MODE_P (mode))
42280 *total = cost->fchs;
42281 return false;
42283 else if (FLOAT_MODE_P (mode))
42285 /* ??? SSE vector cost should be used here. */
42286 *total = cost->fchs;
42287 return false;
42289 /* FALLTHRU */
42291 case NOT:
42292 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42294 /* ??? Should be SSE vector operation cost. */
42295 /* At least for published AMD latencies, this really is the same
42296 as the latency for a simple fpu operation like fabs. */
42297 *total = cost->fabs;
42299 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42300 *total = cost->add * 2;
42301 else
42302 *total = cost->add;
42303 return false;
42305 case COMPARE:
42306 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42307 && XEXP (XEXP (x, 0), 1) == const1_rtx
42308 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42309 && XEXP (x, 1) == const0_rtx)
42311 /* This kind of construct is implemented using test[bwl].
42312 Treat it as if we had an AND. */
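/* Example (added): (compare (zero_extract X (const_int 1) (const_int 5))
   (const_int 0)) tests a single bit and is typically emitted as something
   like testl $0x20, <reg>.  */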
42313 *total = (cost->add
42314 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42315 + rtx_cost (const1_rtx, outer_code, opno, speed));
42316 return true;
42318 return false;
42320 case FLOAT_EXTEND:
42321 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42322 *total = 0;
42323 return false;
42325 case ABS:
42326 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42327 /* ??? SSE cost should be used here. */
42328 *total = cost->fabs;
42329 else if (X87_FLOAT_MODE_P (mode))
42330 *total = cost->fabs;
42331 else if (FLOAT_MODE_P (mode))
42332 /* ??? SSE vector cost should be used here. */
42333 *total = cost->fabs;
42334 return false;
42336 case SQRT:
42337 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42338 /* ??? SSE cost should be used here. */
42339 *total = cost->fsqrt;
42340 else if (X87_FLOAT_MODE_P (mode))
42341 *total = cost->fsqrt;
42342 else if (FLOAT_MODE_P (mode))
42343 /* ??? SSE vector cost should be used here. */
42344 *total = cost->fsqrt;
42345 return false;
42347 case UNSPEC:
42348 if (XINT (x, 1) == UNSPEC_TP)
42349 *total = 0;
42350 return false;
42352 case VEC_SELECT:
42353 case VEC_CONCAT:
42354 case VEC_DUPLICATE:
42355 /* ??? Assume all of these vector manipulation patterns are
42356 recognizable. In which case they all pretty much have the
42357 same cost. */
42358 *total = cost->fabs;
42359 return true;
42360 case VEC_MERGE:
42361 mask = XEXP (x, 2);
42362 /* This is a masked instruction; assume the same cost
42363 as the non-masked variant. */
42364 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42365 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42366 else
42367 *total = cost->fabs;
42368 return true;
42370 default:
42371 return false;
42375 #if TARGET_MACHO
42377 static int current_machopic_label_num;
42379 /* Given a symbol name and its associated stub, write out the
42380 definition of the stub. */
42382 void
42383 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42385 unsigned int length;
42386 char *binder_name, *symbol_name, lazy_ptr_name[32];
42387 int label = ++current_machopic_label_num;
42389 /* For 64-bit we shouldn't get here. */
42390 gcc_assert (!TARGET_64BIT);
42392 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42393 symb = targetm.strip_name_encoding (symb);
42395 length = strlen (stub);
42396 binder_name = XALLOCAVEC (char, length + 32);
42397 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42399 length = strlen (symb);
42400 symbol_name = XALLOCAVEC (char, length + 32);
42401 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42403 sprintf (lazy_ptr_name, "L%d$lz", label);
42405 if (MACHOPIC_ATT_STUB)
42406 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42407 else if (MACHOPIC_PURE)
42408 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42409 else
42410 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42412 fprintf (file, "%s:\n", stub);
42413 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42415 if (MACHOPIC_ATT_STUB)
42417 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42419 else if (MACHOPIC_PURE)
42421 /* PIC stub. */
42422 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42423 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42424 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42425 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42426 label, lazy_ptr_name, label);
42427 fprintf (file, "\tjmp\t*%%ecx\n");
42429 else
42430 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42432 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42433 it needs no stub-binding-helper. */
42434 if (MACHOPIC_ATT_STUB)
42435 return;
42437 fprintf (file, "%s:\n", binder_name);
42439 if (MACHOPIC_PURE)
42441 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42442 fprintf (file, "\tpushl\t%%ecx\n");
42444 else
42445 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42447 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42449 /* N.B. Keep the correspondence of these
42450 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42451 old-pic/new-pic/non-pic stubs; altering this will break
42452 compatibility with existing dylibs. */
42453 if (MACHOPIC_PURE)
42455 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42456 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42458 else
42459 /* 16-byte -mdynamic-no-pic stub. */
42460 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42462 fprintf (file, "%s:\n", lazy_ptr_name);
42463 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42464 fprintf (file, ASM_LONG "%s\n", binder_name);
42466 #endif /* TARGET_MACHO */
42468 /* Order the registers for register allocator. */
42470 void
42471 x86_order_regs_for_local_alloc (void)
42473 int pos = 0;
42474 int i;
42476 /* First allocate the local general purpose registers. */
42477 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42478 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42479 reg_alloc_order [pos++] = i;
42481 /* Global general purpose registers. */
42482 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42483 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42484 reg_alloc_order [pos++] = i;
42486 /* x87 registers come first in case we are doing FP math
42487 using them. */
42488 if (!TARGET_SSE_MATH)
42489 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42490 reg_alloc_order [pos++] = i;
42492 /* SSE registers. */
42493 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42494 reg_alloc_order [pos++] = i;
42495 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42496 reg_alloc_order [pos++] = i;
42498 /* Extended REX SSE registers. */
42499 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42500 reg_alloc_order [pos++] = i;
42502 /* Mask registers. */
42503 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42504 reg_alloc_order [pos++] = i;
42506 /* MPX bound registers. */
42507 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42508 reg_alloc_order [pos++] = i;
42510 /* x87 registers. */
42511 if (TARGET_SSE_MATH)
42512 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42513 reg_alloc_order [pos++] = i;
42515 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42516 reg_alloc_order [pos++] = i;
42518 /* Initialize the rest of the array, as we do not allocate some registers
42519 at all. */
42520 while (pos < FIRST_PSEUDO_REGISTER)
42521 reg_alloc_order [pos++] = 0;
42524 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42525 in struct attribute_spec.handler. */
42526 static tree
42527 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42528 tree args,
42529 int,
42530 bool *no_add_attrs)
42532 if (TREE_CODE (*node) != FUNCTION_TYPE
42533 && TREE_CODE (*node) != METHOD_TYPE
42534 && TREE_CODE (*node) != FIELD_DECL
42535 && TREE_CODE (*node) != TYPE_DECL)
42537 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42538 name);
42539 *no_add_attrs = true;
42540 return NULL_TREE;
42542 if (TARGET_64BIT)
42544 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42545 name);
42546 *no_add_attrs = true;
42547 return NULL_TREE;
42549 if (is_attribute_p ("callee_pop_aggregate_return", name))
42551 tree cst;
42553 cst = TREE_VALUE (args);
42554 if (TREE_CODE (cst) != INTEGER_CST)
42556 warning (OPT_Wattributes,
42557 "%qE attribute requires an integer constant argument",
42558 name);
42559 *no_add_attrs = true;
42561 else if (compare_tree_int (cst, 0) != 0
42562 && compare_tree_int (cst, 1) != 0)
42564 warning (OPT_Wattributes,
42565 "argument to %qE attribute is neither zero, nor one",
42566 name);
42567 *no_add_attrs = true;
42570 return NULL_TREE;
42573 return NULL_TREE;
42576 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42577 struct attribute_spec.handler. */
42578 static tree
42579 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42580 bool *no_add_attrs)
42582 if (TREE_CODE (*node) != FUNCTION_TYPE
42583 && TREE_CODE (*node) != METHOD_TYPE
42584 && TREE_CODE (*node) != FIELD_DECL
42585 && TREE_CODE (*node) != TYPE_DECL)
42587 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42588 name);
42589 *no_add_attrs = true;
42590 return NULL_TREE;
42593 /* Can combine regparm with all attributes but fastcall. */
42594 if (is_attribute_p ("ms_abi", name))
42596 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42598 error ("ms_abi and sysv_abi attributes are not compatible");
42601 return NULL_TREE;
42603 else if (is_attribute_p ("sysv_abi", name))
42605 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42607 error ("ms_abi and sysv_abi attributes are not compatible");
42610 return NULL_TREE;
42613 return NULL_TREE;
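/* Sketch (added, not the verbatim patch): the restriction named in the change
   title, rejecting the ms_abi attribute under x32, could be enforced in this
   handler along the following lines.  TARGET_X32 is the existing x32
   predicate; the actual fix may instead live where the function ABI is
   resolved.  */
#if 0
  if (TARGET_X32 && is_attribute_p ("ms_abi", name))
    {
      error ("X32 does not support ms_abi attribute");
      *no_add_attrs = true;
      return NULL_TREE;
    }
#endif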
42616 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42617 struct attribute_spec.handler. */
42618 static tree
42619 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42620 bool *no_add_attrs)
42622 tree *type = NULL;
42623 if (DECL_P (*node))
42625 if (TREE_CODE (*node) == TYPE_DECL)
42626 type = &TREE_TYPE (*node);
42628 else
42629 type = node;
42631 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42633 warning (OPT_Wattributes, "%qE attribute ignored",
42634 name);
42635 *no_add_attrs = true;
42638 else if ((is_attribute_p ("ms_struct", name)
42639 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42640 || ((is_attribute_p ("gcc_struct", name)
42641 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42643 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42644 name);
42645 *no_add_attrs = true;
42648 return NULL_TREE;
42651 static tree
42652 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42653 bool *no_add_attrs)
42655 if (TREE_CODE (*node) != FUNCTION_DECL)
42657 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42658 name);
42659 *no_add_attrs = true;
42661 return NULL_TREE;
42664 static bool
42665 ix86_ms_bitfield_layout_p (const_tree record_type)
42667 return ((TARGET_MS_BITFIELD_LAYOUT
42668 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42669 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42672 /* Returns an expression indicating where the this parameter is
42673 located on entry to the FUNCTION. */
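/* Rough summary (added, illustrative only): on 64-bit targets THIS arrives in
   the first integer argument register of the function's ABI (the second one
   when a hidden aggregate-return pointer is present).  On 32-bit targets,
   fastcall and thiscall use %ecx, regparm functions use %eax, an aggregate
   return bumps THIS to the next register or onto the stack, and otherwise
   THIS is found at 4(%esp) (8(%esp) with an aggregate return).  */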
42675 static rtx
42676 x86_this_parameter (tree function)
42678 tree type = TREE_TYPE (function);
42679 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42680 int nregs;
42682 if (TARGET_64BIT)
42684 const int *parm_regs;
42686 if (ix86_function_type_abi (type) == MS_ABI)
42687 parm_regs = x86_64_ms_abi_int_parameter_registers;
42688 else
42689 parm_regs = x86_64_int_parameter_registers;
42690 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42693 nregs = ix86_function_regparm (type, function);
42695 if (nregs > 0 && !stdarg_p (type))
42697 int regno;
42698 unsigned int ccvt = ix86_get_callcvt (type);
42700 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42701 regno = aggr ? DX_REG : CX_REG;
42702 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42704 regno = CX_REG;
42705 if (aggr)
42706 return gen_rtx_MEM (SImode,
42707 plus_constant (Pmode, stack_pointer_rtx, 4));
42709 else
42711 regno = AX_REG;
42712 if (aggr)
42714 regno = DX_REG;
42715 if (nregs == 1)
42716 return gen_rtx_MEM (SImode,
42717 plus_constant (Pmode,
42718 stack_pointer_rtx, 4));
42721 return gen_rtx_REG (SImode, regno);
42724 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42725 aggr ? 8 : 4));
42728 /* Determine whether x86_output_mi_thunk can succeed. */
42730 static bool
42731 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42732 const_tree function)
42734 /* 64-bit can handle anything. */
42735 if (TARGET_64BIT)
42736 return true;
42738 /* For 32-bit, everything's fine if we have one free register. */
42739 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42740 return true;
42742 /* Need a free register for vcall_offset. */
42743 if (vcall_offset)
42744 return false;
42746 /* Need a free register for GOT references. */
42747 if (flag_pic && !targetm.binds_local_p (function))
42748 return false;
42750 /* Otherwise ok. */
42751 return true;
42754 /* Output the assembler code for a thunk function. THUNK_DECL is the
42755 declaration for the thunk function itself, FUNCTION is the decl for
42756 the target function. DELTA is an immediate constant offset to be
42757 added to THIS. If VCALL_OFFSET is nonzero, the word at
42758 *(*this + vcall_offset) should be added to THIS. */
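/* Illustrative C model (added) of the adjustment the emitted thunk performs,
   assuming THIS is the first pointer argument:

     this = (char *) this + delta;
     if (vcall_offset)
       this = (char *) this + *(ptrdiff_t *) (*(char **) this + vcall_offset);

   then FUNCTION is tail-called with the adjusted THIS.  */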
42760 static void
42761 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42762 HOST_WIDE_INT vcall_offset, tree function)
42764 rtx this_param = x86_this_parameter (function);
42765 rtx this_reg, tmp, fnaddr;
42766 unsigned int tmp_regno;
42767 rtx_insn *insn;
42769 if (TARGET_64BIT)
42770 tmp_regno = R10_REG;
42771 else
42773 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42774 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42775 tmp_regno = AX_REG;
42776 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42777 tmp_regno = DX_REG;
42778 else
42779 tmp_regno = CX_REG;
42782 emit_note (NOTE_INSN_PROLOGUE_END);
42784 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42785 pull it in now and let DELTA benefit. */
42786 if (REG_P (this_param))
42787 this_reg = this_param;
42788 else if (vcall_offset)
42790 /* Put the this parameter into %eax. */
42791 this_reg = gen_rtx_REG (Pmode, AX_REG);
42792 emit_move_insn (this_reg, this_param);
42794 else
42795 this_reg = NULL_RTX;
42797 /* Adjust the this parameter by a fixed constant. */
42798 if (delta)
42800 rtx delta_rtx = GEN_INT (delta);
42801 rtx delta_dst = this_reg ? this_reg : this_param;
42803 if (TARGET_64BIT)
42805 if (!x86_64_general_operand (delta_rtx, Pmode))
42807 tmp = gen_rtx_REG (Pmode, tmp_regno);
42808 emit_move_insn (tmp, delta_rtx);
42809 delta_rtx = tmp;
42813 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42816 /* Adjust the this parameter by a value stored in the vtable. */
42817 if (vcall_offset)
42819 rtx vcall_addr, vcall_mem, this_mem;
42821 tmp = gen_rtx_REG (Pmode, tmp_regno);
42823 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42824 if (Pmode != ptr_mode)
42825 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42826 emit_move_insn (tmp, this_mem);
42828 /* Adjust the this parameter. */
42829 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42830 if (TARGET_64BIT
42831 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42833 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42834 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42835 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42838 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42839 if (Pmode != ptr_mode)
42840 emit_insn (gen_addsi_1_zext (this_reg,
42841 gen_rtx_REG (ptr_mode,
42842 REGNO (this_reg)),
42843 vcall_mem));
42844 else
42845 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42848 /* If necessary, drop THIS back to its stack slot. */
42849 if (this_reg && this_reg != this_param)
42850 emit_move_insn (this_param, this_reg);
42852 fnaddr = XEXP (DECL_RTL (function), 0);
42853 if (TARGET_64BIT)
42855 if (!flag_pic || targetm.binds_local_p (function)
42856 || TARGET_PECOFF)
42858 else
42860 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42861 tmp = gen_rtx_CONST (Pmode, tmp);
42862 fnaddr = gen_const_mem (Pmode, tmp);
42865 else
42867 if (!flag_pic || targetm.binds_local_p (function))
42869 #if TARGET_MACHO
42870 else if (TARGET_MACHO)
42872 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42873 fnaddr = XEXP (fnaddr, 0);
42875 #endif /* TARGET_MACHO */
42876 else
42878 tmp = gen_rtx_REG (Pmode, CX_REG);
42879 output_set_got (tmp, NULL_RTX);
42881 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42882 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42883 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42884 fnaddr = gen_const_mem (Pmode, fnaddr);
42888 /* Our sibling call patterns do not allow memories, because we have no
42889 predicate that can distinguish between frame and non-frame memory.
42890 For our purposes here, we can get away with (ab)using a jump pattern,
42891 because we're going to do no optimization. */
42892 if (MEM_P (fnaddr))
42894 if (sibcall_insn_operand (fnaddr, word_mode))
42896 fnaddr = XEXP (DECL_RTL (function), 0);
42897 tmp = gen_rtx_MEM (QImode, fnaddr);
42898 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42899 tmp = emit_call_insn (tmp);
42900 SIBLING_CALL_P (tmp) = 1;
42902 else
42903 emit_jump_insn (gen_indirect_jump (fnaddr));
42905 else
42907 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42909 // CM_LARGE_PIC always uses a pseudo PIC register, which is
42910 // uninitialized. Since FUNCTION is local and calling it
42911 // doesn't go through the PLT, we use the scratch register %r11 as
42912 // the PIC register and initialize it here.
42913 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42914 ix86_init_large_pic_reg (tmp_regno);
42915 fnaddr = legitimize_pic_address (fnaddr,
42916 gen_rtx_REG (Pmode, tmp_regno));
42919 if (!sibcall_insn_operand (fnaddr, word_mode))
42921 tmp = gen_rtx_REG (word_mode, tmp_regno);
42922 if (GET_MODE (fnaddr) != word_mode)
42923 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42924 emit_move_insn (tmp, fnaddr);
42925 fnaddr = tmp;
42928 tmp = gen_rtx_MEM (QImode, fnaddr);
42929 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42930 tmp = emit_call_insn (tmp);
42931 SIBLING_CALL_P (tmp) = 1;
42933 emit_barrier ();
42935 /* Emit just enough of rest_of_compilation to get the insns emitted.
42936 Note that use_thunk calls assemble_start_function et al. */
42937 insn = get_insns ();
42938 shorten_branches (insn);
42939 final_start_function (insn, file, 1);
42940 final (insn, file, 1);
42941 final_end_function ();
42944 static void
42945 x86_file_start (void)
42947 default_file_start ();
42948 if (TARGET_16BIT)
42949 fputs ("\t.code16gcc\n", asm_out_file);
42950 #if TARGET_MACHO
42951 darwin_file_start ();
42952 #endif
42953 if (X86_FILE_START_VERSION_DIRECTIVE)
42954 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42955 if (X86_FILE_START_FLTUSED)
42956 fputs ("\t.global\t__fltused\n", asm_out_file);
42957 if (ix86_asm_dialect == ASM_INTEL)
42958 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
42961 int
42962 x86_field_alignment (tree field, int computed)
42964 machine_mode mode;
42965 tree type = TREE_TYPE (field);
42967 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
42968 return computed;
42969 mode = TYPE_MODE (strip_array_types (type));
42970 if (mode == DFmode || mode == DCmode
42971 || GET_MODE_CLASS (mode) == MODE_INT
42972 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42973 return MIN (32, computed);
42974 return computed;
42977 /* Print call to TARGET to FILE. */
42979 static void
42980 x86_print_call_or_nop (FILE *file, const char *target)
42982 if (flag_nop_mcount)
42983 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42984 else
42985 fprintf (file, "1:\tcall\t%s\n", target);
42988 /* Output assembler code to FILE to increment profiler label # LABELNO
42989 for profiling a function entry. */
42990 void
42991 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
42993 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
42994 : MCOUNT_NAME);
42995 if (TARGET_64BIT)
42997 #ifndef NO_PROFILE_COUNTERS
42998 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
42999 #endif
43001 if (!TARGET_PECOFF && flag_pic)
43002 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43003 else
43004 x86_print_call_or_nop (file, mcount_name);
43006 else if (flag_pic)
43008 #ifndef NO_PROFILE_COUNTERS
43009 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43010 LPREFIX, labelno);
43011 #endif
43012 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43014 else
43016 #ifndef NO_PROFILE_COUNTERS
43017 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43018 LPREFIX, labelno);
43019 #endif
43020 x86_print_call_or_nop (file, mcount_name);
43023 if (flag_record_mcount)
43025 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43026 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43027 fprintf (file, "\t.previous\n");
43031 /* We don't have exact information about the insn sizes, but we may assume
43032 quite safely that we are informed about all 1 byte insns and memory
43033 address sizes. This is enough to eliminate unnecessary padding in
43034 99% of cases. */
43036 static int
43037 min_insn_size (rtx_insn *insn)
43039 int l = 0, len;
43041 if (!INSN_P (insn) || !active_insn_p (insn))
43042 return 0;
43044 /* Discard alignments we've emitted and jump instructions. */
43045 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43046 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43047 return 0;
43049 /* Important case - calls are always 5 bytes.
43050 It is common to have many calls in a row. */
43051 if (CALL_P (insn)
43052 && symbolic_reference_mentioned_p (PATTERN (insn))
43053 && !SIBLING_CALL_P (insn))
43054 return 5;
43055 len = get_attr_length (insn);
43056 if (len <= 1)
43057 return 1;
43059 /* For normal instructions we rely on get_attr_length being exact,
43060 with a few exceptions. */
43061 if (!JUMP_P (insn))
43063 enum attr_type type = get_attr_type (insn);
43065 switch (type)
43067 case TYPE_MULTI:
43068 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43069 || asm_noperands (PATTERN (insn)) >= 0)
43070 return 0;
43071 break;
43072 case TYPE_OTHER:
43073 case TYPE_FCMP:
43074 break;
43075 default:
43076 /* Otherwise trust get_attr_length. */
43077 return len;
43080 l = get_attr_length_address (insn);
43081 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43082 l = 4;
43084 if (l)
43085 return 1+l;
43086 else
43087 return 2;
43090 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43092 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
43093 window. */
43095 static void
43096 ix86_avoid_jump_mispredicts (void)
43098 rtx_insn *insn, *start = get_insns ();
43099 int nbytes = 0, njumps = 0;
43100 int isjump = 0;
43102 /* Look for all minimal intervals of instructions containing 4 jumps.
43103 The intervals are bounded by START and INSN. NBYTES is the total
43104 size of instructions in the interval including INSN and not including
43105 START. When the NBYTES is smaller than 16 bytes, it is possible
43106 that the end of START and INSN ends up in the same 16byte page.
43108 The smallest offset in the page INSN can start is the case where START
43109 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43110 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43112 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
43113 have to, since control transfer to its label(s) can be performed through other
43114 means, and we also estimate the minimum length of all asm stmts as 0. */
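/* Worked example (added): if NBYTES is 13 when a fourth 2-byte jump is seen,
   the padding emitted below uses max_skip 15 - 13 + 2 == 4, which is enough
   to push the new jump out of the 16-byte window holding the previous
   three.  */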
43115 for (insn = start; insn; insn = NEXT_INSN (insn))
43117 int min_size;
43119 if (LABEL_P (insn))
43121 int align = label_to_alignment (insn);
43122 int max_skip = label_to_max_skip (insn);
43124 if (max_skip > 15)
43125 max_skip = 15;
43126 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43127 already in the current 16 byte page, because otherwise
43128 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43129 bytes to reach 16 byte boundary. */
43130 if (align <= 0
43131 || (align <= 3 && max_skip != (1 << align) - 1))
43132 max_skip = 0;
43133 if (dump_file)
43134 fprintf (dump_file, "Label %i with max_skip %i\n",
43135 INSN_UID (insn), max_skip);
43136 if (max_skip)
43138 while (nbytes + max_skip >= 16)
43140 start = NEXT_INSN (start);
43141 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43142 || CALL_P (start))
43143 njumps--, isjump = 1;
43144 else
43145 isjump = 0;
43146 nbytes -= min_insn_size (start);
43149 continue;
43152 min_size = min_insn_size (insn);
43153 nbytes += min_size;
43154 if (dump_file)
43155 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43156 INSN_UID (insn), min_size);
43157 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43158 || CALL_P (insn))
43159 njumps++;
43160 else
43161 continue;
43163 while (njumps > 3)
43165 start = NEXT_INSN (start);
43166 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43167 || CALL_P (start))
43168 njumps--, isjump = 1;
43169 else
43170 isjump = 0;
43171 nbytes -= min_insn_size (start);
43173 gcc_assert (njumps >= 0);
43174 if (dump_file)
43175 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43176 INSN_UID (start), INSN_UID (insn), nbytes);
43178 if (njumps == 3 && isjump && nbytes < 16)
43180 int padsize = 15 - nbytes + min_insn_size (insn);
43182 if (dump_file)
43183 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43184 INSN_UID (insn), padsize);
43185 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43189 #endif
43191 /* AMD Athlon works faster
43192 when RET is not the destination of a conditional jump or directly preceded
43193 by another jump instruction. We avoid the penalty by inserting a NOP just
43194 before the RET instruction in such cases. */
43195 static void
43196 ix86_pad_returns (void)
43198 edge e;
43199 edge_iterator ei;
43201 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43203 basic_block bb = e->src;
43204 rtx_insn *ret = BB_END (bb);
43205 rtx_insn *prev;
43206 bool replace = false;
43208 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43209 || optimize_bb_for_size_p (bb))
43210 continue;
43211 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43212 if (active_insn_p (prev) || LABEL_P (prev))
43213 break;
43214 if (prev && LABEL_P (prev))
43216 edge e;
43217 edge_iterator ei;
43219 FOR_EACH_EDGE (e, ei, bb->preds)
43220 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43221 && !(e->flags & EDGE_FALLTHRU))
43223 replace = true;
43224 break;
43227 if (!replace)
43229 prev = prev_active_insn (ret);
43230 if (prev
43231 && ((JUMP_P (prev) && any_condjump_p (prev))
43232 || CALL_P (prev)))
43233 replace = true;
43234 /* Empty functions get a branch mispredict even when
43235 the jump destination is not visible to us. */
43236 if (!prev && !optimize_function_for_size_p (cfun))
43237 replace = true;
43239 if (replace)
43241 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43242 delete_insn (ret);
43247 /* Count the minimum number of instructions in BB. Return 4 if the
43248 number of instructions >= 4. */
43250 static int
43251 ix86_count_insn_bb (basic_block bb)
43253 rtx_insn *insn;
43254 int insn_count = 0;
43256 /* Count number of instructions in this block. Return 4 if the number
43257 of instructions >= 4. */
43258 FOR_BB_INSNS (bb, insn)
43260 /* This can only happen in exit blocks. */
43261 if (JUMP_P (insn)
43262 && ANY_RETURN_P (PATTERN (insn)))
43263 break;
43265 if (NONDEBUG_INSN_P (insn)
43266 && GET_CODE (PATTERN (insn)) != USE
43267 && GET_CODE (PATTERN (insn)) != CLOBBER)
43269 insn_count++;
43270 if (insn_count >= 4)
43271 return insn_count;
43275 return insn_count;
43279 /* Count the minimum number of instructions in code path in BB.
43280 Return 4 if the number of instructions >= 4. */
43282 static int
43283 ix86_count_insn (basic_block bb)
43285 edge e;
43286 edge_iterator ei;
43287 int min_prev_count;
43289 /* Only bother counting instructions along paths with no
43290 more than 2 basic blocks between entry and exit. Given
43291 that BB has an edge to exit, determine if a predecessor
43292 of BB has an edge from entry. If so, compute the number
43293 of instructions in the predecessor block. If there
43294 happen to be multiple such blocks, compute the minimum. */
43295 min_prev_count = 4;
43296 FOR_EACH_EDGE (e, ei, bb->preds)
43298 edge prev_e;
43299 edge_iterator prev_ei;
43301 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43303 min_prev_count = 0;
43304 break;
43306 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43308 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43310 int count = ix86_count_insn_bb (e->src);
43311 if (count < min_prev_count)
43312 min_prev_count = count;
43313 break;
43318 if (min_prev_count < 4)
43319 min_prev_count += ix86_count_insn_bb (bb);
43321 return min_prev_count;
43324 /* Pad short function to 4 instructions. */
43326 static void
43327 ix86_pad_short_function (void)
43329 edge e;
43330 edge_iterator ei;
43332 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43334 rtx_insn *ret = BB_END (e->src);
43335 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43337 int insn_count = ix86_count_insn (e->src);
43339 /* Pad short function. */
43340 if (insn_count < 4)
43342 rtx_insn *insn = ret;
43344 /* Find epilogue. */
43345 while (insn
43346 && (!NOTE_P (insn)
43347 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43348 insn = PREV_INSN (insn);
43350 if (!insn)
43351 insn = ret;
43353 /* Two NOPs count as one instruction. */
43354 insn_count = 2 * (4 - insn_count);
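/* Example (added): a function with a single counted instruction gets
   2 * (4 - 1) == 6 NOPs here, which the two-NOPs-per-instruction rule
   above counts as three instructions, bringing the total to four.  */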
43355 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43361 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43362 the epilogue, the Windows system unwinder will apply epilogue logic and
43363 produce incorrect offsets. This can be avoided by adding a nop between
43364 the last insn that can throw and the first insn of the epilogue. */
43366 static void
43367 ix86_seh_fixup_eh_fallthru (void)
43369 edge e;
43370 edge_iterator ei;
43372 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43374 rtx_insn *insn, *next;
43376 /* Find the beginning of the epilogue. */
43377 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43378 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43379 break;
43380 if (insn == NULL)
43381 continue;
43383 /* We only care about preceding insns that can throw. */
43384 insn = prev_active_insn (insn);
43385 if (insn == NULL || !can_throw_internal (insn))
43386 continue;
43388 /* Do not separate calls from their debug information. */
43389 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43390 if (NOTE_P (next)
43391 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43392 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43393 insn = next;
43394 else
43395 break;
43397 emit_insn_after (gen_nops (const1_rtx), insn);
43401 /* Implement machine specific optimizations. We implement padding of returns
43402 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
43403 static void
43404 ix86_reorg (void)
43406 /* We are freeing block_for_insn in the toplev to keep compatibility
43407 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43408 compute_bb_for_insn ();
43410 if (TARGET_SEH && current_function_has_exception_handlers ())
43411 ix86_seh_fixup_eh_fallthru ();
43413 if (optimize && optimize_function_for_speed_p (cfun))
43415 if (TARGET_PAD_SHORT_FUNCTION)
43416 ix86_pad_short_function ();
43417 else if (TARGET_PAD_RETURNS)
43418 ix86_pad_returns ();
43419 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43420 if (TARGET_FOUR_JUMP_LIMIT)
43421 ix86_avoid_jump_mispredicts ();
43422 #endif
43426 /* Return nonzero when a QImode register that must be represented via a REX prefix
43427 is used. */
43428 bool
43429 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43431 int i;
43432 extract_insn_cached (insn);
43433 for (i = 0; i < recog_data.n_operands; i++)
43434 if (GENERAL_REG_P (recog_data.operand[i])
43435 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43436 return true;
43437 return false;
43440 /* Return true when INSN mentions register that must be encoded using REX
43441 prefix. */
43442 bool
43443 x86_extended_reg_mentioned_p (rtx insn)
43445 subrtx_iterator::array_type array;
43446 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43448 const_rtx x = *iter;
43449 if (REG_P (x)
43450 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43451 return true;
43453 return false;
43456 /* If profitable, negate (without causing overflow) integer constant
43457 of mode MODE at location LOC. Return true in this case. */
43458 bool
43459 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43461 HOST_WIDE_INT val;
43463 if (!CONST_INT_P (*loc))
43464 return false;
43466 switch (mode)
43468 case DImode:
43469 /* DImode x86_64 constants must fit in 32 bits. */
43470 gcc_assert (x86_64_immediate_operand (*loc, mode));
43472 mode = SImode;
43473 break;
43475 case SImode:
43476 case HImode:
43477 case QImode:
43478 break;
43480 default:
43481 gcc_unreachable ();
43484 /* Avoid overflows. */
43485 if (mode_signbit_p (mode, *loc))
43486 return false;
43488 val = INTVAL (*loc);
43490 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43491 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
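/* Concrete example (added): addl $128, %eax needs a 32-bit immediate, while
   the equivalent subl $-128, %eax fits in a sign-extended 8-bit immediate,
   so +128 is the one positive value worth negating.  */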
43492 if ((val < 0 && val != -128)
43493 || val == 128)
43495 *loc = GEN_INT (-val);
43496 return true;
43499 return false;
43502 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43503 optabs would emit if we didn't have TFmode patterns. */
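/* Illustrative C model (added) of the sequence generated below, where
   FLOAT_TYPE stands for the target float mode of operands[0]:

     if ((signed) in >= 0)
       out = (FLOAT_TYPE) in;
     else
       {
         in2 = (in >> 1) | (in & 1);    halve, keeping the low bit for rounding
         out = (FLOAT_TYPE) in2;
         out = out + out;               double the result back up
       }
   */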
43505 void
43506 x86_emit_floatuns (rtx operands[2])
43508 rtx_code_label *neglab, *donelab;
43509 rtx i0, i1, f0, in, out;
43510 machine_mode mode, inmode;
43512 inmode = GET_MODE (operands[1]);
43513 gcc_assert (inmode == SImode || inmode == DImode);
43515 out = operands[0];
43516 in = force_reg (inmode, operands[1]);
43517 mode = GET_MODE (out);
43518 neglab = gen_label_rtx ();
43519 donelab = gen_label_rtx ();
43520 f0 = gen_reg_rtx (mode);
43522 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43524 expand_float (out, in, 0);
43526 emit_jump_insn (gen_jump (donelab));
43527 emit_barrier ();
43529 emit_label (neglab);
43531 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43532 1, OPTAB_DIRECT);
43533 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43534 1, OPTAB_DIRECT);
43535 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43537 expand_float (f0, i0, 0);
43539 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43541 emit_label (donelab);
43544 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43545 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43546 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43547 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43549 /* Get a vector mode of the same size as the original but with elements
43550 twice as wide. This is only guaranteed to apply to integral vectors. */
43552 static inline machine_mode
43553 get_mode_wider_vector (machine_mode o)
43555 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43556 machine_mode n = GET_MODE_WIDER_MODE (o);
43557 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43558 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43559 return n;
43562 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43563 fill target with val via vec_duplicate. */
43565 static bool
43566 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43568 bool ok;
43569 rtx_insn *insn;
43570 rtx dup;
43572 /* First attempt to recognize VAL as-is. */
43573 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43574 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43575 if (recog_memoized (insn) < 0)
43577 rtx_insn *seq;
43578 /* If that fails, force VAL into a register. */
43580 start_sequence ();
43581 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43582 seq = get_insns ();
43583 end_sequence ();
43584 if (seq)
43585 emit_insn_before (seq, insn);
43587 ok = recog_memoized (insn) >= 0;
43588 gcc_assert (ok);
43590 return true;
43593 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43594 with all elements equal to VAR. Return true if successful. */
43596 static bool
43597 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43598 rtx target, rtx val)
43600 bool ok;
43602 switch (mode)
43604 case V2SImode:
43605 case V2SFmode:
43606 if (!mmx_ok)
43607 return false;
43608 /* FALLTHRU */
43610 case V4DFmode:
43611 case V4DImode:
43612 case V8SFmode:
43613 case V8SImode:
43614 case V2DFmode:
43615 case V2DImode:
43616 case V4SFmode:
43617 case V4SImode:
43618 case V16SImode:
43619 case V8DImode:
43620 case V16SFmode:
43621 case V8DFmode:
43622 return ix86_vector_duplicate_value (mode, target, val);
43624 case V4HImode:
43625 if (!mmx_ok)
43626 return false;
43627 if (TARGET_SSE || TARGET_3DNOW_A)
43629 rtx x;
43631 val = gen_lowpart (SImode, val);
43632 x = gen_rtx_TRUNCATE (HImode, val);
43633 x = gen_rtx_VEC_DUPLICATE (mode, x);
43634 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43635 return true;
43637 goto widen;
43639 case V8QImode:
43640 if (!mmx_ok)
43641 return false;
43642 goto widen;
43644 case V8HImode:
43645 if (TARGET_AVX2)
43646 return ix86_vector_duplicate_value (mode, target, val);
43648 if (TARGET_SSE2)
43650 struct expand_vec_perm_d dperm;
43651 rtx tmp1, tmp2;
43653 permute:
43654 memset (&dperm, 0, sizeof (dperm));
43655 dperm.target = target;
43656 dperm.vmode = mode;
43657 dperm.nelt = GET_MODE_NUNITS (mode);
43658 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43659 dperm.one_operand_p = true;
43661 /* Extend to SImode using a paradoxical SUBREG. */
43662 tmp1 = gen_reg_rtx (SImode);
43663 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43665 /* Insert the SImode value as low element of a V4SImode vector. */
43666 tmp2 = gen_reg_rtx (V4SImode);
43667 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43668 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43670 ok = (expand_vec_perm_1 (&dperm)
43671 || expand_vec_perm_broadcast_1 (&dperm));
43672 gcc_assert (ok);
43673 return ok;
43675 goto widen;
43677 case V16QImode:
43678 if (TARGET_AVX2)
43679 return ix86_vector_duplicate_value (mode, target, val);
43681 if (TARGET_SSE2)
43682 goto permute;
43683 goto widen;
43685 widen:
43686 /* Replicate the value once into the next wider mode and recurse. */
43688 machine_mode smode, wsmode, wvmode;
43689 rtx x;
43691 smode = GET_MODE_INNER (mode);
43692 wvmode = get_mode_wider_vector (mode);
43693 wsmode = GET_MODE_INNER (wvmode);
43695 val = convert_modes (wsmode, smode, val, true);
43696 x = expand_simple_binop (wsmode, ASHIFT, val,
43697 GEN_INT (GET_MODE_BITSIZE (smode)),
43698 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43699 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
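/* Example (added): when widening a V16QImode broadcast of the byte 0xab,
   the shift-and-IOR above forms the HImode value 0xabab, which the
   recursive call below then duplicates across V8HImode.  */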
43701 x = gen_reg_rtx (wvmode);
43702 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43703 gcc_assert (ok);
43704 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43705 return ok;
43708 case V16HImode:
43709 case V32QImode:
43710 if (TARGET_AVX2)
43711 return ix86_vector_duplicate_value (mode, target, val);
43712 else
43714 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43715 rtx x = gen_reg_rtx (hvmode);
43717 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43718 gcc_assert (ok);
43720 x = gen_rtx_VEC_CONCAT (mode, x, x);
43721 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43723 return true;
43725 case V64QImode:
43726 case V32HImode:
43727 if (TARGET_AVX512BW)
43728 return ix86_vector_duplicate_value (mode, target, val);
43729 else
43731 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43732 rtx x = gen_reg_rtx (hvmode);
43734 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43735 gcc_assert (ok);
43737 x = gen_rtx_VEC_CONCAT (mode, x, x);
43738 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43740 return true;
43742 default:
43743 return false;
43747 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43748 whose ONE_VAR element is VAR, and other elements are zero. Return true
43749 if successful. */
43751 static bool
43752 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43753 rtx target, rtx var, int one_var)
43755 machine_mode vsimode;
43756 rtx new_target;
43757 rtx x, tmp;
43758 bool use_vector_set = false;
43760 switch (mode)
43762 case V2DImode:
43763 /* For SSE4.1, we normally use vector set. But if the second
43764 element is zero and inter-unit moves are OK, we use movq
43765 instead. */
43766 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43767 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43768 && one_var == 0));
43769 break;
43770 case V16QImode:
43771 case V4SImode:
43772 case V4SFmode:
43773 use_vector_set = TARGET_SSE4_1;
43774 break;
43775 case V8HImode:
43776 use_vector_set = TARGET_SSE2;
43777 break;
43778 case V4HImode:
43779 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43780 break;
43781 case V32QImode:
43782 case V16HImode:
43783 case V8SImode:
43784 case V8SFmode:
43785 case V4DFmode:
43786 use_vector_set = TARGET_AVX;
43787 break;
43788 case V4DImode:
43789 /* Use ix86_expand_vector_set in 64bit mode only. */
43790 use_vector_set = TARGET_AVX && TARGET_64BIT;
43791 break;
43792 default:
43793 break;
43796 if (use_vector_set)
43798 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43799 var = force_reg (GET_MODE_INNER (mode), var);
43800 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43801 return true;
43804 switch (mode)
43806 case V2SFmode:
43807 case V2SImode:
43808 if (!mmx_ok)
43809 return false;
43810 /* FALLTHRU */
43812 case V2DFmode:
43813 case V2DImode:
43814 if (one_var != 0)
43815 return false;
43816 var = force_reg (GET_MODE_INNER (mode), var);
43817 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43818 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43819 return true;
43821 case V4SFmode:
43822 case V4SImode:
43823 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43824 new_target = gen_reg_rtx (mode);
43825 else
43826 new_target = target;
43827 var = force_reg (GET_MODE_INNER (mode), var);
43828 x = gen_rtx_VEC_DUPLICATE (mode, var);
43829 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43830 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43831 if (one_var != 0)
43833 /* We need to shuffle the value to the correct position, so
43834 create a new pseudo to store the intermediate result. */
43836 /* With SSE2, we can use the integer shuffle insns. */
43837 if (mode != V4SFmode && TARGET_SSE2)
43839 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43840 const1_rtx,
43841 GEN_INT (one_var == 1 ? 0 : 1),
43842 GEN_INT (one_var == 2 ? 0 : 1),
43843 GEN_INT (one_var == 3 ? 0 : 1)));
43844 if (target != new_target)
43845 emit_move_insn (target, new_target);
43846 return true;
43849 /* Otherwise convert the intermediate result to V4SFmode and
43850 use the SSE1 shuffle instructions. */
43851 if (mode != V4SFmode)
43853 tmp = gen_reg_rtx (V4SFmode);
43854 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43856 else
43857 tmp = new_target;
43859 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43860 const1_rtx,
43861 GEN_INT (one_var == 1 ? 0 : 1),
43862 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43863 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43865 if (mode != V4SFmode)
43866 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43867 else if (tmp != target)
43868 emit_move_insn (target, tmp);
43870 else if (target != new_target)
43871 emit_move_insn (target, new_target);
43872 return true;
43874 case V8HImode:
43875 case V16QImode:
43876 vsimode = V4SImode;
43877 goto widen;
43878 case V4HImode:
43879 case V8QImode:
43880 if (!mmx_ok)
43881 return false;
43882 vsimode = V2SImode;
43883 goto widen;
43884 widen:
43885 if (one_var != 0)
43886 return false;
43888 /* Zero extend the variable element to SImode and recurse. */
43889 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43891 x = gen_reg_rtx (vsimode);
43892 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43893 var, one_var))
43894 gcc_unreachable ();
43896 emit_move_insn (target, gen_lowpart (mode, x));
43897 return true;
43899 default:
43900 return false;
43904 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43905 consisting of the values in VALS. It is known that all elements
43906 except ONE_VAR are constants. Return true if successful. */
43908 static bool
43909 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43910 rtx target, rtx vals, int one_var)
43912 rtx var = XVECEXP (vals, 0, one_var);
43913 machine_mode wmode;
43914 rtx const_vec, x;
43916 const_vec = copy_rtx (vals);
43917 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43918 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43920 switch (mode)
43922 case V2DFmode:
43923 case V2DImode:
43924 case V2SFmode:
43925 case V2SImode:
43926 /* For the two element vectors, it's just as easy to use
43927 the general case. */
43928 return false;
43930 case V4DImode:
43931 /* Use ix86_expand_vector_set in 64bit mode only. */
43932 if (!TARGET_64BIT)
43933 return false;
43934 case V4DFmode:
43935 case V8SFmode:
43936 case V8SImode:
43937 case V16HImode:
43938 case V32QImode:
43939 case V4SFmode:
43940 case V4SImode:
43941 case V8HImode:
43942 case V4HImode:
43943 break;
43945 case V16QImode:
43946 if (TARGET_SSE4_1)
43947 break;
43948 wmode = V8HImode;
43949 goto widen;
43950 case V8QImode:
43951 wmode = V4HImode;
43952 goto widen;
43953 widen:
43954 /* There's no way to set one QImode entry easily. Combine
43955 the variable value with its adjacent constant value, and
43956 promote to an HImode set. */
43957 x = XVECEXP (vals, 0, one_var ^ 1);
43958 if (one_var & 1)
43960 var = convert_modes (HImode, QImode, var, true);
43961 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43962 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43963 x = GEN_INT (INTVAL (x) & 0xff);
43965 else
43967 var = convert_modes (HImode, QImode, var, true);
43968 x = gen_int_mode (INTVAL (x) << 8, HImode);
43970 if (x != const0_rtx)
43971 var = expand_simple_binop (HImode, IOR, var, x, var,
43972 1, OPTAB_LIB_WIDEN);
43974 x = gen_reg_rtx (wmode);
43975 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43976 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43978 emit_move_insn (target, gen_lowpart (mode, x));
43979 return true;
43981 default:
43982 return false;
43985 emit_move_insn (target, const_vec);
43986 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43987 return true;
43990 /* A subroutine of ix86_expand_vector_init_general. Use vector
43991 concatenate to handle the most general case: all values variable,
43992 and none identical. */
43994 static void
43995 ix86_expand_vector_init_concat (machine_mode mode,
43996 rtx target, rtx *ops, int n)
43998 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43999 rtx first[16], second[8], third[4];
44000 rtvec v;
44001 int i, j;
44003 switch (n)
44005 case 2:
44006 switch (mode)
44008 case V16SImode:
44009 cmode = V8SImode;
44010 break;
44011 case V16SFmode:
44012 cmode = V8SFmode;
44013 break;
44014 case V8DImode:
44015 cmode = V4DImode;
44016 break;
44017 case V8DFmode:
44018 cmode = V4DFmode;
44019 break;
44020 case V8SImode:
44021 cmode = V4SImode;
44022 break;
44023 case V8SFmode:
44024 cmode = V4SFmode;
44025 break;
44026 case V4DImode:
44027 cmode = V2DImode;
44028 break;
44029 case V4DFmode:
44030 cmode = V2DFmode;
44031 break;
44032 case V4SImode:
44033 cmode = V2SImode;
44034 break;
44035 case V4SFmode:
44036 cmode = V2SFmode;
44037 break;
44038 case V2DImode:
44039 cmode = DImode;
44040 break;
44041 case V2SImode:
44042 cmode = SImode;
44043 break;
44044 case V2DFmode:
44045 cmode = DFmode;
44046 break;
44047 case V2SFmode:
44048 cmode = SFmode;
44049 break;
44050 default:
44051 gcc_unreachable ();
44054 if (!register_operand (ops[1], cmode))
44055 ops[1] = force_reg (cmode, ops[1]);
44056 if (!register_operand (ops[0], cmode))
44057 ops[0] = force_reg (cmode, ops[0]);
44058 emit_insn (gen_rtx_SET (VOIDmode, target,
44059 gen_rtx_VEC_CONCAT (mode, ops[0],
44060 ops[1])));
44061 break;
44063 case 4:
44064 switch (mode)
44066 case V4DImode:
44067 cmode = V2DImode;
44068 break;
44069 case V4DFmode:
44070 cmode = V2DFmode;
44071 break;
44072 case V4SImode:
44073 cmode = V2SImode;
44074 break;
44075 case V4SFmode:
44076 cmode = V2SFmode;
44077 break;
44078 default:
44079 gcc_unreachable ();
44081 goto half;
44083 case 8:
44084 switch (mode)
44086 case V8DImode:
44087 cmode = V2DImode;
44088 hmode = V4DImode;
44089 break;
44090 case V8DFmode:
44091 cmode = V2DFmode;
44092 hmode = V4DFmode;
44093 break;
44094 case V8SImode:
44095 cmode = V2SImode;
44096 hmode = V4SImode;
44097 break;
44098 case V8SFmode:
44099 cmode = V2SFmode;
44100 hmode = V4SFmode;
44101 break;
44102 default:
44103 gcc_unreachable ();
44105 goto half;
44107 case 16:
44108 switch (mode)
44110 case V16SImode:
44111 cmode = V2SImode;
44112 hmode = V4SImode;
44113 gmode = V8SImode;
44114 break;
44115 case V16SFmode:
44116 cmode = V2SFmode;
44117 hmode = V4SFmode;
44118 gmode = V8SFmode;
44119 break;
44120 default:
44121 gcc_unreachable ();
44123 goto half;
44125 half:
44126 /* FIXME: We process inputs backward to help RA. PR 36222. */
44127 i = n - 1;
44128 j = (n >> 1) - 1;
44129 for (; i > 0; i -= 2, j--)
44131 first[j] = gen_reg_rtx (cmode);
44132 v = gen_rtvec (2, ops[i - 1], ops[i]);
44133 ix86_expand_vector_init (false, first[j],
44134 gen_rtx_PARALLEL (cmode, v));
44137 n >>= 1;
44138 if (n > 4)
44140 gcc_assert (hmode != VOIDmode);
44141 gcc_assert (gmode != VOIDmode);
44142 for (i = j = 0; i < n; i += 2, j++)
44144 second[j] = gen_reg_rtx (hmode);
44145 ix86_expand_vector_init_concat (hmode, second [j],
44146 &first [i], 2);
44148 n >>= 1;
44149 for (i = j = 0; i < n; i += 2, j++)
44151 third[j] = gen_reg_rtx (gmode);
44152 ix86_expand_vector_init_concat (gmode, third[j],
44153 &second[i], 2);
44155 n >>= 1;
44156 ix86_expand_vector_init_concat (mode, target, third, n);
44158 else if (n > 2)
44160 gcc_assert (hmode != VOIDmode);
44161 for (i = j = 0; i < n; i += 2, j++)
44163 second[j] = gen_reg_rtx (hmode);
44164 ix86_expand_vector_init_concat (hmode, second [j],
44165 &first [i], 2);
44167 n >>= 1;
44168 ix86_expand_vector_init_concat (mode, target, second, n);
44170 else
44171 ix86_expand_vector_init_concat (mode, target, first, n);
44172 break;
44174 default:
44175 gcc_unreachable ();
44179 /* A subroutine of ix86_expand_vector_init_general. Use vector
44180 interleave to handle the most general case: all values variable,
44181 and none identical. */
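/* Outline of the interleave strategy below, using V16QImode for
illustration (V8HImode is analogous, one level shorter): each pair of
scalar inputs is first combined into one vector register -- ops[2*i]
enters via an SImode lowpart move, ops[2*i+1] via GEN_LOAD_EVEN at
position 1 -- and the resulting partial vectors are then merged by
successive "interleave low" steps at V8HImode, V4SImode and finally
V2DImode granularity. */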
44183 static void
44184 ix86_expand_vector_init_interleave (machine_mode mode,
44185 rtx target, rtx *ops, int n)
44187 machine_mode first_imode, second_imode, third_imode, inner_mode;
44188 int i, j;
44189 rtx op0, op1;
44190 rtx (*gen_load_even) (rtx, rtx, rtx);
44191 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44192 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44194 switch (mode)
44196 case V8HImode:
44197 gen_load_even = gen_vec_setv8hi;
44198 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44199 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44200 inner_mode = HImode;
44201 first_imode = V4SImode;
44202 second_imode = V2DImode;
44203 third_imode = VOIDmode;
44204 break;
44205 case V16QImode:
44206 gen_load_even = gen_vec_setv16qi;
44207 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44208 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44209 inner_mode = QImode;
44210 first_imode = V8HImode;
44211 second_imode = V4SImode;
44212 third_imode = V2DImode;
44213 break;
44214 default:
44215 gcc_unreachable ();
44218 for (i = 0; i < n; i++)
44220 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44221 op0 = gen_reg_rtx (SImode);
44222 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44224 /* Insert the SImode value as low element of V4SImode vector. */
44225 op1 = gen_reg_rtx (V4SImode);
44226 op0 = gen_rtx_VEC_MERGE (V4SImode,
44227 gen_rtx_VEC_DUPLICATE (V4SImode,
44228 op0),
44229 CONST0_RTX (V4SImode),
44230 const1_rtx);
44231 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44233 /* Cast the V4SImode vector back to a vector in the original mode. */
44234 op0 = gen_reg_rtx (mode);
44235 emit_move_insn (op0, gen_lowpart (mode, op1));
44237 /* Load even elements into the second position. */
44238 emit_insn (gen_load_even (op0,
44239 force_reg (inner_mode,
44240 ops [i + i + 1]),
44241 const1_rtx));
44243 /* Cast vector to FIRST_IMODE vector. */
44244 ops[i] = gen_reg_rtx (first_imode);
44245 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44248 /* Interleave low FIRST_IMODE vectors. */
44249 for (i = j = 0; i < n; i += 2, j++)
44251 op0 = gen_reg_rtx (first_imode);
44252 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44254 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44255 ops[j] = gen_reg_rtx (second_imode);
44256 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44259 /* Interleave low SECOND_IMODE vectors. */
44260 switch (second_imode)
44262 case V4SImode:
44263 for (i = j = 0; i < n / 2; i += 2, j++)
44265 op0 = gen_reg_rtx (second_imode);
44266 emit_insn (gen_interleave_second_low (op0, ops[i],
44267 ops[i + 1]));
44269 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44270 vector. */
44271 ops[j] = gen_reg_rtx (third_imode);
44272 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44274 second_imode = V2DImode;
44275 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44276 /* FALLTHRU */
44278 case V2DImode:
44279 op0 = gen_reg_rtx (second_imode);
44280 emit_insn (gen_interleave_second_low (op0, ops[0],
44281 ops[1]));
44283 /* Cast the SECOND_IMODE vector back to a vector in the original
44284 mode. */
44285 emit_insn (gen_rtx_SET (VOIDmode, target,
44286 gen_lowpart (mode, op0)));
44287 break;
44289 default:
44290 gcc_unreachable ();
44294 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44295 all values variable, and none identical. */
44297 static void
44298 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44299 rtx target, rtx vals)
44301 rtx ops[64], op0, op1, op2, op3, op4, op5;
44302 machine_mode half_mode = VOIDmode;
44303 machine_mode quarter_mode = VOIDmode;
44304 int n, i;
44306 switch (mode)
44308 case V2SFmode:
44309 case V2SImode:
44310 if (!mmx_ok && !TARGET_SSE)
44311 break;
44312 /* FALLTHRU */
44314 case V16SImode:
44315 case V16SFmode:
44316 case V8DFmode:
44317 case V8DImode:
44318 case V8SFmode:
44319 case V8SImode:
44320 case V4DFmode:
44321 case V4DImode:
44322 case V4SFmode:
44323 case V4SImode:
44324 case V2DFmode:
44325 case V2DImode:
44326 n = GET_MODE_NUNITS (mode);
44327 for (i = 0; i < n; i++)
44328 ops[i] = XVECEXP (vals, 0, i);
44329 ix86_expand_vector_init_concat (mode, target, ops, n);
44330 return;
44332 case V32QImode:
44333 half_mode = V16QImode;
44334 goto half;
44336 case V16HImode:
44337 half_mode = V8HImode;
44338 goto half;
44340 half:
44341 n = GET_MODE_NUNITS (mode);
44342 for (i = 0; i < n; i++)
44343 ops[i] = XVECEXP (vals, 0, i);
44344 op0 = gen_reg_rtx (half_mode);
44345 op1 = gen_reg_rtx (half_mode);
44346 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44347 n >> 2);
44348 ix86_expand_vector_init_interleave (half_mode, op1,
44349 &ops [n >> 1], n >> 2);
44350 emit_insn (gen_rtx_SET (VOIDmode, target,
44351 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44352 return;
44354 case V64QImode:
44355 quarter_mode = V16QImode;
44356 half_mode = V32QImode;
44357 goto quarter;
44359 case V32HImode:
44360 quarter_mode = V8HImode;
44361 half_mode = V16HImode;
44362 goto quarter;
44364 quarter:
44365 n = GET_MODE_NUNITS (mode);
44366 for (i = 0; i < n; i++)
44367 ops[i] = XVECEXP (vals, 0, i);
44368 op0 = gen_reg_rtx (quarter_mode);
44369 op1 = gen_reg_rtx (quarter_mode);
44370 op2 = gen_reg_rtx (quarter_mode);
44371 op3 = gen_reg_rtx (quarter_mode);
44372 op4 = gen_reg_rtx (half_mode);
44373 op5 = gen_reg_rtx (half_mode);
44374 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44375 n >> 3);
44376 ix86_expand_vector_init_interleave (quarter_mode, op1,
44377 &ops [n >> 2], n >> 3);
44378 ix86_expand_vector_init_interleave (quarter_mode, op2,
44379 &ops [n >> 1], n >> 3);
44380 ix86_expand_vector_init_interleave (quarter_mode, op3,
44381 &ops [(n >> 1) | (n >> 2)], n >> 3);
44382 emit_insn (gen_rtx_SET (VOIDmode, op4,
44383 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44384 emit_insn (gen_rtx_SET (VOIDmode, op5,
44385 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44386 emit_insn (gen_rtx_SET (VOIDmode, target,
44387 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44388 return;
44390 case V16QImode:
44391 if (!TARGET_SSE4_1)
44392 break;
44393 /* FALLTHRU */
44395 case V8HImode:
44396 if (!TARGET_SSE2)
44397 break;
44399 /* Don't use ix86_expand_vector_init_interleave if we can't
44400 move from GPR to SSE register directly. */
44401 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44402 break;
44404 n = GET_MODE_NUNITS (mode);
44405 for (i = 0; i < n; i++)
44406 ops[i] = XVECEXP (vals, 0, i);
44407 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44408 return;
44410 case V4HImode:
44411 case V8QImode:
44412 break;
44414 default:
44415 gcc_unreachable ();
44419 int i, j, n_elts, n_words, n_elt_per_word;
44420 machine_mode inner_mode;
44421 rtx words[4], shift;
44423 inner_mode = GET_MODE_INNER (mode);
44424 n_elts = GET_MODE_NUNITS (mode);
44425 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44426 n_elt_per_word = n_elts / n_words;
44427 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44429 for (i = 0; i < n_words; ++i)
44431 rtx word = NULL_RTX;
44433 for (j = 0; j < n_elt_per_word; ++j)
44435 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44436 elt = convert_modes (word_mode, inner_mode, elt, true);
44438 if (j == 0)
44439 word = elt;
44440 else
44442 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44443 word, 1, OPTAB_LIB_WIDEN);
44444 word = expand_simple_binop (word_mode, IOR, word, elt,
44445 word, 1, OPTAB_LIB_WIDEN);
44449 words[i] = word;
44452 if (n_words == 1)
44453 emit_move_insn (target, gen_lowpart (mode, words[0]));
44454 else if (n_words == 2)
44456 rtx tmp = gen_reg_rtx (mode);
44457 emit_clobber (tmp);
44458 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44459 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44460 emit_move_insn (target, tmp);
44462 else if (n_words == 4)
44464 rtx tmp = gen_reg_rtx (V4SImode);
44465 gcc_assert (word_mode == SImode);
44466 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44467 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44468 emit_move_insn (target, gen_lowpart (mode, tmp));
44470 else
44471 gcc_unreachable ();
44475 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44476 instructions unless MMX_OK is true. */
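/* In outline, the expansion below tries the cheapest strategy first:
an all-constant vector becomes a constant-pool load, an all-identical
vector is broadcast via ix86_expand_vector_init_duplicate, a vector
with exactly one variable element is built from a constant vector plus
a single element store, and only the fully general case falls through
to ix86_expand_vector_init_general. */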
44478 void
44479 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44481 machine_mode mode = GET_MODE (target);
44482 machine_mode inner_mode = GET_MODE_INNER (mode);
44483 int n_elts = GET_MODE_NUNITS (mode);
44484 int n_var = 0, one_var = -1;
44485 bool all_same = true, all_const_zero = true;
44486 int i;
44487 rtx x;
44489 for (i = 0; i < n_elts; ++i)
44491 x = XVECEXP (vals, 0, i);
44492 if (!(CONST_INT_P (x)
44493 || GET_CODE (x) == CONST_DOUBLE
44494 || GET_CODE (x) == CONST_FIXED))
44495 n_var++, one_var = i;
44496 else if (x != CONST0_RTX (inner_mode))
44497 all_const_zero = false;
44498 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44499 all_same = false;
44502 /* Constants are best loaded from the constant pool. */
44503 if (n_var == 0)
44505 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44506 return;
44509 /* If all values are identical, broadcast the value. */
44510 if (all_same
44511 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44512 XVECEXP (vals, 0, 0)))
44513 return;
44515 /* Values where only one field is non-constant are best loaded from
44516 the pool and overwritten via move later. */
44517 if (n_var == 1)
44519 if (all_const_zero
44520 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44521 XVECEXP (vals, 0, one_var),
44522 one_var))
44523 return;
44525 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44526 return;
44529 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44532 void
44533 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44535 machine_mode mode = GET_MODE (target);
44536 machine_mode inner_mode = GET_MODE_INNER (mode);
44537 machine_mode half_mode;
44538 bool use_vec_merge = false;
44539 rtx tmp;
44540 static rtx (*gen_extract[6][2]) (rtx, rtx)
44542 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44543 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44544 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44545 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44546 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44547 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44549 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44551 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44552 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44553 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44554 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44555 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44556 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44558 int i, j, n;
44560 switch (mode)
44562 case V2SFmode:
44563 case V2SImode:
44564 if (mmx_ok)
44566 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44567 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44568 if (elt == 0)
44569 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44570 else
44571 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44572 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44573 return;
44575 break;
44577 case V2DImode:
44578 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44579 if (use_vec_merge)
44580 break;
44582 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44583 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44584 if (elt == 0)
44585 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44586 else
44587 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44588 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44589 return;
44591 case V2DFmode:
44593 rtx op0, op1;
44595 /* For the two element vectors, we implement a VEC_CONCAT with
44596 the extraction of the other element. */
44598 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44599 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44601 if (elt == 0)
44602 op0 = val, op1 = tmp;
44603 else
44604 op0 = tmp, op1 = val;
44606 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44607 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44609 return;
44611 case V4SFmode:
44612 use_vec_merge = TARGET_SSE4_1;
44613 if (use_vec_merge)
44614 break;
44616 switch (elt)
44618 case 0:
44619 use_vec_merge = true;
44620 break;
44622 case 1:
44623 /* tmp = target = A B C D */
44624 tmp = copy_to_reg (target);
44625 /* target = A A B B */
44626 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44627 /* target = X A B B */
44628 ix86_expand_vector_set (false, target, val, 0);
44629 /* target = A X C D */
44630 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44631 const1_rtx, const0_rtx,
44632 GEN_INT (2+4), GEN_INT (3+4)));
44633 return;
44635 case 2:
44636 /* tmp = target = A B C D */
44637 tmp = copy_to_reg (target);
44638 /* tmp = X B C D */
44639 ix86_expand_vector_set (false, tmp, val, 0);
44640 /* target = A B X D */
44641 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44642 const0_rtx, const1_rtx,
44643 GEN_INT (0+4), GEN_INT (3+4)));
44644 return;
44646 case 3:
44647 /* tmp = target = A B C D */
44648 tmp = copy_to_reg (target);
44649 /* tmp = X B C D */
44650 ix86_expand_vector_set (false, tmp, val, 0);
44651 /* target = A B C X */
44652 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44653 const0_rtx, const1_rtx,
44654 GEN_INT (2+4), GEN_INT (0+4)));
44655 return;
44657 default:
44658 gcc_unreachable ();
44660 break;
44662 case V4SImode:
44663 use_vec_merge = TARGET_SSE4_1;
44664 if (use_vec_merge)
44665 break;
44667 /* Element 0 handled by vec_merge below. */
44668 if (elt == 0)
44670 use_vec_merge = true;
44671 break;
44674 if (TARGET_SSE2)
44676 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44677 store into element 0, then shuffle them back. */
44679 rtx order[4];
44681 order[0] = GEN_INT (elt);
44682 order[1] = const1_rtx;
44683 order[2] = const2_rtx;
44684 order[3] = GEN_INT (3);
44685 order[elt] = const0_rtx;
44687 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44688 order[1], order[2], order[3]));
44690 ix86_expand_vector_set (false, target, val, 0);
44692 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44693 order[1], order[2], order[3]));
44695 else
44697 /* For SSE1, we have to reuse the V4SF code. */
44698 rtx t = gen_reg_rtx (V4SFmode);
44699 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44700 emit_move_insn (target, gen_lowpart (mode, t));
44702 return;
44704 case V8HImode:
44705 use_vec_merge = TARGET_SSE2;
44706 break;
44707 case V4HImode:
44708 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44709 break;
44711 case V16QImode:
44712 use_vec_merge = TARGET_SSE4_1;
44713 break;
44715 case V8QImode:
44716 break;
44718 case V32QImode:
44719 half_mode = V16QImode;
44720 j = 0;
44721 n = 16;
44722 goto half;
44724 case V16HImode:
44725 half_mode = V8HImode;
44726 j = 1;
44727 n = 8;
44728 goto half;
44730 case V8SImode:
44731 half_mode = V4SImode;
44732 j = 2;
44733 n = 4;
44734 goto half;
44736 case V4DImode:
44737 half_mode = V2DImode;
44738 j = 3;
44739 n = 2;
44740 goto half;
44742 case V8SFmode:
44743 half_mode = V4SFmode;
44744 j = 4;
44745 n = 4;
44746 goto half;
44748 case V4DFmode:
44749 half_mode = V2DFmode;
44750 j = 5;
44751 n = 2;
44752 goto half;
44754 half:
44755 /* Compute offset. */
44756 i = elt / n;
44757 elt %= n;
44759 gcc_assert (i <= 1);
44761 /* Extract the half. */
44762 tmp = gen_reg_rtx (half_mode);
44763 emit_insn (gen_extract[j][i] (tmp, target));
44765 /* Put val in tmp at elt. */
44766 ix86_expand_vector_set (false, tmp, val, elt);
44768 /* Put it back. */
44769 emit_insn (gen_insert[j][i] (target, target, tmp));
44770 return;
44772 case V8DFmode:
44773 if (TARGET_AVX512F)
44775 tmp = gen_reg_rtx (mode);
44776 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44777 gen_rtx_VEC_DUPLICATE (mode, val)));
44778 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44779 force_reg (QImode, GEN_INT (1 << elt))));
44780 return;
44782 else
44783 break;
44784 case V8DImode:
44785 if (TARGET_AVX512F)
44787 tmp = gen_reg_rtx (mode);
44788 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44789 gen_rtx_VEC_DUPLICATE (mode, val)));
44790 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44791 force_reg (QImode, GEN_INT (1 << elt))));
44792 return;
44794 else
44795 break;
44796 case V16SFmode:
44797 if (TARGET_AVX512F)
44799 tmp = gen_reg_rtx (mode);
44800 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44801 gen_rtx_VEC_DUPLICATE (mode, val)));
44802 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44803 force_reg (HImode, GEN_INT (1 << elt))));
44804 return;
44806 else
44807 break;
44808 case V16SImode:
44809 if (TARGET_AVX512F)
44811 tmp = gen_reg_rtx (mode);
44812 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44813 gen_rtx_VEC_DUPLICATE (mode, val)));
44814 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44815 force_reg (HImode, GEN_INT (1 << elt))));
44816 return;
44818 else
44819 break;
44820 case V32HImode:
44821 if (TARGET_AVX512F && TARGET_AVX512BW)
44823 tmp = gen_reg_rtx (mode);
44824 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44825 gen_rtx_VEC_DUPLICATE (mode, val)));
44826 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44827 force_reg (SImode, GEN_INT (1 << elt))));
44828 return;
44830 else
44831 break;
44832 case V64QImode:
44833 if (TARGET_AVX512F && TARGET_AVX512BW)
44835 tmp = gen_reg_rtx (mode);
44836 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44837 gen_rtx_VEC_DUPLICATE (mode, val)));
44838 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44839 force_reg (DImode, GEN_INT (1 << elt))));
44840 return;
44842 else
44843 break;
44845 default:
44846 break;
44849 if (use_vec_merge)
44851 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44852 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44853 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44855 else
44857 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44859 emit_move_insn (mem, target);
44861 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44862 emit_move_insn (tmp, val);
44864 emit_move_insn (target, mem);
44868 void
44869 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44871 machine_mode mode = GET_MODE (vec);
44872 machine_mode inner_mode = GET_MODE_INNER (mode);
44873 bool use_vec_extr = false;
44874 rtx tmp;
44876 switch (mode)
44878 case V2SImode:
44879 case V2SFmode:
44880 if (!mmx_ok)
44881 break;
44882 /* FALLTHRU */
44884 case V2DFmode:
44885 case V2DImode:
44886 use_vec_extr = true;
44887 break;
44889 case V4SFmode:
44890 use_vec_extr = TARGET_SSE4_1;
44891 if (use_vec_extr)
44892 break;
44894 switch (elt)
44896 case 0:
44897 tmp = vec;
44898 break;
44900 case 1:
44901 case 3:
44902 tmp = gen_reg_rtx (mode);
44903 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44904 GEN_INT (elt), GEN_INT (elt),
44905 GEN_INT (elt+4), GEN_INT (elt+4)));
44906 break;
44908 case 2:
44909 tmp = gen_reg_rtx (mode);
44910 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44911 break;
44913 default:
44914 gcc_unreachable ();
44916 vec = tmp;
44917 use_vec_extr = true;
44918 elt = 0;
44919 break;
44921 case V4SImode:
44922 use_vec_extr = TARGET_SSE4_1;
44923 if (use_vec_extr)
44924 break;
44926 if (TARGET_SSE2)
44928 switch (elt)
44930 case 0:
44931 tmp = vec;
44932 break;
44934 case 1:
44935 case 3:
44936 tmp = gen_reg_rtx (mode);
44937 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44938 GEN_INT (elt), GEN_INT (elt),
44939 GEN_INT (elt), GEN_INT (elt)));
44940 break;
44942 case 2:
44943 tmp = gen_reg_rtx (mode);
44944 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44945 break;
44947 default:
44948 gcc_unreachable ();
44950 vec = tmp;
44951 use_vec_extr = true;
44952 elt = 0;
44954 else
44956 /* For SSE1, we have to reuse the V4SF code. */
44957 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44958 gen_lowpart (V4SFmode, vec), elt);
44959 return;
44961 break;
44963 case V8HImode:
44964 use_vec_extr = TARGET_SSE2;
44965 break;
44966 case V4HImode:
44967 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44968 break;
44970 case V16QImode:
44971 use_vec_extr = TARGET_SSE4_1;
44972 break;
44974 case V8SFmode:
44975 if (TARGET_AVX)
44977 tmp = gen_reg_rtx (V4SFmode);
44978 if (elt < 4)
44979 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44980 else
44981 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44982 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44983 return;
44985 break;
44987 case V4DFmode:
44988 if (TARGET_AVX)
44990 tmp = gen_reg_rtx (V2DFmode);
44991 if (elt < 2)
44992 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44993 else
44994 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44995 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44996 return;
44998 break;
45000 case V32QImode:
45001 if (TARGET_AVX)
45003 tmp = gen_reg_rtx (V16QImode);
45004 if (elt < 16)
45005 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45006 else
45007 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45008 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45009 return;
45011 break;
45013 case V16HImode:
45014 if (TARGET_AVX)
45016 tmp = gen_reg_rtx (V8HImode);
45017 if (elt < 8)
45018 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45019 else
45020 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45021 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45022 return;
45024 break;
45026 case V8SImode:
45027 if (TARGET_AVX)
45029 tmp = gen_reg_rtx (V4SImode);
45030 if (elt < 4)
45031 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45032 else
45033 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45034 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45035 return;
45037 break;
45039 case V4DImode:
45040 if (TARGET_AVX)
45042 tmp = gen_reg_rtx (V2DImode);
45043 if (elt < 2)
45044 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45045 else
45046 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45047 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45048 return;
45050 break;
45052 case V32HImode:
45053 if (TARGET_AVX512BW)
45055 tmp = gen_reg_rtx (V16HImode);
45056 if (elt < 16)
45057 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45058 else
45059 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45060 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45061 return;
45063 break;
45065 case V64QImode:
45066 if (TARGET_AVX512BW)
45068 tmp = gen_reg_rtx (V32QImode);
45069 if (elt < 32)
45070 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45071 else
45072 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45073 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45074 return;
45076 break;
45078 case V16SFmode:
45079 tmp = gen_reg_rtx (V8SFmode);
45080 if (elt < 8)
45081 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45082 else
45083 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45084 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45085 return;
45087 case V8DFmode:
45088 tmp = gen_reg_rtx (V4DFmode);
45089 if (elt < 4)
45090 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45091 else
45092 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45093 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45094 return;
45096 case V16SImode:
45097 tmp = gen_reg_rtx (V8SImode);
45098 if (elt < 8)
45099 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45100 else
45101 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45102 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45103 return;
45105 case V8DImode:
45106 tmp = gen_reg_rtx (V4DImode);
45107 if (elt < 4)
45108 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45109 else
45110 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45111 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45112 return;
45114 case V8QImode:
45115 /* ??? Could extract the appropriate HImode element and shift. */
45116 default:
45117 break;
45120 if (use_vec_extr)
45122 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45123 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45125 /* Let the rtl optimizers know about the zero extension performed. */
45126 if (inner_mode == QImode || inner_mode == HImode)
45128 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45129 target = gen_lowpart (SImode, target);
45132 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45134 else
45136 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45138 emit_move_insn (mem, vec);
45140 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45141 emit_move_insn (target, tmp);
45145 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45146 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45147 The upper bits of DEST are undefined, though they shouldn't cause
45148 exceptions (some bits from src or all zeros are ok). */
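/* Worked example (mode chosen for illustration): for a V4SImode SRC
the reduction loop calls this with i = 128 and then i = 64. With
i = 128 the V1TImode logical shift by 64 bits moves elements 2 and 3
down into positions 0 and 1; with i = 64 the shift by 32 bits moves
element 1 into position 0. Only the low i / 2 bits of DEST are
meaningful afterwards. */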
45150 static void
45151 emit_reduc_half (rtx dest, rtx src, int i)
45153 rtx tem, d = dest;
45154 switch (GET_MODE (src))
45156 case V4SFmode:
45157 if (i == 128)
45158 tem = gen_sse_movhlps (dest, src, src);
45159 else
45160 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45161 GEN_INT (1 + 4), GEN_INT (1 + 4));
45162 break;
45163 case V2DFmode:
45164 tem = gen_vec_interleave_highv2df (dest, src, src);
45165 break;
45166 case V16QImode:
45167 case V8HImode:
45168 case V4SImode:
45169 case V2DImode:
45170 d = gen_reg_rtx (V1TImode);
45171 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45172 GEN_INT (i / 2));
45173 break;
45174 case V8SFmode:
45175 if (i == 256)
45176 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45177 else
45178 tem = gen_avx_shufps256 (dest, src, src,
45179 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45180 break;
45181 case V4DFmode:
45182 if (i == 256)
45183 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45184 else
45185 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45186 break;
45187 case V32QImode:
45188 case V16HImode:
45189 case V8SImode:
45190 case V4DImode:
45191 if (i == 256)
45193 if (GET_MODE (dest) != V4DImode)
45194 d = gen_reg_rtx (V4DImode);
45195 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45196 gen_lowpart (V4DImode, src),
45197 const1_rtx);
45199 else
45201 d = gen_reg_rtx (V2TImode);
45202 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45203 GEN_INT (i / 2));
45205 break;
45206 case V64QImode:
45207 case V32HImode:
45208 case V16SImode:
45209 case V16SFmode:
45210 case V8DImode:
45211 case V8DFmode:
45212 if (i > 128)
45213 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45214 gen_lowpart (V16SImode, src),
45215 gen_lowpart (V16SImode, src),
45216 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45217 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45218 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45219 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45220 GEN_INT (0xC), GEN_INT (0xD),
45221 GEN_INT (0xE), GEN_INT (0xF),
45222 GEN_INT (0x10), GEN_INT (0x11),
45223 GEN_INT (0x12), GEN_INT (0x13),
45224 GEN_INT (0x14), GEN_INT (0x15),
45225 GEN_INT (0x16), GEN_INT (0x17));
45226 else
45227 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45228 gen_lowpart (V16SImode, src),
45229 GEN_INT (i == 128 ? 0x2 : 0x1),
45230 GEN_INT (0x3),
45231 GEN_INT (0x3),
45232 GEN_INT (0x3),
45233 GEN_INT (i == 128 ? 0x6 : 0x5),
45234 GEN_INT (0x7),
45235 GEN_INT (0x7),
45236 GEN_INT (0x7),
45237 GEN_INT (i == 128 ? 0xA : 0x9),
45238 GEN_INT (0xB),
45239 GEN_INT (0xB),
45240 GEN_INT (0xB),
45241 GEN_INT (i == 128 ? 0xE : 0xD),
45242 GEN_INT (0xF),
45243 GEN_INT (0xF),
45244 GEN_INT (0xF));
45245 break;
45246 default:
45247 gcc_unreachable ();
45249 emit_insn (tem);
45250 if (d != dest)
45251 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45254 /* Expand a vector reduction. FN is the binary pattern to reduce;
45255 DEST is the destination; IN is the input vector. */
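/* Example of the halving loop below, with V8HImode chosen for
illustration (128 bits, 16-bit elements): emit_reduc_half runs with
i = 128, 64 and 32, i.e. three applications of FN, which is log2(8)
combining steps; the final step (i == 2 * 16) writes its result
directly into DEST. The PHMINPOSUW shortcut taken first for SSE4.1
V8HImode UMIN avoids the halving loop entirely. */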
45257 void
45258 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45260 rtx half, dst, vec = in;
45261 machine_mode mode = GET_MODE (in);
45262 int i;
45264 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45265 if (TARGET_SSE4_1
45266 && mode == V8HImode
45267 && fn == gen_uminv8hi3)
45269 emit_insn (gen_sse4_1_phminposuw (dest, in));
45270 return;
45273 for (i = GET_MODE_BITSIZE (mode);
45274 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45275 i >>= 1)
45277 half = gen_reg_rtx (mode);
45278 emit_reduc_half (half, vec, i);
45279 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45280 dst = dest;
45281 else
45282 dst = gen_reg_rtx (mode);
45283 emit_insn (fn (dst, half, vec));
45284 vec = dst;
45288 /* Target hook for scalar_mode_supported_p. */
45289 static bool
45290 ix86_scalar_mode_supported_p (machine_mode mode)
45292 if (DECIMAL_FLOAT_MODE_P (mode))
45293 return default_decimal_float_supported_p ();
45294 else if (mode == TFmode)
45295 return true;
45296 else
45297 return default_scalar_mode_supported_p (mode);
45300 /* Implements target hook vector_mode_supported_p. */
45301 static bool
45302 ix86_vector_mode_supported_p (machine_mode mode)
45304 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45305 return true;
45306 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45307 return true;
45308 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45309 return true;
45310 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45311 return true;
45312 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45313 return true;
45314 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45315 return true;
45316 return false;
45319 /* Implement target hook libgcc_floating_mode_supported_p. */
45320 static bool
45321 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45323 switch (mode)
45325 case SFmode:
45326 case DFmode:
45327 case XFmode:
45328 return true;
45330 case TFmode:
45331 #ifdef IX86_NO_LIBGCC_TFMODE
45332 return false;
45333 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45334 return TARGET_LONG_DOUBLE_128;
45335 #else
45336 return true;
45337 #endif
45339 default:
45340 return false;
45344 /* Target hook for c_mode_for_suffix. */
45345 static machine_mode
45346 ix86_c_mode_for_suffix (char suffix)
45348 if (suffix == 'q')
45349 return TFmode;
45350 if (suffix == 'w')
45351 return XFmode;
45353 return VOIDmode;
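/* For example, with these mappings a constant written as 1.5q is
given TFmode (__float128) and 1.5w is given XFmode (__float80); other
suffixes get VOIDmode here and are left to the language-independent
code. */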
45356 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45358 We do this in the new i386 backend to maintain source compatibility
45359 with the old cc0-based compiler. */
45361 static tree
45362 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45364 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45365 clobbers);
45366 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45367 clobbers);
45368 return clobbers;
45371 /* Implements the target hook targetm.asm.encode_section_info. */
45373 static void ATTRIBUTE_UNUSED
45374 ix86_encode_section_info (tree decl, rtx rtl, int first)
45376 default_encode_section_info (decl, rtl, first);
45378 if (ix86_in_large_data_p (decl))
45379 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45382 /* Worker function for REVERSE_CONDITION. */
45384 enum rtx_code
45385 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45387 return (mode != CCFPmode && mode != CCFPUmode
45388 ? reverse_condition (code)
45389 : reverse_condition_maybe_unordered (code));
45392 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45393 to OPERANDS[0]. */
45395 const char *
45396 output_387_reg_move (rtx insn, rtx *operands)
45398 if (REG_P (operands[0]))
45400 if (REG_P (operands[1])
45401 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45403 if (REGNO (operands[0]) == FIRST_STACK_REG)
45404 return output_387_ffreep (operands, 0);
45405 return "fstp\t%y0";
45407 if (STACK_TOP_P (operands[0]))
45408 return "fld%Z1\t%y1";
45409 return "fst\t%y0";
45411 else if (MEM_P (operands[0]))
45413 gcc_assert (REG_P (operands[1]));
45414 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45415 return "fstp%Z0\t%y0";
45416 else
45418 /* There is no non-popping store to memory for XFmode.
45419 So if we need one, follow the store with a load. */
45420 if (GET_MODE (operands[0]) == XFmode)
45421 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45422 else
45423 return "fst%Z0\t%y0";
45426 else
45427 gcc_unreachable ();
45430 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45431 FP status register is set. */
45433 void
45434 ix86_emit_fp_unordered_jump (rtx label)
45436 rtx reg = gen_reg_rtx (HImode);
45437 rtx temp;
45439 emit_insn (gen_x86_fnstsw_1 (reg));
45441 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45443 emit_insn (gen_x86_sahf_1 (reg));
45445 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45446 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45448 else
45450 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45452 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45453 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45456 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45457 gen_rtx_LABEL_REF (VOIDmode, label),
45458 pc_rtx);
45459 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45461 emit_jump_insn (temp);
45462 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45465 /* Output code to perform a log1p XFmode calculation. */
45467 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45469 rtx_code_label *label1 = gen_label_rtx ();
45470 rtx_code_label *label2 = gen_label_rtx ();
45472 rtx tmp = gen_reg_rtx (XFmode);
45473 rtx tmp2 = gen_reg_rtx (XFmode);
45474 rtx test;
45476 emit_insn (gen_absxf2 (tmp, op1));
45477 test = gen_rtx_GE (VOIDmode, tmp,
45478 CONST_DOUBLE_FROM_REAL_VALUE (
45479 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45480 XFmode));
45481 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45483 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45484 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45485 emit_jump (label2);
45487 emit_label (label1);
45488 emit_move_insn (tmp, CONST1_RTX (XFmode));
45489 emit_insn (gen_addxf3 (tmp, op1, tmp));
45490 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45491 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45493 emit_label (label2);
45496 /* Emit code for round calculation. */
45497 void ix86_emit_i387_round (rtx op0, rtx op1)
45499 machine_mode inmode = GET_MODE (op1);
45500 machine_mode outmode = GET_MODE (op0);
45501 rtx e1, e2, res, tmp, tmp1, half;
45502 rtx scratch = gen_reg_rtx (HImode);
45503 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45504 rtx_code_label *jump_label = gen_label_rtx ();
45505 rtx insn;
45506 rtx (*gen_abs) (rtx, rtx);
45507 rtx (*gen_neg) (rtx, rtx);
45509 switch (inmode)
45511 case SFmode:
45512 gen_abs = gen_abssf2;
45513 break;
45514 case DFmode:
45515 gen_abs = gen_absdf2;
45516 break;
45517 case XFmode:
45518 gen_abs = gen_absxf2;
45519 break;
45520 default:
45521 gcc_unreachable ();
45524 switch (outmode)
45526 case SFmode:
45527 gen_neg = gen_negsf2;
45528 break;
45529 case DFmode:
45530 gen_neg = gen_negdf2;
45531 break;
45532 case XFmode:
45533 gen_neg = gen_negxf2;
45534 break;
45535 case HImode:
45536 gen_neg = gen_neghi2;
45537 break;
45538 case SImode:
45539 gen_neg = gen_negsi2;
45540 break;
45541 case DImode:
45542 gen_neg = gen_negdi2;
45543 break;
45544 default:
45545 gcc_unreachable ();
45548 e1 = gen_reg_rtx (inmode);
45549 e2 = gen_reg_rtx (inmode);
45550 res = gen_reg_rtx (outmode);
45552 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45554 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
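/* Worked example of the identity above (value chosen for
illustration): round (-2.5) = sgn (-2.5) * floor (fabs (-2.5) + 0.5)
= -1 * floor (3.0) = -3, i.e. halfway cases are rounded away from
zero. */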
45556 /* scratch = fxam(op1) */
45557 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45558 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45559 UNSPEC_FXAM)));
45560 /* e1 = fabs(op1) */
45561 emit_insn (gen_abs (e1, op1));
45563 /* e2 = e1 + 0.5 */
45564 half = force_reg (inmode, half);
45565 emit_insn (gen_rtx_SET (VOIDmode, e2,
45566 gen_rtx_PLUS (inmode, e1, half)));
45568 /* res = floor(e2) */
45569 if (inmode != XFmode)
45571 tmp1 = gen_reg_rtx (XFmode);
45573 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45574 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45576 else
45577 tmp1 = e2;
45579 switch (outmode)
45581 case SFmode:
45582 case DFmode:
45584 rtx tmp0 = gen_reg_rtx (XFmode);
45586 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45588 emit_insn (gen_rtx_SET (VOIDmode, res,
45589 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45590 UNSPEC_TRUNC_NOOP)));
45592 break;
45593 case XFmode:
45594 emit_insn (gen_frndintxf2_floor (res, tmp1));
45595 break;
45596 case HImode:
45597 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45598 break;
45599 case SImode:
45600 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45601 break;
45602 case DImode:
45603 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45604 break;
45605 default:
45606 gcc_unreachable ();
45609 /* flags = signbit(a) */
45610 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45612 /* if (flags) then res = -res */
45613 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45614 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45615 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45616 pc_rtx);
45617 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45618 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45619 JUMP_LABEL (insn) = jump_label;
45621 emit_insn (gen_neg (res, res));
45623 emit_label (jump_label);
45624 LABEL_NUSES (jump_label) = 1;
45626 emit_move_insn (op0, res);
45629 /* Output code to perform a Newton-Raphson approximation of a single precision
45630 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45632 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45634 rtx x0, x1, e0, e1;
45636 x0 = gen_reg_rtx (mode);
45637 e0 = gen_reg_rtx (mode);
45638 e1 = gen_reg_rtx (mode);
45639 x1 = gen_reg_rtx (mode);
45641 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
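/* The identity above is one Newton-Raphson step for 1/b: starting
from the estimate x0 = rcp(b), the refined value is
x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0),
which is exactly e1 - e0 as computed below, and the quotient is then
approximated by a * x1. */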
45643 b = force_reg (mode, b);
45645 /* x0 = rcp(b) estimate */
45646 if (mode == V16SFmode || mode == V8DFmode)
45647 emit_insn (gen_rtx_SET (VOIDmode, x0,
45648 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45649 UNSPEC_RCP14)));
45650 else
45651 emit_insn (gen_rtx_SET (VOIDmode, x0,
45652 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45653 UNSPEC_RCP)));
45655 /* e0 = x0 * b */
45656 emit_insn (gen_rtx_SET (VOIDmode, e0,
45657 gen_rtx_MULT (mode, x0, b)));
45659 /* e0 = x0 * e0 */
45660 emit_insn (gen_rtx_SET (VOIDmode, e0,
45661 gen_rtx_MULT (mode, x0, e0)));
45663 /* e1 = x0 + x0 */
45664 emit_insn (gen_rtx_SET (VOIDmode, e1,
45665 gen_rtx_PLUS (mode, x0, x0)));
45667 /* x1 = e1 - e0 */
45668 emit_insn (gen_rtx_SET (VOIDmode, x1,
45669 gen_rtx_MINUS (mode, e1, e0)));
45671 /* res = a * x1 */
45672 emit_insn (gen_rtx_SET (VOIDmode, res,
45673 gen_rtx_MULT (mode, a, x1)));
45676 /* Output code to perform a Newton-Raphson approximation of a
45677 single precision floating point [reciprocal] square root. */
45679 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45680 bool recip)
45682 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45683 REAL_VALUE_TYPE r;
45684 int unspec;
45686 x0 = gen_reg_rtx (mode);
45687 e0 = gen_reg_rtx (mode);
45688 e1 = gen_reg_rtx (mode);
45689 e2 = gen_reg_rtx (mode);
45690 e3 = gen_reg_rtx (mode);
45692 real_from_integer (&r, VOIDmode, -3, SIGNED);
45693 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45695 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45696 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45697 unspec = UNSPEC_RSQRT;
45699 if (VECTOR_MODE_P (mode))
45701 mthree = ix86_build_const_vector (mode, true, mthree);
45702 mhalf = ix86_build_const_vector (mode, true, mhalf);
45703 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45704 if (GET_MODE_SIZE (mode) == 64)
45705 unspec = UNSPEC_RSQRT14;
45708 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45709 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
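/* The formulas above are one Newton-Raphson step for 1/sqrt(a): with
x0 = rsqrtss(a), the refined reciprocal square root is
x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3),
and sqrt(a) is then a * x1. Below, e1 = a * x0 * x0, e2 = e1 - 3, and
e3 is -0.5 * x0 for the recip case or -0.5 * a * x0 for the sqrt
case. */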
45711 a = force_reg (mode, a);
45713 /* x0 = rsqrt(a) estimate */
45714 emit_insn (gen_rtx_SET (VOIDmode, x0,
45715 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45716 unspec)));
45718 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
45719 if (!recip)
45721 rtx zero, mask;
45723 zero = gen_reg_rtx (mode);
45724 mask = gen_reg_rtx (mode);
45726 zero = force_reg (mode, CONST0_RTX(mode));
45728 /* Handle masked compare. */
45729 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45731 mask = gen_reg_rtx (HImode);
45732 /* Imm value 0x4 corresponds to not-equal comparison. */
45733 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45734 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45736 else
45738 emit_insn (gen_rtx_SET (VOIDmode, mask,
45739 gen_rtx_NE (mode, zero, a)));
45741 emit_insn (gen_rtx_SET (VOIDmode, x0,
45742 gen_rtx_AND (mode, x0, mask)));
45746 /* e0 = x0 * a */
45747 emit_insn (gen_rtx_SET (VOIDmode, e0,
45748 gen_rtx_MULT (mode, x0, a)));
45749 /* e1 = e0 * x0 */
45750 emit_insn (gen_rtx_SET (VOIDmode, e1,
45751 gen_rtx_MULT (mode, e0, x0)));
45753 /* e2 = e1 - 3. */
45754 mthree = force_reg (mode, mthree);
45755 emit_insn (gen_rtx_SET (VOIDmode, e2,
45756 gen_rtx_PLUS (mode, e1, mthree)));
45758 mhalf = force_reg (mode, mhalf);
45759 if (recip)
45760 /* e3 = -.5 * x0 */
45761 emit_insn (gen_rtx_SET (VOIDmode, e3,
45762 gen_rtx_MULT (mode, x0, mhalf)));
45763 else
45764 /* e3 = -.5 * e0 */
45765 emit_insn (gen_rtx_SET (VOIDmode, e3,
45766 gen_rtx_MULT (mode, e0, mhalf)));
45767 /* ret = e2 * e3 */
45768 emit_insn (gen_rtx_SET (VOIDmode, res,
45769 gen_rtx_MULT (mode, e2, e3)));
45772 #ifdef TARGET_SOLARIS
45773 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45775 static void
45776 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45777 tree decl)
45779 /* With Binutils 2.15, the "@unwind" marker must be specified on
45780 every occurrence of the ".eh_frame" section, not just the first
45781 one. */
45782 if (TARGET_64BIT
45783 && strcmp (name, ".eh_frame") == 0)
45785 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45786 flags & SECTION_WRITE ? "aw" : "a");
45787 return;
45790 #ifndef USE_GAS
45791 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45793 solaris_elf_asm_comdat_section (name, flags, decl);
45794 return;
45796 #endif
45798 default_elf_asm_named_section (name, flags, decl);
45800 #endif /* TARGET_SOLARIS */
45802 /* Return the mangling of TYPE if it is an extended fundamental type. */
45804 static const char *
45805 ix86_mangle_type (const_tree type)
45807 type = TYPE_MAIN_VARIANT (type);
45809 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45810 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45811 return NULL;
45813 switch (TYPE_MODE (type))
45815 case TFmode:
45816 /* __float128 is "g". */
45817 return "g";
45818 case XFmode:
45819 /* "long double" or __float80 is "e". */
45820 return "e";
45821 default:
45822 return NULL;
45826 /* For 32-bit code we can save PIC register setup by using
45827 __stack_chk_fail_local hidden function instead of calling
45828 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45829 register, so it is better to call __stack_chk_fail directly. */
45831 static tree ATTRIBUTE_UNUSED
45832 ix86_stack_protect_fail (void)
45834 return TARGET_64BIT
45835 ? default_external_stack_protect_fail ()
45836 : default_hidden_stack_protect_fail ();
45839 /* Select a format to encode pointers in exception handling data. CODE
45840 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45841 true if the symbol may be affected by dynamic relocations.
45843 ??? All x86 object file formats are capable of representing this.
45844 After all, the relocation needed is the same as for the call insn.
45845 Whether or not a particular assembler allows us to enter such, I
45846 guess we'll have to see. */
45848 int asm_preferred_eh_data_format (int code, int global)
45850 if (flag_pic)
45852 int type = DW_EH_PE_sdata8;
45853 if (!TARGET_64BIT
45854 || ix86_cmodel == CM_SMALL_PIC
45855 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45856 type = DW_EH_PE_sdata4;
45857 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45859 if (ix86_cmodel == CM_SMALL
45860 || (ix86_cmodel == CM_MEDIUM && code))
45861 return DW_EH_PE_udata4;
45862 return DW_EH_PE_absptr;
45865 /* Expand copysign from SIGN to the positive value ABS_VALUE
45866 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45867 the sign-bit. */
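/* In terms of bit operations, the sequence below computes
RESULT = ABS_VALUE | (SIGN & signbit_mask),
where signbit_mask selects only the sign bit. When the caller passes
the MASK produced by ix86_expand_sse_fabs (all bits except the sign
bit), it is inverted first so that it again selects only the sign
bit. */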
45868 static void
45869 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45871 machine_mode mode = GET_MODE (sign);
45872 rtx sgn = gen_reg_rtx (mode);
45873 if (mask == NULL_RTX)
45875 machine_mode vmode;
45877 if (mode == SFmode)
45878 vmode = V4SFmode;
45879 else if (mode == DFmode)
45880 vmode = V2DFmode;
45881 else
45882 vmode = mode;
45884 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45885 if (!VECTOR_MODE_P (mode))
45887 /* We need to generate a scalar mode mask in this case. */
45888 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45889 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45890 mask = gen_reg_rtx (mode);
45891 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45894 else
45895 mask = gen_rtx_NOT (mode, mask);
45896 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45897 gen_rtx_AND (mode, mask, sign)));
45898 emit_insn (gen_rtx_SET (VOIDmode, result,
45899 gen_rtx_IOR (mode, abs_value, sgn)));
45902 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45903 mask for masking out the sign-bit is stored in *SMASK, if that is
45904 non-null. */
45905 static rtx
45906 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45908 machine_mode vmode, mode = GET_MODE (op0);
45909 rtx xa, mask;
45911 xa = gen_reg_rtx (mode);
45912 if (mode == SFmode)
45913 vmode = V4SFmode;
45914 else if (mode == DFmode)
45915 vmode = V2DFmode;
45916 else
45917 vmode = mode;
45918 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45919 if (!VECTOR_MODE_P (mode))
45921 /* We need to generate a scalar mode mask in this case. */
45922 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45923 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45924 mask = gen_reg_rtx (mode);
45925 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45927 emit_insn (gen_rtx_SET (VOIDmode, xa,
45928 gen_rtx_AND (mode, op0, mask)));
45930 if (smask)
45931 *smask = mask;
45933 return xa;
45936 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45937 swapping the operands if SWAP_OPERANDS is true. The expanded
45938 code is a forward jump to a newly created label in case the
45939 comparison is true. The generated label rtx is returned. */
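/* Usage pattern seen in the rounding expanders below:
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
emits a branch taken when TWO52 <= xa or the operands are unordered,
i.e. when !isless (xa, TWO52), so the code following the call runs
only for inputs small enough to need rounding. */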
45940 static rtx_code_label *
45941 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45942 bool swap_operands)
45944 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45945 rtx_code_label *label;
45946 rtx tmp;
45948 if (swap_operands)
45949 std::swap (op0, op1);
45951 label = gen_label_rtx ();
45952 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45953 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45954 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45955 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45956 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45957 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45958 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45959 JUMP_LABEL (tmp) = label;
45961 return label;
45964 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45965 using comparison code CODE. Operands are swapped for the comparison if
45966 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45967 static rtx
45968 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45969 bool swap_operands)
45971 rtx (*insn)(rtx, rtx, rtx, rtx);
45972 machine_mode mode = GET_MODE (op0);
45973 rtx mask = gen_reg_rtx (mode);
45975 if (swap_operands)
45976 std::swap (op0, op1);
45978 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45980 emit_insn (insn (mask, op0, op1,
45981 gen_rtx_fmt_ee (code, mode, op0, op1)));
45982 return mask;
45985 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45986 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
45987 static rtx
45988 ix86_gen_TWO52 (machine_mode mode)
45990 REAL_VALUE_TYPE TWO52r;
45991 rtx TWO52;
45993 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45994 TWO52 = const_double_from_real_value (TWO52r, mode);
45995 TWO52 = force_reg (mode, TWO52);
45997 return TWO52;
46000 /* Expand SSE sequence for computing lround from OP1 storing
46001 into OP0. */
46002 void
46003 ix86_expand_lround (rtx op0, rtx op1)
46005 /* C code for the stuff we're doing below:
46006 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46007 return (long)tmp;
46009 machine_mode mode = GET_MODE (op1);
46010 const struct real_format *fmt;
46011 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46012 rtx adj;
46014 /* load nextafter (0.5, 0.0) */
46015 fmt = REAL_MODE_FORMAT (mode);
46016 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46017 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
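/* Here half_minus_pred_half is 2**(-p-1), the spacing of the format
just below 0.5, so pred_half is nextafter (0.5, 0.0) as the comment
above says. Using it rather than exactly 0.5 is meant to keep
op1 + adj from rounding up across an integer boundary when op1 lies
just below a halfway point. */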
46019 /* adj = copysign (0.5, op1) */
46020 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46021 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46023 /* adj = op1 + adj */
46024 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46026 /* op0 = (imode)adj */
46027 expand_fix (op0, adj, 0);
46030 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
46031 storing into OPERAND0. */
46032 void
46033 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46035 /* C code for the stuff we're doing below (for do_floor):
46036 xi = (long)op1;
46037 xi -= (double)xi > op1 ? 1 : 0;
46038 return xi;
46040 machine_mode fmode = GET_MODE (op1);
46041 machine_mode imode = GET_MODE (op0);
46042 rtx ireg, freg, tmp;
46043 rtx_code_label *label;
46045 /* reg = (long)op1 */
46046 ireg = gen_reg_rtx (imode);
46047 expand_fix (ireg, op1, 0);
46049 /* freg = (double)reg */
46050 freg = gen_reg_rtx (fmode);
46051 expand_float (freg, ireg, 0);
46053 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46054 label = ix86_expand_sse_compare_and_jump (UNLE,
46055 freg, op1, !do_floor);
46056 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46057 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46058 emit_move_insn (ireg, tmp);
46060 emit_label (label);
46061 LABEL_NUSES (label) = 1;
46063 emit_move_insn (op0, ireg);
46066 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46067 result in OPERAND0. */
46068 void
46069 ix86_expand_rint (rtx operand0, rtx operand1)
46071 /* C code for the stuff we're doing below:
46072 xa = fabs (operand1);
46073 if (!isless (xa, 2**52))
46074 return operand1;
46075 xa = xa + 2**52 - 2**52;
46076 return copysign (xa, operand1);
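/* The 2**52 addition and subtraction work because, for |xa| < 2**52,
the sum xa + 2**52 lies in [2**52, 2**53) where the spacing of doubles
is 1, so the addition rounds xa to an integer under the current
rounding mode and subtracting 2**52 then recovers that integer
exactly. E.g. xa = 2.3 gives 2**52 + 2, and 2.0 after the
subtraction. (The SFmode variant uses 2**23 analogously.) */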
46078 machine_mode mode = GET_MODE (operand0);
46079 rtx res, xa, TWO52, mask;
46080 rtx_code_label *label;
46082 res = gen_reg_rtx (mode);
46083 emit_move_insn (res, operand1);
46085 /* xa = abs (operand1) */
46086 xa = ix86_expand_sse_fabs (res, &mask);
46088 /* if (!isless (xa, TWO52)) goto label; */
46089 TWO52 = ix86_gen_TWO52 (mode);
46090 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46092 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46093 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46095 ix86_sse_copysign_to_positive (res, xa, res, mask);
46097 emit_label (label);
46098 LABEL_NUSES (label) = 1;
46100 emit_move_insn (operand0, res);
46103 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46104 into OPERAND0. */
46105 void
46106 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46108 /* C code for the stuff we expand below.
46109 double xa = fabs (x), x2;
46110 if (!isless (xa, TWO52))
46111 return x;
46112 xa = xa + TWO52 - TWO52;
46113 x2 = copysign (xa, x);
46114 Compensate. Floor:
46115 if (x2 > x)
46116 x2 -= 1;
46117 Compensate. Ceil:
46118 if (x2 < x)
46119 x2 -= -1;
46120 return x2;
46122 machine_mode mode = GET_MODE (operand0);
46123 rtx xa, TWO52, tmp, one, res, mask;
46124 rtx_code_label *label;
46126 TWO52 = ix86_gen_TWO52 (mode);
46128 /* Temporary for holding the result, initialized to the input
46129 operand to ease control flow. */
46130 res = gen_reg_rtx (mode);
46131 emit_move_insn (res, operand1);
46133 /* xa = abs (operand1) */
46134 xa = ix86_expand_sse_fabs (res, &mask);
46136 /* if (!isless (xa, TWO52)) goto label; */
46137 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46139 /* xa = xa + TWO52 - TWO52; */
46140 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46141 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46143 /* xa = copysign (xa, operand1) */
46144 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46146 /* generate 1.0 or -1.0 */
46147 one = force_reg (mode,
46148 const_double_from_real_value (do_floor
46149 ? dconst1 : dconstm1, mode));
46151 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46152 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46153 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46154 gen_rtx_AND (mode, one, tmp)));
46155 /* We always need to subtract here to preserve signed zero. */
46156 tmp = expand_simple_binop (mode, MINUS,
46157 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46158 emit_move_insn (res, tmp);
46160 emit_label (label);
46161 LABEL_NUSES (label) = 1;
46163 emit_move_insn (operand0, res);
46166 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46167 into OPERAND0. */
46168 void
46169 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46171 /* C code for the stuff we expand below.
46172 double xa = fabs (x), x2;
46173 if (!isless (xa, TWO52))
46174 return x;
46175 x2 = (double)(long)x;
46176 Compensate. Floor:
46177 if (x2 > x)
46178 x2 -= 1;
46179 Compensate. Ceil:
46180 if (x2 < x)
46181 x2 += 1;
46182 if (HONOR_SIGNED_ZEROS (mode))
46183 return copysign (x2, x);
46184 return x2;
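/* Note on the compensation step above: x2 = (double)(long)x truncates
toward zero, so it already equals floor (x) for x >= 0 and ceil (x)
for x <= 0; the conditional +/- 1 only corrects the remaining cases.
E.g. for floor (-1.5): x2 = -1.0 > x, so x2 -= 1 yields the expected
-2.0. */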
46186 machine_mode mode = GET_MODE (operand0);
46187 rtx xa, xi, TWO52, tmp, one, res, mask;
46188 rtx_code_label *label;
46190 TWO52 = ix86_gen_TWO52 (mode);
46192 /* Temporary for holding the result, initialized to the input
46193 operand to ease control flow. */
46194 res = gen_reg_rtx (mode);
46195 emit_move_insn (res, operand1);
46197 /* xa = abs (operand1) */
46198 xa = ix86_expand_sse_fabs (res, &mask);
46200 /* if (!isless (xa, TWO52)) goto label; */
46201 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46203 /* xa = (double)(long)x */
46204 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46205 expand_fix (xi, res, 0);
46206 expand_float (xa, xi, 0);
46208 /* generate 1.0 */
46209 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46211 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46212 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46213 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46214 gen_rtx_AND (mode, one, tmp)));
46215 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46216 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46217 emit_move_insn (res, tmp);
46219 if (HONOR_SIGNED_ZEROS (mode))
46220 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46222 emit_label (label);
46223 LABEL_NUSES (label) = 1;
46225 emit_move_insn (operand0, res);
46228 /* Expand SSE sequence for computing round from OPERAND1 storing
46229 into OPERAND0. This sequence works without relying on DImode truncation
46230 via cvttsd2siq, which is only available on 64-bit targets. */
46231 void
46232 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46234 /* C code for the stuff we expand below.
46235 double xa = fabs (x), xa2, x2;
46236 if (!isless (xa, TWO52))
46237 return x;
46238 Using the absolute value and copying the sign back makes
46239 -0.0 -> -0.0 correct.
46240 xa2 = xa + TWO52 - TWO52;
46241 Compensate.
46242 dxa = xa2 - xa;
46243 if (dxa <= -0.5)
46244 xa2 += 1;
46245 else if (dxa > 0.5)
46246 xa2 -= 1;
46247 x2 = copysign (xa2, x);
46248 return x2;
46250 machine_mode mode = GET_MODE (operand0);
46251 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46252 rtx_code_label *label;
46254 TWO52 = ix86_gen_TWO52 (mode);
46256 /* Temporary for holding the result, initialized to the input
46257 operand to ease control flow. */
46258 res = gen_reg_rtx (mode);
46259 emit_move_insn (res, operand1);
46261 /* xa = abs (operand1) */
46262 xa = ix86_expand_sse_fabs (res, &mask);
46264 /* if (!isless (xa, TWO52)) goto label; */
46265 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46267 /* xa2 = xa + TWO52 - TWO52; */
46268 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46269 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46271 /* dxa = xa2 - xa; */
46272 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46274 /* generate 0.5, 1.0 and -0.5 */
46275 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46276 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46277 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46278 0, OPTAB_DIRECT);
46280 /* Compensate. */
46281 tmp = gen_reg_rtx (mode);
46282 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46283 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46284 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46285 gen_rtx_AND (mode, one, tmp)));
46286 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46287 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46288 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46289 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46290 gen_rtx_AND (mode, one, tmp)));
46291 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46293 /* res = copysign (xa2, operand1) */
46294 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46296 emit_label (label);
46297 LABEL_NUSES (label) = 1;
46299 emit_move_insn (operand0, res);
46302 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46303 into OPERAND0. */
46304 void
46305 ix86_expand_trunc (rtx operand0, rtx operand1)
46307 /* C code for the SSE variant we expand below.
46308 double xa = fabs (x), x2;
46309 if (!isless (xa, TWO52))
46310 return x;
46311 x2 = (double)(long)x;
46312 if (HONOR_SIGNED_ZEROS (mode))
46313 return copysign (x2, x);
46314 return x2;
46316 machine_mode mode = GET_MODE (operand0);
46317 rtx xa, xi, TWO52, res, mask;
46318 rtx_code_label *label;
46320 TWO52 = ix86_gen_TWO52 (mode);
46322 /* Temporary for holding the result, initialized to the input
46323 operand to ease control flow. */
46324 res = gen_reg_rtx (mode);
46325 emit_move_insn (res, operand1);
46327 /* xa = abs (operand1) */
46328 xa = ix86_expand_sse_fabs (res, &mask);
46330 /* if (!isless (xa, TWO52)) goto label; */
46331 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46333 /* x = (double)(long)x */
46334 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46335 expand_fix (xi, res, 0);
46336 expand_float (res, xi, 0);
46338 if (HONOR_SIGNED_ZEROS (mode))
46339 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46341 emit_label (label);
46342 LABEL_NUSES (label) = 1;
46344 emit_move_insn (operand0, res);
46347 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46348 into OPERAND0. */
46349 void
46350 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46352 machine_mode mode = GET_MODE (operand0);
46353 rtx xa, mask, TWO52, one, res, smask, tmp;
46354 rtx_code_label *label;
46356 /* C code for the SSE variant we expand below.
46357 double xa = fabs (x), x2;
46358 if (!isless (xa, TWO52))
46359 return x;
46360 xa2 = xa + TWO52 - TWO52;
46361 Compensate:
46362 if (xa2 > xa)
46363 xa2 -= 1.0;
46364 x2 = copysign (xa2, x);
46365 return x2;
46368 TWO52 = ix86_gen_TWO52 (mode);
46370 /* Temporary for holding the result, initialized to the input
46371 operand to ease control flow. */
46372 res = gen_reg_rtx (mode);
46373 emit_move_insn (res, operand1);
46375 /* xa = abs (operand1) */
46376 xa = ix86_expand_sse_fabs (res, &smask);
46378 /* if (!isless (xa, TWO52)) goto label; */
46379 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46381 /* res = xa + TWO52 - TWO52; */
46382 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46383 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46384 emit_move_insn (res, tmp);
46386 /* generate 1.0 */
46387 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46389 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46390 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46391 emit_insn (gen_rtx_SET (VOIDmode, mask,
46392 gen_rtx_AND (mode, mask, one)));
46393 tmp = expand_simple_binop (mode, MINUS,
46394 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46395 emit_move_insn (res, tmp);
46397 /* res = copysign (res, operand1) */
46398 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46400 emit_label (label);
46401 LABEL_NUSES (label) = 1;
46403 emit_move_insn (operand0, res);
46406 /* Expand SSE sequence for computing round from OPERAND1 storing
46407 into OPERAND0. */
46408 void
46409 ix86_expand_round (rtx operand0, rtx operand1)
46411 /* C code for the stuff we're doing below:
46412 double xa = fabs (x);
46413 if (!isless (xa, TWO52))
46414 return x;
46415 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46416 return copysign (xa, x);
46418 machine_mode mode = GET_MODE (operand0);
46419 rtx res, TWO52, xa, xi, half, mask;
46420 rtx_code_label *label;
46421 const struct real_format *fmt;
46422 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46424 /* Temporary for holding the result, initialized to the input
46425 operand to ease control flow. */
46426 res = gen_reg_rtx (mode);
46427 emit_move_insn (res, operand1);
46429 TWO52 = ix86_gen_TWO52 (mode);
46430 xa = ix86_expand_sse_fabs (res, &mask);
46431 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46433 /* load nextafter (0.5, 0.0) */
46434 fmt = REAL_MODE_FORMAT (mode);
46435 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46436 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
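/* Illustrative note: pred_half is 0.5 - 2**(-p-1), the largest representable
   value below 0.5 in this mode, i.e. nextafter (0.5, 0.0).  Adding it instead
   of plain 0.5 keeps values just below one half, e.g. xa == nextafter (0.5, 0.0),
   from being rounded up by the addition and then truncated to 1, while halfway
   cases such as 2.5 still reach 3.0 after the add under round-to-nearest.  */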
46438 /* xa = xa + 0.5 */
46439 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46440 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46442 /* xa = (double)(int64_t)xa */
46443 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46444 expand_fix (xi, xa, 0);
46445 expand_float (xa, xi, 0);
46447 /* res = copysign (xa, operand1) */
46448 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46450 emit_label (label);
46451 LABEL_NUSES (label) = 1;
46453 emit_move_insn (operand0, res);
46456 /* Expand SSE sequence for computing round
46457 from OP1 storing into OP0 using sse4 round insn. */
46458 void
46459 ix86_expand_round_sse4 (rtx op0, rtx op1)
46461 machine_mode mode = GET_MODE (op0);
46462 rtx e1, e2, res, half;
46463 const struct real_format *fmt;
46464 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46465 rtx (*gen_copysign) (rtx, rtx, rtx);
46466 rtx (*gen_round) (rtx, rtx, rtx);
46468 switch (mode)
46470 case SFmode:
46471 gen_copysign = gen_copysignsf3;
46472 gen_round = gen_sse4_1_roundsf2;
46473 break;
46474 case DFmode:
46475 gen_copysign = gen_copysigndf3;
46476 gen_round = gen_sse4_1_rounddf2;
46477 break;
46478 default:
46479 gcc_unreachable ();
46482 /* round (a) = trunc (a + copysign (0.5, a)) */
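/* For example, with the pre-decremented 0.5 loaded below:
   round (2.5): e1 is just below 0.5, e2 = 2.5 + e1 rounds to 3.0 and
   trunc gives 3.0; likewise round (-2.5) yields -3.0.  */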
46484 /* load nextafter (0.5, 0.0) */
46485 fmt = REAL_MODE_FORMAT (mode);
46486 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46487 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46488 half = const_double_from_real_value (pred_half, mode);
46490 /* e1 = copysign (0.5, op1) */
46491 e1 = gen_reg_rtx (mode);
46492 emit_insn (gen_copysign (e1, half, op1));
46494 /* e2 = op1 + e1 */
46495 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46497 /* res = trunc (e2) */
46498 res = gen_reg_rtx (mode);
46499 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46501 emit_move_insn (op0, res);
46505 /* Table of valid machine attributes. */
46506 static const struct attribute_spec ix86_attribute_table[] =
46508 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46509 affects_type_identity } */
46510 /* Stdcall attribute says callee is responsible for popping arguments
46511 if they are not variable. */
46512 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46513 true },
46514 /* Fastcall attribute says callee is responsible for popping arguments
46515 if they are not variable. */
46516 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46517 true },
46518 /* Thiscall attribute says callee is responsible for popping arguments
46519 if they are not variable. */
46520 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46521 true },
46522 /* Cdecl attribute says the callee is a normal C declaration. */
46523 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46524 true },
46525 /* Regparm attribute specifies how many integer arguments are to be
46526 passed in registers. */
46527 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46528 true },
46529 /* Sseregparm attribute says we are using x86_64 calling conventions
46530 for FP arguments. */
46531 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46532 true },
46533 /* The transactional memory builtins are implicitly regparm or fastcall
46534 depending on the ABI. Override the generic do-nothing attribute that
46535 these builtins were declared with. */
46536 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46537 true },
46538 /* force_align_arg_pointer says this function realigns the stack at entry. */
46539 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46540 false, true, true, ix86_handle_cconv_attribute, false },
46541 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46542 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46543 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46544 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46545 false },
46546 #endif
46547 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46548 false },
46549 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46550 false },
46551 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46552 SUBTARGET_ATTRIBUTE_TABLE,
46553 #endif
46554 /* ms_abi and sysv_abi calling convention function attributes. */
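/* For example, a declaration such as
     void __attribute__ ((ms_abi)) f (void);
   selects the Microsoft x86-64 calling convention for F, while sysv_abi
   selects the System V one.  */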
46555 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46556 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46557 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46558 false },
46559 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46560 ix86_handle_callee_pop_aggregate_return, true },
46561 /* End element. */
46562 { NULL, 0, 0, false, false, false, NULL, false }
46565 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46566 static int
46567 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46568 tree vectype, int)
46570 unsigned elements;
46572 switch (type_of_cost)
46574 case scalar_stmt:
46575 return ix86_cost->scalar_stmt_cost;
46577 case scalar_load:
46578 return ix86_cost->scalar_load_cost;
46580 case scalar_store:
46581 return ix86_cost->scalar_store_cost;
46583 case vector_stmt:
46584 return ix86_cost->vec_stmt_cost;
46586 case vector_load:
46587 return ix86_cost->vec_align_load_cost;
46589 case vector_store:
46590 return ix86_cost->vec_store_cost;
46592 case vec_to_scalar:
46593 return ix86_cost->vec_to_scalar_cost;
46595 case scalar_to_vec:
46596 return ix86_cost->scalar_to_vec_cost;
46598 case unaligned_load:
46599 case unaligned_store:
46600 return ix86_cost->vec_unalign_load_cost;
46602 case cond_branch_taken:
46603 return ix86_cost->cond_taken_branch_cost;
46605 case cond_branch_not_taken:
46606 return ix86_cost->cond_not_taken_branch_cost;
46608 case vec_perm:
46609 case vec_promote_demote:
46610 return ix86_cost->vec_stmt_cost;
46612 case vec_construct:
46613 elements = TYPE_VECTOR_SUBPARTS (vectype);
46614 return elements / 2 + 1;
46616 default:
46617 gcc_unreachable ();
46621 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46622 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46623 insn every time. */
46625 static GTY(()) rtx_insn *vselect_insn;
46627 /* Initialize vselect_insn. */
46629 static void
46630 init_vselect_insn (void)
46632 unsigned i;
46633 rtx x;
46635 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46636 for (i = 0; i < MAX_VECT_LEN; ++i)
46637 XVECEXP (x, 0, i) = const0_rtx;
46638 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46639 const0_rtx), x);
46640 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46641 start_sequence ();
46642 vselect_insn = emit_insn (x);
46643 end_sequence ();
46646 /* Construct (set target (vec_select op0 (parallel perm))) and
46647 return true if that's a valid instruction in the active ISA. */
46649 static bool
46650 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46651 unsigned nelt, bool testing_p)
46653 unsigned int i;
46654 rtx x, save_vconcat;
46655 int icode;
46657 if (vselect_insn == NULL_RTX)
46658 init_vselect_insn ();
46660 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46661 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46662 for (i = 0; i < nelt; ++i)
46663 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46664 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46665 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46666 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46667 SET_DEST (PATTERN (vselect_insn)) = target;
46668 icode = recog_memoized (vselect_insn);
46670 if (icode >= 0 && !testing_p)
46671 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46673 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46674 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46675 INSN_CODE (vselect_insn) = -1;
46677 return icode >= 0;
46680 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46682 static bool
46683 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46684 const unsigned char *perm, unsigned nelt,
46685 bool testing_p)
46687 machine_mode v2mode;
46688 rtx x;
46689 bool ok;
46691 if (vselect_insn == NULL_RTX)
46692 init_vselect_insn ();
46694 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46695 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46696 PUT_MODE (x, v2mode);
46697 XEXP (x, 0) = op0;
46698 XEXP (x, 1) = op1;
46699 ok = expand_vselect (target, x, perm, nelt, testing_p);
46700 XEXP (x, 0) = const0_rtx;
46701 XEXP (x, 1) = const0_rtx;
46702 return ok;
46705 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46706 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46708 static bool
46709 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46711 machine_mode vmode = d->vmode;
46712 unsigned i, mask, nelt = d->nelt;
46713 rtx target, op0, op1, x;
46714 rtx rperm[32], vperm;
46716 if (d->one_operand_p)
46717 return false;
46718 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46719 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46721 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46723 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46725 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46727 else
46728 return false;
46730 /* This is a blend, not a permute. Elements must stay in their
46731 respective lanes. */
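/* For example, for V4SFmode the permutation {0, 5, 2, 7} is a valid blend:
   elements 0 and 2 come from op0 and elements 1 and 3 from op1, and the
   mask computed below becomes 0b1010.  */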
46732 for (i = 0; i < nelt; ++i)
46734 unsigned e = d->perm[i];
46735 if (!(e == i || e == i + nelt))
46736 return false;
46739 if (d->testing_p)
46740 return true;
46742 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46743 decision should be extracted elsewhere, so that we only try that
46744 sequence once all budget==3 options have been tried. */
46745 target = d->target;
46746 op0 = d->op0;
46747 op1 = d->op1;
46748 mask = 0;
46750 switch (vmode)
46752 case V8DFmode:
46753 case V16SFmode:
46754 case V4DFmode:
46755 case V8SFmode:
46756 case V2DFmode:
46757 case V4SFmode:
46758 case V8HImode:
46759 case V8SImode:
46760 case V32HImode:
46761 case V64QImode:
46762 case V16SImode:
46763 case V8DImode:
46764 for (i = 0; i < nelt; ++i)
46765 mask |= (d->perm[i] >= nelt) << i;
46766 break;
46768 case V2DImode:
46769 for (i = 0; i < 2; ++i)
46770 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
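/* There is no direct 64-bit integer element blend; view each V2DI element
   as four V8HI words and use pblendw, so a selected qword contributes
   four mask bits (0xf).  */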
46771 vmode = V8HImode;
46772 goto do_subreg;
46774 case V4SImode:
46775 for (i = 0; i < 4; ++i)
46776 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46777 vmode = V8HImode;
46778 goto do_subreg;
46780 case V16QImode:
46781 /* See if bytes move in pairs so we can use pblendw with
46782 an immediate argument, rather than pblendvb with a vector
46783 argument. */
46784 for (i = 0; i < 16; i += 2)
46785 if (d->perm[i] + 1 != d->perm[i + 1])
46787 use_pblendvb:
46788 for (i = 0; i < nelt; ++i)
46789 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46791 finish_pblendvb:
46792 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46793 vperm = force_reg (vmode, vperm);
46795 if (GET_MODE_SIZE (vmode) == 16)
46796 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46797 else
46798 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46799 if (target != d->target)
46800 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46801 return true;
46804 for (i = 0; i < 8; ++i)
46805 mask |= (d->perm[i * 2] >= 16) << i;
46806 vmode = V8HImode;
46807 /* FALLTHRU */
46809 do_subreg:
46810 target = gen_reg_rtx (vmode);
46811 op0 = gen_lowpart (vmode, op0);
46812 op1 = gen_lowpart (vmode, op1);
46813 break;
46815 case V32QImode:
46816 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46817 for (i = 0; i < 32; i += 2)
46818 if (d->perm[i] + 1 != d->perm[i + 1])
46819 goto use_pblendvb;
46820 /* See if bytes move in quadruplets. If yes, vpblendd
46821 with immediate can be used. */
46822 for (i = 0; i < 32; i += 4)
46823 if (d->perm[i] + 2 != d->perm[i + 2])
46824 break;
46825 if (i < 32)
46827 /* See if bytes move the same in both lanes. If yes,
46828 vpblendw with immediate can be used. */
46829 for (i = 0; i < 16; i += 2)
46830 if (d->perm[i] + 16 != d->perm[i + 16])
46831 goto use_pblendvb;
46833 /* Use vpblendw. */
46834 for (i = 0; i < 16; ++i)
46835 mask |= (d->perm[i * 2] >= 32) << i;
46836 vmode = V16HImode;
46837 goto do_subreg;
46840 /* Use vpblendd. */
46841 for (i = 0; i < 8; ++i)
46842 mask |= (d->perm[i * 4] >= 32) << i;
46843 vmode = V8SImode;
46844 goto do_subreg;
46846 case V16HImode:
46847 /* See if words move in pairs. If yes, vpblendd can be used. */
46848 for (i = 0; i < 16; i += 2)
46849 if (d->perm[i] + 1 != d->perm[i + 1])
46850 break;
46851 if (i < 16)
46853 /* See if words move the same in both lanes. If not,
46854 vpblendvb must be used. */
46855 for (i = 0; i < 8; i++)
46856 if (d->perm[i] + 8 != d->perm[i + 8])
46858 /* Use vpblendvb. */
46859 for (i = 0; i < 32; ++i)
46860 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46862 vmode = V32QImode;
46863 nelt = 32;
46864 target = gen_reg_rtx (vmode);
46865 op0 = gen_lowpart (vmode, op0);
46866 op1 = gen_lowpart (vmode, op1);
46867 goto finish_pblendvb;
46870 /* Use vpblendw. */
46871 for (i = 0; i < 16; ++i)
46872 mask |= (d->perm[i] >= 16) << i;
46873 break;
46876 /* Use vpblendd. */
46877 for (i = 0; i < 8; ++i)
46878 mask |= (d->perm[i * 2] >= 16) << i;
46879 vmode = V8SImode;
46880 goto do_subreg;
46882 case V4DImode:
46883 /* Use vpblendd. */
46884 for (i = 0; i < 4; ++i)
46885 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46886 vmode = V8SImode;
46887 goto do_subreg;
46889 default:
46890 gcc_unreachable ();
46893 /* This matches five different patterns, depending on the mode. */
46894 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46895 x = gen_rtx_SET (VOIDmode, target, x);
46896 emit_insn (x);
46897 if (target != d->target)
46898 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46900 return true;
46903 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46904 in terms of the variable form of vpermilps.
46906 Note that we will have already failed the immediate input vpermilps,
46907 which requires that the high and low part shuffle be identical; the
46908 variable form doesn't require that. */
46910 static bool
46911 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46913 rtx rperm[8], vperm;
46914 unsigned i;
46916 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46917 return false;
46919 /* We can only permute within the 128-bit lane. */
46920 for (i = 0; i < 8; ++i)
46922 unsigned e = d->perm[i];
46923 if (i < 4 ? e >= 4 : e < 4)
46924 return false;
46927 if (d->testing_p)
46928 return true;
46930 for (i = 0; i < 8; ++i)
46932 unsigned e = d->perm[i];
46934 /* Within each 128-bit lane, the elements of op0 are numbered
46935 from 0 and the elements of op1 are numbered from 4. */
46936 if (e >= 8 + 4)
46937 e -= 8;
46938 else if (e >= 4)
46939 e -= 4;
46941 rperm[i] = GEN_INT (e);
46944 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46945 vperm = force_reg (V8SImode, vperm);
46946 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46948 return true;
46951 /* Return true if permutation D can be performed as a VMODE permutation
46952 instead. */
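/* For example, the V16QImode permutation
     { 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11 }
   moves bytes in aligned groups of four, so it is also valid as the
   V4SImode permutation { 1 0 3 2 }.  */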
46954 static bool
46955 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46957 unsigned int i, j, chunk;
46959 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46960 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46961 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46962 return false;
46964 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46965 return true;
46967 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46968 for (i = 0; i < d->nelt; i += chunk)
46969 if (d->perm[i] & (chunk - 1))
46970 return false;
46971 else
46972 for (j = 1; j < chunk; ++j)
46973 if (d->perm[i] + j != d->perm[i + j])
46974 return false;
46976 return true;
46979 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46980 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46982 static bool
46983 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46985 unsigned i, nelt, eltsz, mask;
46986 unsigned char perm[64];
46987 machine_mode vmode = V16QImode;
46988 rtx rperm[64], vperm, target, op0, op1;
46990 nelt = d->nelt;
46992 if (!d->one_operand_p)
46994 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46996 if (TARGET_AVX2
46997 && valid_perm_using_mode_p (V2TImode, d))
46999 if (d->testing_p)
47000 return true;
47002 /* Use vperm2i128 insn. The pattern uses
47003 V4DImode instead of V2TImode. */
47004 target = d->target;
47005 if (d->vmode != V4DImode)
47006 target = gen_reg_rtx (V4DImode);
47007 op0 = gen_lowpart (V4DImode, d->op0);
47008 op1 = gen_lowpart (V4DImode, d->op1);
47009 rperm[0]
47010 = GEN_INT ((d->perm[0] / (nelt / 2))
47011 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
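/* The immediate's low two bits select which 128-bit half of the two input
   operands (0-3) supplies the low lane of the result; bits 4-5 select the
   half for the high lane.  */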
47012 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47013 if (target != d->target)
47014 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47015 return true;
47017 return false;
47020 else
47022 if (GET_MODE_SIZE (d->vmode) == 16)
47024 if (!TARGET_SSSE3)
47025 return false;
47027 else if (GET_MODE_SIZE (d->vmode) == 32)
47029 if (!TARGET_AVX2)
47030 return false;
47032 /* V4DImode should already be handled through
47033 expand_vselect by the vpermq instruction. */
47034 gcc_assert (d->vmode != V4DImode);
47036 vmode = V32QImode;
47037 if (d->vmode == V8SImode
47038 || d->vmode == V16HImode
47039 || d->vmode == V32QImode)
47041 /* First see if vpermq can be used for
47042 V8SImode/V16HImode/V32QImode. */
47043 if (valid_perm_using_mode_p (V4DImode, d))
47045 for (i = 0; i < 4; i++)
47046 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47047 if (d->testing_p)
47048 return true;
47049 target = gen_reg_rtx (V4DImode);
47050 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47051 perm, 4, false))
47053 emit_move_insn (d->target,
47054 gen_lowpart (d->vmode, target));
47055 return true;
47057 return false;
47060 /* Next see if vpermd can be used. */
47061 if (valid_perm_using_mode_p (V8SImode, d))
47062 vmode = V8SImode;
47064 /* Or if vpermps can be used. */
47065 else if (d->vmode == V8SFmode)
47066 vmode = V8SImode;
47068 if (vmode == V32QImode)
47070 /* vpshufb only works within 128-bit lanes; it is not
47071 possible to shuffle bytes between the lanes. */
47072 for (i = 0; i < nelt; ++i)
47073 if ((d->perm[i] ^ i) & (nelt / 2))
47074 return false;
47077 else if (GET_MODE_SIZE (d->vmode) == 64)
47079 if (!TARGET_AVX512BW)
47080 return false;
47082 /* If vpermq didn't work, vpshufb won't work either. */
47083 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47084 return false;
47086 vmode = V64QImode;
47087 if (d->vmode == V16SImode
47088 || d->vmode == V32HImode
47089 || d->vmode == V64QImode)
47091 /* First see if vpermq can be used for
47092 V16SImode/V32HImode/V64QImode. */
47093 if (valid_perm_using_mode_p (V8DImode, d))
47095 for (i = 0; i < 8; i++)
47096 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47097 if (d->testing_p)
47098 return true;
47099 target = gen_reg_rtx (V8DImode);
47100 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47101 perm, 8, false))
47103 emit_move_insn (d->target,
47104 gen_lowpart (d->vmode, target));
47105 return true;
47107 return false;
47110 /* Next see if vpermd can be used. */
47111 if (valid_perm_using_mode_p (V16SImode, d))
47112 vmode = V16SImode;
47114 /* Or if vpermps can be used. */
47115 else if (d->vmode == V16SFmode)
47116 vmode = V16SImode;
47117 if (vmode == V64QImode)
47119 /* vpshufb only works within 128-bit lanes; it is not
47120 possible to shuffle bytes between the lanes. */
47121 for (i = 0; i < nelt; ++i)
47122 if ((d->perm[i] ^ i) & (nelt / 4))
47123 return false;
47126 else
47127 return false;
47130 if (d->testing_p)
47131 return true;
47133 if (vmode == V8SImode)
47134 for (i = 0; i < 8; ++i)
47135 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47136 else if (vmode == V16SImode)
47137 for (i = 0; i < 16; ++i)
47138 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47139 else
47141 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47142 if (!d->one_operand_p)
47143 mask = 2 * nelt - 1;
47144 else if (vmode == V16QImode)
47145 mask = nelt - 1;
47146 else if (vmode == V64QImode)
47147 mask = nelt / 4 - 1;
47148 else
47149 mask = nelt / 2 - 1;
47151 for (i = 0; i < nelt; ++i)
47153 unsigned j, e = d->perm[i] & mask;
47154 for (j = 0; j < eltsz; ++j)
47155 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47159 vperm = gen_rtx_CONST_VECTOR (vmode,
47160 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47161 vperm = force_reg (vmode, vperm);
47163 target = d->target;
47164 if (d->vmode != vmode)
47165 target = gen_reg_rtx (vmode);
47166 op0 = gen_lowpart (vmode, d->op0);
47167 if (d->one_operand_p)
47169 if (vmode == V16QImode)
47170 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47171 else if (vmode == V32QImode)
47172 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47173 else if (vmode == V64QImode)
47174 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47175 else if (vmode == V8SFmode)
47176 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47177 else if (vmode == V8SImode)
47178 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47179 else if (vmode == V16SFmode)
47180 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47181 else if (vmode == V16SImode)
47182 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47183 else
47184 gcc_unreachable ();
47186 else
47188 op1 = gen_lowpart (vmode, d->op1);
47189 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47191 if (target != d->target)
47192 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47194 return true;
47197 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47198 in a single instruction. */
47200 static bool
47201 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47203 unsigned i, nelt = d->nelt;
47204 unsigned char perm2[MAX_VECT_LEN];
47206 /* Check plain VEC_SELECT first, because AVX has instructions that could
47207 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47208 input where SEL+CONCAT may not. */
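/* For a single operand, an identity permutation { 0 1 ... n-1 } is just a
   move, and a broadcast permutation { 0 0 ... 0 } can, with AVX2, use a
   vpbroadcast/vec_dup pattern; both are recognized below.  */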
47209 if (d->one_operand_p)
47211 int mask = nelt - 1;
47212 bool identity_perm = true;
47213 bool broadcast_perm = true;
47215 for (i = 0; i < nelt; i++)
47217 perm2[i] = d->perm[i] & mask;
47218 if (perm2[i] != i)
47219 identity_perm = false;
47220 if (perm2[i])
47221 broadcast_perm = false;
47224 if (identity_perm)
47226 if (!d->testing_p)
47227 emit_move_insn (d->target, d->op0);
47228 return true;
47230 else if (broadcast_perm && TARGET_AVX2)
47232 /* Use vpbroadcast{b,w,d}. */
47233 rtx (*gen) (rtx, rtx) = NULL;
47234 switch (d->vmode)
47236 case V64QImode:
47237 if (TARGET_AVX512BW)
47238 gen = gen_avx512bw_vec_dupv64qi_1;
47239 break;
47240 case V32QImode:
47241 gen = gen_avx2_pbroadcastv32qi_1;
47242 break;
47243 case V32HImode:
47244 if (TARGET_AVX512BW)
47245 gen = gen_avx512bw_vec_dupv32hi_1;
47246 break;
47247 case V16HImode:
47248 gen = gen_avx2_pbroadcastv16hi_1;
47249 break;
47250 case V16SImode:
47251 if (TARGET_AVX512F)
47252 gen = gen_avx512f_vec_dupv16si_1;
47253 break;
47254 case V8SImode:
47255 gen = gen_avx2_pbroadcastv8si_1;
47256 break;
47257 case V16QImode:
47258 gen = gen_avx2_pbroadcastv16qi;
47259 break;
47260 case V8HImode:
47261 gen = gen_avx2_pbroadcastv8hi;
47262 break;
47263 case V16SFmode:
47264 if (TARGET_AVX512F)
47265 gen = gen_avx512f_vec_dupv16sf_1;
47266 break;
47267 case V8SFmode:
47268 gen = gen_avx2_vec_dupv8sf_1;
47269 break;
47270 case V8DFmode:
47271 if (TARGET_AVX512F)
47272 gen = gen_avx512f_vec_dupv8df_1;
47273 break;
47274 case V8DImode:
47275 if (TARGET_AVX512F)
47276 gen = gen_avx512f_vec_dupv8di_1;
47277 break;
47278 /* For other modes, prefer the other shuffles this function creates. */
47279 default: break;
47281 if (gen != NULL)
47283 if (!d->testing_p)
47284 emit_insn (gen (d->target, d->op0));
47285 return true;
47289 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47290 return true;
47292 /* There are plenty of patterns in sse.md that are written for
47293 SEL+CONCAT and are not replicated for a single op. Perhaps
47294 that should be changed, to avoid the nastiness here. */
47296 /* Recognize interleave style patterns, which means incrementing
47297 every other permutation operand. */
47298 for (i = 0; i < nelt; i += 2)
47300 perm2[i] = d->perm[i] & mask;
47301 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47303 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47304 d->testing_p))
47305 return true;
47307 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47308 if (nelt >= 4)
47310 for (i = 0; i < nelt; i += 4)
47312 perm2[i + 0] = d->perm[i + 0] & mask;
47313 perm2[i + 1] = d->perm[i + 1] & mask;
47314 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47315 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47318 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47319 d->testing_p))
47320 return true;
47324 /* Finally, try the fully general two operand permute. */
47325 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47326 d->testing_p))
47327 return true;
47329 /* Recognize interleave style patterns with reversed operands. */
47330 if (!d->one_operand_p)
47332 for (i = 0; i < nelt; ++i)
47334 unsigned e = d->perm[i];
47335 if (e >= nelt)
47336 e -= nelt;
47337 else
47338 e += nelt;
47339 perm2[i] = e;
47342 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47343 d->testing_p))
47344 return true;
47347 /* Try the SSE4.1 blend variable merge instructions. */
47348 if (expand_vec_perm_blend (d))
47349 return true;
47351 /* Try one of the AVX vpermil variable permutations. */
47352 if (expand_vec_perm_vpermil (d))
47353 return true;
47355 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47356 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47357 if (expand_vec_perm_pshufb (d))
47358 return true;
47360 /* Try the AVX2 vpalignr instruction. */
47361 if (expand_vec_perm_palignr (d, true))
47362 return true;
47364 /* Try the AVX512F vpermi2 instructions. */
47365 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47366 return true;
47368 return false;
47371 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47372 in terms of a pair of pshuflw + pshufhw instructions. */
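/* For example, the V8HImode permutation { 2 0 3 1 5 7 4 6 } keeps the first
   four words within the low quadword and the last four within the high
   quadword, so pshuflw can handle the low half and pshufhw the high half.  */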
47374 static bool
47375 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47377 unsigned char perm2[MAX_VECT_LEN];
47378 unsigned i;
47379 bool ok;
47381 if (d->vmode != V8HImode || !d->one_operand_p)
47382 return false;
47384 /* The two permutations only operate in 64-bit lanes. */
47385 for (i = 0; i < 4; ++i)
47386 if (d->perm[i] >= 4)
47387 return false;
47388 for (i = 4; i < 8; ++i)
47389 if (d->perm[i] < 4)
47390 return false;
47392 if (d->testing_p)
47393 return true;
47395 /* Emit the pshuflw. */
47396 memcpy (perm2, d->perm, 4);
47397 for (i = 4; i < 8; ++i)
47398 perm2[i] = i;
47399 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47400 gcc_assert (ok);
47402 /* Emit the pshufhw. */
47403 memcpy (perm2 + 4, d->perm + 4, 4);
47404 for (i = 0; i < 4; ++i)
47405 perm2[i] = i;
47406 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47407 gcc_assert (ok);
47409 return true;
47412 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47413 the permutation using the SSSE3 palignr instruction. This succeeds
47414 when all of the elements in PERM fit within one vector and we merely
47415 need to shift them down so that a single vector permutation has a
47416 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47417 the vpalignr instruction itself can perform the requested permutation. */
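/* For example, the two-operand V16QImode permutation { 5 6 7 ... 20 }
   selects 16 consecutive bytes from the concatenation of the two inputs;
   a palignr with a byte shift of 5 performs it directly (the degenerate
   in_order case below).  */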
47419 static bool
47420 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47422 unsigned i, nelt = d->nelt;
47423 unsigned min, max, minswap, maxswap;
47424 bool in_order, ok, swap = false;
47425 rtx shift, target;
47426 struct expand_vec_perm_d dcopy;
47428 /* Even with AVX, palignr only operates on 128-bit vectors;
47429 with AVX2, palignr operates on both 128-bit lanes. */
47430 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47431 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47432 return false;
47434 min = 2 * nelt;
47435 max = 0;
47436 minswap = 2 * nelt;
47437 maxswap = 0;
47438 for (i = 0; i < nelt; ++i)
47440 unsigned e = d->perm[i];
47441 unsigned eswap = d->perm[i] ^ nelt;
47442 if (GET_MODE_SIZE (d->vmode) == 32)
47444 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47445 eswap = e ^ (nelt / 2);
47447 if (e < min)
47448 min = e;
47449 if (e > max)
47450 max = e;
47451 if (eswap < minswap)
47452 minswap = eswap;
47453 if (eswap > maxswap)
47454 maxswap = eswap;
47456 if (min == 0
47457 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47459 if (d->one_operand_p
47460 || minswap == 0
47461 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47462 ? nelt / 2 : nelt))
47463 return false;
47464 swap = true;
47465 min = minswap;
47466 max = maxswap;
47469 /* Given that we have SSSE3, we know we'll be able to implement the
47470 single operand permutation after the palignr with pshufb for
47471 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47472 first. */
47473 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47474 return true;
47476 dcopy = *d;
47477 if (swap)
47479 dcopy.op0 = d->op1;
47480 dcopy.op1 = d->op0;
47481 for (i = 0; i < nelt; ++i)
47482 dcopy.perm[i] ^= nelt;
47485 in_order = true;
47486 for (i = 0; i < nelt; ++i)
47488 unsigned e = dcopy.perm[i];
47489 if (GET_MODE_SIZE (d->vmode) == 32
47490 && e >= nelt
47491 && (e & (nelt / 2 - 1)) < min)
47492 e = e - min - (nelt / 2);
47493 else
47494 e = e - min;
47495 if (e != i)
47496 in_order = false;
47497 dcopy.perm[i] = e;
47499 dcopy.one_operand_p = true;
47501 if (single_insn_only_p && !in_order)
47502 return false;
47504 /* For AVX2, test whether we can permute the result in one instruction. */
47505 if (d->testing_p)
47507 if (in_order)
47508 return true;
47509 dcopy.op1 = dcopy.op0;
47510 return expand_vec_perm_1 (&dcopy);
47513 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47514 if (GET_MODE_SIZE (d->vmode) == 16)
47516 target = gen_reg_rtx (TImode);
47517 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47518 gen_lowpart (TImode, dcopy.op0), shift));
47520 else
47522 target = gen_reg_rtx (V2TImode);
47523 emit_insn (gen_avx2_palignrv2ti (target,
47524 gen_lowpart (V2TImode, dcopy.op1),
47525 gen_lowpart (V2TImode, dcopy.op0),
47526 shift));
47529 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47531 /* Test for the degenerate case where the alignment by itself
47532 produces the desired permutation. */
47533 if (in_order)
47535 emit_move_insn (d->target, dcopy.op0);
47536 return true;
47539 ok = expand_vec_perm_1 (&dcopy);
47540 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47542 return ok;
47545 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47546 the permutation using the SSE4_1 pblendv instruction. Potentially
47547 reduces the permutation from 2 pshufb insns and an ior to 1 pshufb and 1 pblendv. */
47549 static bool
47550 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47552 unsigned i, which, nelt = d->nelt;
47553 struct expand_vec_perm_d dcopy, dcopy1;
47554 machine_mode vmode = d->vmode;
47555 bool ok;
47557 /* Use the same checks as in expand_vec_perm_blend. */
47558 if (d->one_operand_p)
47559 return false;
47560 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47562 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47564 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47566 else
47567 return false;
47569 /* Figure out which permutation elements do not stay in their
47570 respective lanes. */
47571 for (i = 0, which = 0; i < nelt; ++i)
47573 unsigned e = d->perm[i];
47574 if (e != i)
47575 which |= (e < nelt ? 1 : 2);
47577 /* We can pblend the part where elements do not stay in their
47578 respective lanes only when these elements all come from the same
47579 half of the permutation.
47580 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47581 lanes, but both are >= 8.
47582 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their respective
47583 lanes, and 8 >= 8 but 2 is not. */
47584 if (which != 1 && which != 2)
47585 return false;
47586 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47587 return true;
47589 /* First we apply a one-operand permutation to the part where
47590 elements do not stay in their respective lanes. */
47591 dcopy = *d;
47592 if (which == 2)
47593 dcopy.op0 = dcopy.op1 = d->op1;
47594 else
47595 dcopy.op0 = dcopy.op1 = d->op0;
47596 if (!d->testing_p)
47597 dcopy.target = gen_reg_rtx (vmode);
47598 dcopy.one_operand_p = true;
47600 for (i = 0; i < nelt; ++i)
47601 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47603 ok = expand_vec_perm_1 (&dcopy);
47604 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47605 return false;
47606 else
47607 gcc_assert (ok);
47608 if (d->testing_p)
47609 return true;
47611 /* Next we put permuted elements into their positions. */
47612 dcopy1 = *d;
47613 if (which == 2)
47614 dcopy1.op1 = dcopy.target;
47615 else
47616 dcopy1.op0 = dcopy.target;
47618 for (i = 0; i < nelt; ++i)
47619 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47621 ok = expand_vec_perm_blend (&dcopy1);
47622 gcc_assert (ok);
47624 return true;
47627 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47629 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47630 a two vector permutation into a single vector permutation by using
47631 an interleave operation to merge the vectors. */
47633 static bool
47634 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47636 struct expand_vec_perm_d dremap, dfinal;
47637 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47638 unsigned HOST_WIDE_INT contents;
47639 unsigned char remap[2 * MAX_VECT_LEN];
47640 rtx_insn *seq;
47641 bool ok, same_halves = false;
47643 if (GET_MODE_SIZE (d->vmode) == 16)
47645 if (d->one_operand_p)
47646 return false;
47648 else if (GET_MODE_SIZE (d->vmode) == 32)
47650 if (!TARGET_AVX)
47651 return false;
47652 /* For 32-byte modes allow even the d->one_operand_p case.
47653 The lack of cross-lane shuffling in some instructions
47654 might prevent a single insn shuffle. */
47655 dfinal = *d;
47656 dfinal.testing_p = true;
47657 /* If expand_vec_perm_interleave3 can expand this into
47658 a 3-insn sequence, give up and let it be expanded as
47659 a 3-insn sequence. While that is one insn longer,
47660 it doesn't need a memory operand, and in the common
47661 case where both the interleave low and interleave high
47662 permutations with the same operands are adjacent, it
47663 needs only 4 insns for both after CSE. */
47664 if (expand_vec_perm_interleave3 (&dfinal))
47665 return false;
47667 else
47668 return false;
47670 /* Examine from whence the elements come. */
47671 contents = 0;
47672 for (i = 0; i < nelt; ++i)
47673 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47675 memset (remap, 0xff, sizeof (remap));
47676 dremap = *d;
47678 if (GET_MODE_SIZE (d->vmode) == 16)
47680 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47682 /* Split the two input vectors into 4 halves. */
47683 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47684 h2 = h1 << nelt2;
47685 h3 = h2 << nelt2;
47686 h4 = h3 << nelt2;
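/* For V8HImode (nelt == 8, nelt2 == 4) this gives h1 == 0x000f covering
   elements 0-3 of op0, h2 == 0x00f0 covering elements 4-7 of op0,
   h3 == 0x0f00 covering elements 0-3 of op1 and h4 == 0xf000 covering
   elements 4-7 of op1.  */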
47688 /* If the elements all come from the low halves, use interleave low;
47689 similarly for interleave high. If the elements come from mismatched
47690 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47691 if ((contents & (h1 | h3)) == contents)
47693 /* punpckl* */
47694 for (i = 0; i < nelt2; ++i)
47696 remap[i] = i * 2;
47697 remap[i + nelt] = i * 2 + 1;
47698 dremap.perm[i * 2] = i;
47699 dremap.perm[i * 2 + 1] = i + nelt;
47701 if (!TARGET_SSE2 && d->vmode == V4SImode)
47702 dremap.vmode = V4SFmode;
47704 else if ((contents & (h2 | h4)) == contents)
47706 /* punpckh* */
47707 for (i = 0; i < nelt2; ++i)
47709 remap[i + nelt2] = i * 2;
47710 remap[i + nelt + nelt2] = i * 2 + 1;
47711 dremap.perm[i * 2] = i + nelt2;
47712 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47714 if (!TARGET_SSE2 && d->vmode == V4SImode)
47715 dremap.vmode = V4SFmode;
47717 else if ((contents & (h1 | h4)) == contents)
47719 /* shufps */
47720 for (i = 0; i < nelt2; ++i)
47722 remap[i] = i;
47723 remap[i + nelt + nelt2] = i + nelt2;
47724 dremap.perm[i] = i;
47725 dremap.perm[i + nelt2] = i + nelt + nelt2;
47727 if (nelt != 4)
47729 /* shufpd */
47730 dremap.vmode = V2DImode;
47731 dremap.nelt = 2;
47732 dremap.perm[0] = 0;
47733 dremap.perm[1] = 3;
47736 else if ((contents & (h2 | h3)) == contents)
47738 /* shufps */
47739 for (i = 0; i < nelt2; ++i)
47741 remap[i + nelt2] = i;
47742 remap[i + nelt] = i + nelt2;
47743 dremap.perm[i] = i + nelt2;
47744 dremap.perm[i + nelt2] = i + nelt;
47746 if (nelt != 4)
47748 /* shufpd */
47749 dremap.vmode = V2DImode;
47750 dremap.nelt = 2;
47751 dremap.perm[0] = 1;
47752 dremap.perm[1] = 2;
47755 else
47756 return false;
47758 else
47760 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47761 unsigned HOST_WIDE_INT q[8];
47762 unsigned int nonzero_halves[4];
47764 /* Split the two input vectors into 8 quarters. */
47765 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47766 for (i = 1; i < 8; ++i)
47767 q[i] = q[0] << (nelt4 * i);
47768 for (i = 0; i < 4; ++i)
47769 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47771 nonzero_halves[nzcnt] = i;
47772 ++nzcnt;
47775 if (nzcnt == 1)
47777 gcc_assert (d->one_operand_p);
47778 nonzero_halves[1] = nonzero_halves[0];
47779 same_halves = true;
47781 else if (d->one_operand_p)
47783 gcc_assert (nonzero_halves[0] == 0);
47784 gcc_assert (nonzero_halves[1] == 1);
47787 if (nzcnt <= 2)
47789 if (d->perm[0] / nelt2 == nonzero_halves[1])
47791 /* Attempt to increase the likelihood that dfinal
47792 shuffle will be intra-lane. */
47793 char tmph = nonzero_halves[0];
47794 nonzero_halves[0] = nonzero_halves[1];
47795 nonzero_halves[1] = tmph;
47798 /* vperm2f128 or vperm2i128. */
47799 for (i = 0; i < nelt2; ++i)
47801 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47802 remap[i + nonzero_halves[0] * nelt2] = i;
47803 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47804 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47807 if (d->vmode != V8SFmode
47808 && d->vmode != V4DFmode
47809 && d->vmode != V8SImode)
47811 dremap.vmode = V8SImode;
47812 dremap.nelt = 8;
47813 for (i = 0; i < 4; ++i)
47815 dremap.perm[i] = i + nonzero_halves[0] * 4;
47816 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47820 else if (d->one_operand_p)
47821 return false;
47822 else if (TARGET_AVX2
47823 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47825 /* vpunpckl* */
47826 for (i = 0; i < nelt4; ++i)
47828 remap[i] = i * 2;
47829 remap[i + nelt] = i * 2 + 1;
47830 remap[i + nelt2] = i * 2 + nelt2;
47831 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47832 dremap.perm[i * 2] = i;
47833 dremap.perm[i * 2 + 1] = i + nelt;
47834 dremap.perm[i * 2 + nelt2] = i + nelt2;
47835 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47838 else if (TARGET_AVX2
47839 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47841 /* vpunpckh* */
47842 for (i = 0; i < nelt4; ++i)
47844 remap[i + nelt4] = i * 2;
47845 remap[i + nelt + nelt4] = i * 2 + 1;
47846 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47847 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47848 dremap.perm[i * 2] = i + nelt4;
47849 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47850 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47851 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47854 else
47855 return false;
47858 /* Use the remapping array set up above to move the elements from their
47859 swizzled locations into their final destinations. */
47860 dfinal = *d;
47861 for (i = 0; i < nelt; ++i)
47863 unsigned e = remap[d->perm[i]];
47864 gcc_assert (e < nelt);
47865 /* If same_halves is true, both halves of the remapped vector are the
47866 same. Avoid cross-lane accesses if possible. */
47867 if (same_halves && i >= nelt2)
47869 gcc_assert (e < nelt2);
47870 dfinal.perm[i] = e + nelt2;
47872 else
47873 dfinal.perm[i] = e;
47875 if (!d->testing_p)
47877 dremap.target = gen_reg_rtx (dremap.vmode);
47878 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47880 dfinal.op1 = dfinal.op0;
47881 dfinal.one_operand_p = true;
47883 /* Test if the final remap can be done with a single insn. For V4SFmode or
47884 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47885 start_sequence ();
47886 ok = expand_vec_perm_1 (&dfinal);
47887 seq = get_insns ();
47888 end_sequence ();
47890 if (!ok)
47891 return false;
47893 if (d->testing_p)
47894 return true;
47896 if (dremap.vmode != dfinal.vmode)
47898 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47899 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47902 ok = expand_vec_perm_1 (&dremap);
47903 gcc_assert (ok);
47905 emit_insn (seq);
47906 return true;
47909 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47910 a single vector cross-lane permutation into vpermq followed
47911 by any of the single insn permutations. */
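/* For example, a V32QImode permutation whose low half reads only from
   quadwords 0 and 3 of the input and whose high half reads only from
   quadwords 1 and 2 passes the test below: vpermq first gathers those
   quadwords into the right lanes, and a single in-lane shuffle finishes
   the job.  */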
47913 static bool
47914 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47916 struct expand_vec_perm_d dremap, dfinal;
47917 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47918 unsigned contents[2];
47919 bool ok;
47921 if (!(TARGET_AVX2
47922 && (d->vmode == V32QImode || d->vmode == V16HImode)
47923 && d->one_operand_p))
47924 return false;
47926 contents[0] = 0;
47927 contents[1] = 0;
47928 for (i = 0; i < nelt2; ++i)
47930 contents[0] |= 1u << (d->perm[i] / nelt4);
47931 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47934 for (i = 0; i < 2; ++i)
47936 unsigned int cnt = 0;
47937 for (j = 0; j < 4; ++j)
47938 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47939 return false;
47942 if (d->testing_p)
47943 return true;
47945 dremap = *d;
47946 dremap.vmode = V4DImode;
47947 dremap.nelt = 4;
47948 dremap.target = gen_reg_rtx (V4DImode);
47949 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47950 dremap.op1 = dremap.op0;
47951 dremap.one_operand_p = true;
47952 for (i = 0; i < 2; ++i)
47954 unsigned int cnt = 0;
47955 for (j = 0; j < 4; ++j)
47956 if ((contents[i] & (1u << j)) != 0)
47957 dremap.perm[2 * i + cnt++] = j;
47958 for (; cnt < 2; ++cnt)
47959 dremap.perm[2 * i + cnt] = 0;
47962 dfinal = *d;
47963 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47964 dfinal.op1 = dfinal.op0;
47965 dfinal.one_operand_p = true;
47966 for (i = 0, j = 0; i < nelt; ++i)
47968 if (i == nelt2)
47969 j = 2;
47970 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47971 if ((d->perm[i] / nelt4) == dremap.perm[j])
47973 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47974 dfinal.perm[i] |= nelt4;
47975 else
47976 gcc_unreachable ();
47979 ok = expand_vec_perm_1 (&dremap);
47980 gcc_assert (ok);
47982 ok = expand_vec_perm_1 (&dfinal);
47983 gcc_assert (ok);
47985 return true;
47988 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47989 a vector permutation using two instructions, vperm2f128 resp.
47990 vperm2i128 followed by any single in-lane permutation. */
47992 static bool
47993 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47995 struct expand_vec_perm_d dfirst, dsecond;
47996 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47997 bool ok;
47999 if (!TARGET_AVX
48000 || GET_MODE_SIZE (d->vmode) != 32
48001 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48002 return false;
48004 dsecond = *d;
48005 dsecond.one_operand_p = false;
48006 dsecond.testing_p = true;
48008 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48009 immediate. For perm < 16 the second permutation uses
48010 d->op0 as the first operand; for perm >= 16 it uses d->op1
48011 as the first operand. The second operand is the result of
48012 vperm2[fi]128. */
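/* For example, perm == 3 asks vperm2[fi]128 to place op1's high 128-bit
   half in the low lane and op0's low half in the high lane; the
   corresponding immediate is ((3 << 2) | 3) & 0x33 == 0x03.  */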
48013 for (perm = 0; perm < 32; perm++)
48015 /* Ignore permutations which do not move anything cross-lane. */
48016 if (perm < 16)
48018 /* The second shuffle for e.g. V4DFmode has
48019 0123 and ABCD operands.
48020 Ignore AB23, as 23 is already in the second lane
48021 of the first operand. */
48022 if ((perm & 0xc) == (1 << 2)) continue;
48023 /* And 01CD, as 01 is in the first lane of the first
48024 operand. */
48025 if ((perm & 3) == 0) continue;
48026 /* And 4567, as then the vperm2[fi]128 doesn't change
48027 anything on the original 4567 second operand. */
48028 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48030 else
48032 /* The second shuffle for e.g. V4DFmode has
48033 4567 and ABCD operands.
48034 Ignore AB67, as 67 is already in the second lane
48035 of the first operand. */
48036 if ((perm & 0xc) == (3 << 2)) continue;
48037 /* And 45CD, as 45 is in the first lane of the first
48038 operand. */
48039 if ((perm & 3) == 2) continue;
48040 /* And 0123, as then the vperm2[fi]128 doesn't change
48041 anything on the original 0123 first operand. */
48042 if ((perm & 0xf) == (1 << 2)) continue;
48045 for (i = 0; i < nelt; i++)
48047 j = d->perm[i] / nelt2;
48048 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48049 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48050 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48051 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48052 else
48053 break;
48056 if (i == nelt)
48058 start_sequence ();
48059 ok = expand_vec_perm_1 (&dsecond);
48060 end_sequence ();
48062 else
48063 ok = false;
48065 if (ok)
48067 if (d->testing_p)
48068 return true;
48070 /* Found a usable second shuffle. dfirst will be
48071 vperm2f128 on d->op0 and d->op1. */
48072 dsecond.testing_p = false;
48073 dfirst = *d;
48074 dfirst.target = gen_reg_rtx (d->vmode);
48075 for (i = 0; i < nelt; i++)
48076 dfirst.perm[i] = (i & (nelt2 - 1))
48077 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48079 canonicalize_perm (&dfirst);
48080 ok = expand_vec_perm_1 (&dfirst);
48081 gcc_assert (ok);
48083 /* And dsecond is some single insn shuffle, taking
48084 d->op0 and result of vperm2f128 (if perm < 16) or
48085 d->op1 and result of vperm2f128 (otherwise). */
48086 if (perm >= 16)
48087 dsecond.op0 = dsecond.op1;
48088 dsecond.op1 = dfirst.target;
48090 ok = expand_vec_perm_1 (&dsecond);
48091 gcc_assert (ok);
48093 return true;
48096 /* For one operand, the only useful vperm2f128 permutation is 0x01
48097 aka lanes swap. */
48098 if (d->one_operand_p)
48099 return false;
48102 return false;
48105 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48106 a two vector permutation using 2 intra-lane interleave insns
48107 and cross-lane shuffle for 32-byte vectors. */
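/* For example, for V8SImode the permutation { 0 8 1 9 2 10 3 11 } matches
   vec_interleave_low below and { 4 12 5 13 6 14 7 15 } matches
   vec_interleave_high.  */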
48109 static bool
48110 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48112 unsigned i, nelt;
48113 rtx (*gen) (rtx, rtx, rtx);
48115 if (d->one_operand_p)
48116 return false;
48117 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48119 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48121 else
48122 return false;
48124 nelt = d->nelt;
48125 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48126 return false;
48127 for (i = 0; i < nelt; i += 2)
48128 if (d->perm[i] != d->perm[0] + i / 2
48129 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48130 return false;
48132 if (d->testing_p)
48133 return true;
48135 switch (d->vmode)
48137 case V32QImode:
48138 if (d->perm[0])
48139 gen = gen_vec_interleave_highv32qi;
48140 else
48141 gen = gen_vec_interleave_lowv32qi;
48142 break;
48143 case V16HImode:
48144 if (d->perm[0])
48145 gen = gen_vec_interleave_highv16hi;
48146 else
48147 gen = gen_vec_interleave_lowv16hi;
48148 break;
48149 case V8SImode:
48150 if (d->perm[0])
48151 gen = gen_vec_interleave_highv8si;
48152 else
48153 gen = gen_vec_interleave_lowv8si;
48154 break;
48155 case V4DImode:
48156 if (d->perm[0])
48157 gen = gen_vec_interleave_highv4di;
48158 else
48159 gen = gen_vec_interleave_lowv4di;
48160 break;
48161 case V8SFmode:
48162 if (d->perm[0])
48163 gen = gen_vec_interleave_highv8sf;
48164 else
48165 gen = gen_vec_interleave_lowv8sf;
48166 break;
48167 case V4DFmode:
48168 if (d->perm[0])
48169 gen = gen_vec_interleave_highv4df;
48170 else
48171 gen = gen_vec_interleave_lowv4df;
48172 break;
48173 default:
48174 gcc_unreachable ();
48177 emit_insn (gen (d->target, d->op0, d->op1));
48178 return true;
48181 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48182 a single vector permutation using a single intra-lane vector
48183 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48184 the non-swapped and swapped vectors together. */
48186 static bool
48187 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48189 struct expand_vec_perm_d dfirst, dsecond;
48190 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48191 rtx_insn *seq;
48192 bool ok;
48193 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48195 if (!TARGET_AVX
48196 || TARGET_AVX2
48197 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48198 || !d->one_operand_p)
48199 return false;
48201 dfirst = *d;
48202 for (i = 0; i < nelt; i++)
48203 dfirst.perm[i] = 0xff;
48204 for (i = 0, msk = 0; i < nelt; i++)
48206 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48207 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48208 return false;
48209 dfirst.perm[j] = d->perm[i];
48210 if (j != i)
48211 msk |= (1 << i);
48213 for (i = 0; i < nelt; i++)
48214 if (dfirst.perm[i] == 0xff)
48215 dfirst.perm[i] = i;
48217 if (!d->testing_p)
48218 dfirst.target = gen_reg_rtx (dfirst.vmode);
48220 start_sequence ();
48221 ok = expand_vec_perm_1 (&dfirst);
48222 seq = get_insns ();
48223 end_sequence ();
48225 if (!ok)
48226 return false;
48228 if (d->testing_p)
48229 return true;
48231 emit_insn (seq);
48233 dsecond = *d;
48234 dsecond.op0 = dfirst.target;
48235 dsecond.op1 = dfirst.target;
48236 dsecond.one_operand_p = true;
48237 dsecond.target = gen_reg_rtx (dsecond.vmode);
48238 for (i = 0; i < nelt; i++)
48239 dsecond.perm[i] = i ^ nelt2;
48241 ok = expand_vec_perm_1 (&dsecond);
48242 gcc_assert (ok);
48244 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48245 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48246 return true;
48249 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48250 permutation using two vperm2f128, followed by a vshufpd insn blending
48251 the two vectors together. */
48253 static bool
48254 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48256 struct expand_vec_perm_d dfirst, dsecond, dthird;
48257 bool ok;
48259 if (!TARGET_AVX || (d->vmode != V4DFmode))
48260 return false;
48262 if (d->testing_p)
48263 return true;
48265 dfirst = *d;
48266 dsecond = *d;
48267 dthird = *d;
48269 dfirst.perm[0] = (d->perm[0] & ~1);
48270 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48271 dfirst.perm[2] = (d->perm[2] & ~1);
48272 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48273 dsecond.perm[0] = (d->perm[1] & ~1);
48274 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48275 dsecond.perm[2] = (d->perm[3] & ~1);
48276 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48277 dthird.perm[0] = (d->perm[0] % 2);
48278 dthird.perm[1] = (d->perm[1] % 2) + 4;
48279 dthird.perm[2] = (d->perm[2] % 2) + 2;
48280 dthird.perm[3] = (d->perm[3] % 2) + 6;
48282 dfirst.target = gen_reg_rtx (dfirst.vmode);
48283 dsecond.target = gen_reg_rtx (dsecond.vmode);
48284 dthird.op0 = dfirst.target;
48285 dthird.op1 = dsecond.target;
48286 dthird.one_operand_p = false;
48288 canonicalize_perm (&dfirst);
48289 canonicalize_perm (&dsecond);
48291 ok = expand_vec_perm_1 (&dfirst)
48292 && expand_vec_perm_1 (&dsecond)
48293 && expand_vec_perm_1 (&dthird);
48295 gcc_assert (ok);
48297 return true;
48300 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48301 permutation with two pshufb insns and an ior. We should have already
48302 failed all two instruction sequences. */
48304 static bool
48305 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48307 rtx rperm[2][16], vperm, l, h, op, m128;
48308 unsigned int i, nelt, eltsz;
48310 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48311 return false;
48312 gcc_assert (!d->one_operand_p);
48314 if (d->testing_p)
48315 return true;
48317 nelt = d->nelt;
48318 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48320 /* Generate two permutation masks. If the required element is within
48321 the given vector it is shuffled into the proper lane. If the required
48322 element is in the other vector, force a zero into the lane by setting
48323 bit 7 in the permutation mask. */
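/* Editorial example (illustrative): for a V16QImode permutation such as
   { 0, 16, 1, 17, ... } the first mask becomes { 0, -128, 1, -128, ... }
   and the second { -128, 0, -128, 1, ... }; each pshufb keeps only the
   bytes owned by its operand and the final por merges the two results.  */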
48324 m128 = GEN_INT (-128);
48325 for (i = 0; i < nelt; ++i)
48327 unsigned j, e = d->perm[i];
48328 unsigned which = (e >= nelt);
48329 if (e >= nelt)
48330 e -= nelt;
48332 for (j = 0; j < eltsz; ++j)
48334 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48335 rperm[1-which][i*eltsz + j] = m128;
48339 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48340 vperm = force_reg (V16QImode, vperm);
48342 l = gen_reg_rtx (V16QImode);
48343 op = gen_lowpart (V16QImode, d->op0);
48344 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48346 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48347 vperm = force_reg (V16QImode, vperm);
48349 h = gen_reg_rtx (V16QImode);
48350 op = gen_lowpart (V16QImode, d->op1);
48351 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48353 op = d->target;
48354 if (d->vmode != V16QImode)
48355 op = gen_reg_rtx (V16QImode);
48356 emit_insn (gen_iorv16qi3 (op, l, h));
48357 if (op != d->target)
48358 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48360 return true;
48363 /* Implement an arbitrary permutation of a single V32QImode or V16QImode operand
48364 with two vpshufb insns, vpermq and vpor. We should have already failed
48365 all two or three instruction sequences. */
48367 static bool
48368 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48370 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48371 unsigned int i, nelt, eltsz;
48373 if (!TARGET_AVX2
48374 || !d->one_operand_p
48375 || (d->vmode != V32QImode && d->vmode != V16HImode))
48376 return false;
48378 if (d->testing_p)
48379 return true;
48381 nelt = d->nelt;
48382 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48384 /* Generate two permutation masks. If the required element is within
48385 the same lane, it is shuffled in. If the required element is from the
48386 other lane, force a zero by setting bit 7 in the permutation mask.
48387 The other mask has non-negative elements only where an element is
48388 requested from the other lane; those bytes are also moved to the other
48389 lane, so that the result of vpshufb can have its two V2TImode halves
48390 swapped. */
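/* Editorial note (illustrative): the "which" offset computed below is
   nonzero exactly when an element has to cross the 128-bit lane boundary;
   such bytes are written into the cross-lane mask (rperm[1]) at the
   mirrored lane position, so that after the later vpermq lane swap they
   line up with the in-lane bytes selected by rperm[0].  */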
48391 m128 = GEN_INT (-128);
48392 for (i = 0; i < nelt; ++i)
48394 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48395 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48397 for (j = 0; j < eltsz; ++j)
48399 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48400 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48404 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48405 vperm = force_reg (V32QImode, vperm);
48407 h = gen_reg_rtx (V32QImode);
48408 op = gen_lowpart (V32QImode, d->op0);
48409 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48411 /* Swap the 128-bit lanes of h into hp. */
48412 hp = gen_reg_rtx (V4DImode);
48413 op = gen_lowpart (V4DImode, h);
48414 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48415 const1_rtx));
48417 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48418 vperm = force_reg (V32QImode, vperm);
48420 l = gen_reg_rtx (V32QImode);
48421 op = gen_lowpart (V32QImode, d->op0);
48422 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48424 op = d->target;
48425 if (d->vmode != V32QImode)
48426 op = gen_reg_rtx (V32QImode);
48427 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48428 if (op != d->target)
48429 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48431 return true;
48434 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48435 and extract-odd permutations of two V32QImode or V16QImode operands
48436 with two vpshufb insns, vpor and vpermq. We should have already
48437 failed all two or three instruction sequences. */
48439 static bool
48440 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48442 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48443 unsigned int i, nelt, eltsz;
48445 if (!TARGET_AVX2
48446 || d->one_operand_p
48447 || (d->vmode != V32QImode && d->vmode != V16HImode))
48448 return false;
48450 for (i = 0; i < d->nelt; ++i)
48451 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48452 return false;
48454 if (d->testing_p)
48455 return true;
48457 nelt = d->nelt;
48458 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48460 /* Generate two permutation masks. In the first permutation mask
48461 the first quarter will contain indexes for the first half
48462 of the op0, the second quarter will contain bit 7 set, third quarter
48463 will contain indexes for the second half of the op0 and the
48464 last quarter bit 7 set. In the second permutation mask
48465 the first quarter will contain bit 7 set, the second quarter
48466 indexes for the first half of the op1, the third quarter bit 7 set
48467 and last quarter indexes for the second half of the op1.
48468 I.e. the first mask e.g. for V32QImode extract even will be:
48469 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48470 (all values masked with 0xf except for -128) and second mask
48471 for extract even will be
48472 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48473 m128 = GEN_INT (-128);
48474 for (i = 0; i < nelt; ++i)
48476 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48477 unsigned which = d->perm[i] >= nelt;
48478 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48480 for (j = 0; j < eltsz; ++j)
48482 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48483 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48487 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48488 vperm = force_reg (V32QImode, vperm);
48490 l = gen_reg_rtx (V32QImode);
48491 op = gen_lowpart (V32QImode, d->op0);
48492 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48494 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48495 vperm = force_reg (V32QImode, vperm);
48497 h = gen_reg_rtx (V32QImode);
48498 op = gen_lowpart (V32QImode, d->op1);
48499 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48501 ior = gen_reg_rtx (V32QImode);
48502 emit_insn (gen_iorv32qi3 (ior, l, h));
48504 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48505 op = gen_reg_rtx (V4DImode);
48506 ior = gen_lowpart (V4DImode, ior);
48507 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48508 const1_rtx, GEN_INT (3)));
48509 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48511 return true;
48514 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48515 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48516 with two "and" and "pack" or two "shift" and "pack" insns. We should
48517 have already failed all two instruction sequences. */
48519 static bool
48520 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48522 rtx op, dop0, dop1, t, rperm[16];
48523 unsigned i, odd, c, s, nelt = d->nelt;
48524 bool end_perm = false;
48525 machine_mode half_mode;
48526 rtx (*gen_and) (rtx, rtx, rtx);
48527 rtx (*gen_pack) (rtx, rtx, rtx);
48528 rtx (*gen_shift) (rtx, rtx, rtx);
48530 if (d->one_operand_p)
48531 return false;
48533 switch (d->vmode)
48535 case V8HImode:
48536 /* Required for "pack". */
48537 if (!TARGET_SSE4_1)
48538 return false;
48539 c = 0xffff;
48540 s = 16;
48541 half_mode = V4SImode;
48542 gen_and = gen_andv4si3;
48543 gen_pack = gen_sse4_1_packusdw;
48544 gen_shift = gen_lshrv4si3;
48545 break;
48546 case V16QImode:
48547 /* No check as all instructions are SSE2. */
48548 c = 0xff;
48549 s = 8;
48550 half_mode = V8HImode;
48551 gen_and = gen_andv8hi3;
48552 gen_pack = gen_sse2_packuswb;
48553 gen_shift = gen_lshrv8hi3;
48554 break;
48555 case V16HImode:
48556 if (!TARGET_AVX2)
48557 return false;
48558 c = 0xffff;
48559 s = 16;
48560 half_mode = V8SImode;
48561 gen_and = gen_andv8si3;
48562 gen_pack = gen_avx2_packusdw;
48563 gen_shift = gen_lshrv8si3;
48564 end_perm = true;
48565 break;
48566 case V32QImode:
48567 if (!TARGET_AVX2)
48568 return false;
48569 c = 0xff;
48570 s = 8;
48571 half_mode = V16HImode;
48572 gen_and = gen_andv16hi3;
48573 gen_pack = gen_avx2_packuswb;
48574 gen_shift = gen_lshrv16hi3;
48575 end_perm = true;
48576 break;
48577 default:
48578 /* Only for V8HI, V16QI, V16HI and V32QI modes is this approach more
48579 profitable than general shuffles. */
48580 return false;
48583 /* Check that permutation is even or odd. */
48584 odd = d->perm[0];
48585 if (odd > 1)
48586 return false;
48588 for (i = 1; i < nelt; ++i)
48589 if (d->perm[i] != 2 * i + odd)
48590 return false;
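/* Editorial note (illustrative): for the even case each wider element is
   masked with C (0xff or 0xffff) so that only its low half survives; for
   the odd case the wider element is logically shifted right by S instead.
   The following pack then narrows both operands into one vector.  */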
48592 if (d->testing_p)
48593 return true;
48595 dop0 = gen_reg_rtx (half_mode);
48596 dop1 = gen_reg_rtx (half_mode);
48597 if (odd == 0)
48599 for (i = 0; i < nelt / 2; i++)
48600 rperm[i] = GEN_INT (c);
48601 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48602 t = force_reg (half_mode, t);
48603 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48604 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48606 else
48608 emit_insn (gen_shift (dop0,
48609 gen_lowpart (half_mode, d->op0),
48610 GEN_INT (s)));
48611 emit_insn (gen_shift (dop1,
48612 gen_lowpart (half_mode, d->op1),
48613 GEN_INT (s)));
48615 /* In the AVX2 256-bit case we need to permute the pack result. */
48616 if (TARGET_AVX2 && end_perm)
48618 op = gen_reg_rtx (d->vmode);
48619 t = gen_reg_rtx (V4DImode);
48620 emit_insn (gen_pack (op, dop0, dop1));
48621 emit_insn (gen_avx2_permv4di_1 (t,
48622 gen_lowpart (V4DImode, op),
48623 const0_rtx,
48624 const2_rtx,
48625 const1_rtx,
48626 GEN_INT (3)));
48627 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48629 else
48630 emit_insn (gen_pack (d->target, dop0, dop1));
48632 return true;
48635 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48636 and extract-odd permutations. */
48638 static bool
48639 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48641 rtx t1, t2, t3, t4, t5;
48643 switch (d->vmode)
48645 case V4DFmode:
48646 if (d->testing_p)
48647 break;
48648 t1 = gen_reg_rtx (V4DFmode);
48649 t2 = gen_reg_rtx (V4DFmode);
48651 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48652 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48653 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48655 /* Now an unpck[lh]pd will produce the result required. */
48656 if (odd)
48657 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48658 else
48659 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48660 emit_insn (t3);
48661 break;
48663 case V8SFmode:
48665 int mask = odd ? 0xdd : 0x88;
48667 if (d->testing_p)
48668 break;
48669 t1 = gen_reg_rtx (V8SFmode);
48670 t2 = gen_reg_rtx (V8SFmode);
48671 t3 = gen_reg_rtx (V8SFmode);
48673 /* Shuffle within the 128-bit lanes to produce:
48674 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48675 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48676 GEN_INT (mask)));
48678 /* Shuffle the lanes around to produce:
48679 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48680 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48681 GEN_INT (0x3)));
48683 /* Shuffle within the 128-bit lanes to produce:
48684 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48685 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48687 /* Shuffle within the 128-bit lanes to produce:
48688 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48689 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48691 /* Shuffle the lanes around to produce:
48692 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48693 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48694 GEN_INT (0x20)));
48696 break;
48698 case V2DFmode:
48699 case V4SFmode:
48700 case V2DImode:
48701 case V4SImode:
48702 /* These are always directly implementable by expand_vec_perm_1. */
48703 gcc_unreachable ();
48705 case V8HImode:
48706 if (TARGET_SSE4_1)
48707 return expand_vec_perm_even_odd_pack (d);
48708 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48709 return expand_vec_perm_pshufb2 (d);
48710 else
48712 if (d->testing_p)
48713 break;
48714 /* We need 2*log2(N)-1 operations to achieve odd/even
48715 with interleave. */
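/* Editorial example (illustrative): for the even case the five punpck
   insns below produce { 0 4 8 12 1 5 9 13 } and { 2 6 10 14 3 7 11 15 }
   as intermediates, and the final interleave-low yields
   { 0 2 4 6 8 10 12 14 }; the odd case uses interleave-high instead.  */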
48716 t1 = gen_reg_rtx (V8HImode);
48717 t2 = gen_reg_rtx (V8HImode);
48718 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48719 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48720 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48721 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48722 if (odd)
48723 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48724 else
48725 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48726 emit_insn (t3);
48728 break;
48730 case V16QImode:
48731 return expand_vec_perm_even_odd_pack (d);
48733 case V16HImode:
48734 case V32QImode:
48735 return expand_vec_perm_even_odd_pack (d);
48737 case V4DImode:
48738 if (!TARGET_AVX2)
48740 struct expand_vec_perm_d d_copy = *d;
48741 d_copy.vmode = V4DFmode;
48742 if (d->testing_p)
48743 d_copy.target = gen_lowpart (V4DFmode, d->target);
48744 else
48745 d_copy.target = gen_reg_rtx (V4DFmode);
48746 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48747 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48748 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48750 if (!d->testing_p)
48751 emit_move_insn (d->target,
48752 gen_lowpart (V4DImode, d_copy.target));
48753 return true;
48755 return false;
48758 if (d->testing_p)
48759 break;
48761 t1 = gen_reg_rtx (V4DImode);
48762 t2 = gen_reg_rtx (V4DImode);
48764 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48765 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48766 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48768 /* Now a vpunpck[lh]qdq will produce the result required. */
48769 if (odd)
48770 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48771 else
48772 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48773 emit_insn (t3);
48774 break;
48776 case V8SImode:
48777 if (!TARGET_AVX2)
48779 struct expand_vec_perm_d d_copy = *d;
48780 d_copy.vmode = V8SFmode;
48781 if (d->testing_p)
48782 d_copy.target = gen_lowpart (V8SFmode, d->target);
48783 else
48784 d_copy.target = gen_reg_rtx (V8SFmode);
48785 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48786 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48787 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48789 if (!d->testing_p)
48790 emit_move_insn (d->target,
48791 gen_lowpart (V8SImode, d_copy.target));
48792 return true;
48794 return false;
48797 if (d->testing_p)
48798 break;
48800 t1 = gen_reg_rtx (V8SImode);
48801 t2 = gen_reg_rtx (V8SImode);
48802 t3 = gen_reg_rtx (V4DImode);
48803 t4 = gen_reg_rtx (V4DImode);
48804 t5 = gen_reg_rtx (V4DImode);
48806 /* Shuffle the lanes around into
48807 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48808 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48809 gen_lowpart (V4DImode, d->op1),
48810 GEN_INT (0x20)));
48811 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48812 gen_lowpart (V4DImode, d->op1),
48813 GEN_INT (0x31)));
48815 /* Swap the 2nd and 3rd position in each lane into
48816 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48817 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48818 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48819 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48820 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48822 /* Now a vpunpck[lh]qdq will produce
48823 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48824 if (odd)
48825 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48826 gen_lowpart (V4DImode, t2));
48827 else
48828 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48829 gen_lowpart (V4DImode, t2));
48830 emit_insn (t3);
48831 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48832 break;
48834 default:
48835 gcc_unreachable ();
48838 return true;
48841 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48842 extract-even and extract-odd permutations. */
48844 static bool
48845 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48847 unsigned i, odd, nelt = d->nelt;
48849 odd = d->perm[0];
48850 if (odd != 0 && odd != 1)
48851 return false;
48853 for (i = 1; i < nelt; ++i)
48854 if (d->perm[i] != 2 * i + odd)
48855 return false;
48857 return expand_vec_perm_even_odd_1 (d, odd);
48860 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48861 permutations. We assume that expand_vec_perm_1 has already failed. */
48863 static bool
48864 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48866 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48867 machine_mode vmode = d->vmode;
48868 unsigned char perm2[4];
48869 rtx op0 = d->op0, dest;
48870 bool ok;
48872 switch (vmode)
48874 case V4DFmode:
48875 case V8SFmode:
48876 /* These are special-cased in sse.md so that we can optionally
48877 use the vbroadcast instruction. They expand to two insns
48878 if the input happens to be in a register. */
48879 gcc_unreachable ();
48881 case V2DFmode:
48882 case V2DImode:
48883 case V4SFmode:
48884 case V4SImode:
48885 /* These are always implementable using standard shuffle patterns. */
48886 gcc_unreachable ();
48888 case V8HImode:
48889 case V16QImode:
48890 /* These can be implemented via interleave. We save one insn by
48891 stopping once we have promoted to V4SImode and then using pshufd. */
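/* Editorial example (illustrative): to broadcast element 5 of a V8HImode
   vector, one interleave-high pass duplicates it into 32-bit slot 1 of the
   V4SImode view of the result, after which a single pshufd replicating
   that slot finishes the broadcast.  */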
48892 if (d->testing_p)
48893 return true;
48896 rtx dest;
48897 rtx (*gen) (rtx, rtx, rtx)
48898 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48899 : gen_vec_interleave_lowv8hi;
48901 if (elt >= nelt2)
48903 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48904 : gen_vec_interleave_highv8hi;
48905 elt -= nelt2;
48907 nelt2 /= 2;
48909 dest = gen_reg_rtx (vmode);
48910 emit_insn (gen (dest, op0, op0));
48911 vmode = get_mode_wider_vector (vmode);
48912 op0 = gen_lowpart (vmode, dest);
48914 while (vmode != V4SImode);
48916 memset (perm2, elt, 4);
48917 dest = gen_reg_rtx (V4SImode);
48918 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48919 gcc_assert (ok);
48920 if (!d->testing_p)
48921 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48922 return true;
48924 case V64QImode:
48925 case V32QImode:
48926 case V16HImode:
48927 case V8SImode:
48928 case V4DImode:
48929 /* For AVX2 broadcasts of the first element vpbroadcast* or
48930 vpermq should be used by expand_vec_perm_1. */
48931 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48932 return false;
48934 default:
48935 gcc_unreachable ();
48939 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48940 broadcast permutations. */
48942 static bool
48943 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48945 unsigned i, elt, nelt = d->nelt;
48947 if (!d->one_operand_p)
48948 return false;
48950 elt = d->perm[0];
48951 for (i = 1; i < nelt; ++i)
48952 if (d->perm[i] != elt)
48953 return false;
48955 return expand_vec_perm_broadcast_1 (d);
48958 /* Implement arbitrary permutations of two V64QImode operands
48959 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
48960 static bool
48961 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
48963 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
48964 return false;
48966 if (d->testing_p)
48967 return true;
48969 struct expand_vec_perm_d ds[2];
48970 rtx rperm[128], vperm, target0, target1;
48971 unsigned int i, nelt;
48972 machine_mode vmode;
48974 nelt = d->nelt;
48975 vmode = V64QImode;
48977 for (i = 0; i < 2; i++)
48979 ds[i] = *d;
48980 ds[i].vmode = V32HImode;
48981 ds[i].nelt = 32;
48982 ds[i].target = gen_reg_rtx (V32HImode);
48983 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
48984 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
48987 /* Prepare permutations such that the first one takes care of
48988 putting the even bytes into the right positions or one position
48989 higher (ds[0]), and the second one takes care of
48990 putting the odd bytes into the right positions or one position lower
48991 (ds[1]). */
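/* Editorial note (illustrative): each ds[] is a V32HImode word permutation
   (expanded below via expand_vec_perm_1, typically as vpermi2w under
   AVX512BW); the pshufb masks built in rperm[] then pick either the low
   or the high byte of every selected word, and the final por combines
   the even-byte and odd-byte results.  */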
48993 for (i = 0; i < nelt; i++)
48995 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
48996 if (i & 1)
48998 rperm[i] = constm1_rtx;
48999 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49001 else
49003 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49004 rperm[i + 64] = constm1_rtx;
49008 bool ok = expand_vec_perm_1 (&ds[0]);
49009 gcc_assert (ok);
49010 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49012 ok = expand_vec_perm_1 (&ds[1]);
49013 gcc_assert (ok);
49014 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49016 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49017 vperm = force_reg (vmode, vperm);
49018 target0 = gen_reg_rtx (V64QImode);
49019 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49021 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49022 vperm = force_reg (vmode, vperm);
49023 target1 = gen_reg_rtx (V64QImode);
49024 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49026 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49027 return true;
49030 /* Implement arbitrary permutation of two V32QImode or V16QImode operands
49031 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49032 all the shorter instruction sequences. */
49034 static bool
49035 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49037 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49038 unsigned int i, nelt, eltsz;
49039 bool used[4];
49041 if (!TARGET_AVX2
49042 || d->one_operand_p
49043 || (d->vmode != V32QImode && d->vmode != V16HImode))
49044 return false;
49046 if (d->testing_p)
49047 return true;
49049 nelt = d->nelt;
49050 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49052 /* Generate 4 permutation masks. If the required element is within
49053 the same lane, it is shuffled in. If the required element is from the
49054 other lane, force a zero by setting bit 7 in the permutation mask.
49055 The other mask has non-negative elements only where an element is
49056 requested from the other lane; those bytes are also moved to the other
49057 lane, so that the result of vpshufb can have its two V2TImode halves
49058 swapped. */
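/* Editorial note (illustrative): the mask index computed below is
   which = (element taken from op1 ? 2 : 0) + (crosses a 128-bit lane ? 1 : 0),
   so masks 0/1 handle op0 and masks 2/3 handle op1, with the odd-numbered
   masks feeding the shuffles whose results are lane-swapped afterwards.  */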
49059 m128 = GEN_INT (-128);
49060 for (i = 0; i < 32; ++i)
49062 rperm[0][i] = m128;
49063 rperm[1][i] = m128;
49064 rperm[2][i] = m128;
49065 rperm[3][i] = m128;
49067 used[0] = false;
49068 used[1] = false;
49069 used[2] = false;
49070 used[3] = false;
49071 for (i = 0; i < nelt; ++i)
49073 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49074 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49075 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49077 for (j = 0; j < eltsz; ++j)
49078 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49079 used[which] = true;
49082 for (i = 0; i < 2; ++i)
49084 if (!used[2 * i + 1])
49086 h[i] = NULL_RTX;
49087 continue;
49089 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49090 gen_rtvec_v (32, rperm[2 * i + 1]));
49091 vperm = force_reg (V32QImode, vperm);
49092 h[i] = gen_reg_rtx (V32QImode);
49093 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49094 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49097 /* Swap the 128-bit lanes of h[X]. */
49098 for (i = 0; i < 2; ++i)
49100 if (h[i] == NULL_RTX)
49101 continue;
49102 op = gen_reg_rtx (V4DImode);
49103 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49104 const2_rtx, GEN_INT (3), const0_rtx,
49105 const1_rtx));
49106 h[i] = gen_lowpart (V32QImode, op);
49109 for (i = 0; i < 2; ++i)
49111 if (!used[2 * i])
49113 l[i] = NULL_RTX;
49114 continue;
49116 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49117 vperm = force_reg (V32QImode, vperm);
49118 l[i] = gen_reg_rtx (V32QImode);
49119 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49120 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49123 for (i = 0; i < 2; ++i)
49125 if (h[i] && l[i])
49127 op = gen_reg_rtx (V32QImode);
49128 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49129 l[i] = op;
49131 else if (h[i])
49132 l[i] = h[i];
49135 gcc_assert (l[0] && l[1]);
49136 op = d->target;
49137 if (d->vmode != V32QImode)
49138 op = gen_reg_rtx (V32QImode);
49139 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49140 if (op != d->target)
49141 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49142 return true;
49145 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49146 With all of the interface bits taken care of, perform the expansion
49147 in D and return true on success. */
49149 static bool
49150 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49152 /* Try a single instruction expansion. */
49153 if (expand_vec_perm_1 (d))
49154 return true;
49156 /* Try sequences of two instructions. */
49158 if (expand_vec_perm_pshuflw_pshufhw (d))
49159 return true;
49161 if (expand_vec_perm_palignr (d, false))
49162 return true;
49164 if (expand_vec_perm_interleave2 (d))
49165 return true;
49167 if (expand_vec_perm_broadcast (d))
49168 return true;
49170 if (expand_vec_perm_vpermq_perm_1 (d))
49171 return true;
49173 if (expand_vec_perm_vperm2f128 (d))
49174 return true;
49176 if (expand_vec_perm_pblendv (d))
49177 return true;
49179 /* Try sequences of three instructions. */
49181 if (expand_vec_perm_even_odd_pack (d))
49182 return true;
49184 if (expand_vec_perm_2vperm2f128_vshuf (d))
49185 return true;
49187 if (expand_vec_perm_pshufb2 (d))
49188 return true;
49190 if (expand_vec_perm_interleave3 (d))
49191 return true;
49193 if (expand_vec_perm_vperm2f128_vblend (d))
49194 return true;
49196 /* Try sequences of four instructions. */
49198 if (expand_vec_perm_vpshufb2_vpermq (d))
49199 return true;
49201 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49202 return true;
49204 if (expand_vec_perm_vpermi2_vpshub2 (d))
49205 return true;
49207 /* ??? Look for narrow permutations whose element orderings would
49208 allow the promotion to a wider mode. */
49210 /* ??? Look for sequences of interleave or a wider permute that place
49211 the data into the correct lanes for a half-vector shuffle like
49212 pshuf[lh]w or vpermilps. */
49214 /* ??? Look for sequences of interleave that produce the desired results.
49215 The combinatorics of punpck[lh] get pretty ugly... */
49217 if (expand_vec_perm_even_odd (d))
49218 return true;
49220 /* Even longer sequences. */
49221 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49222 return true;
49224 return false;
49227 /* If a permutation only uses one operand, make it clear. Returns true
49228 if the permutation references both operands. */
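/* Editorial example (illustrative): with nelt == 4, a selector of
   { 4, 5, 6, 7 } only reads op1, so it is folded to { 0, 1, 2, 3 } with
   op0 = op1 and one_operand_p set, and the function returns false.  */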
49230 static bool
49231 canonicalize_perm (struct expand_vec_perm_d *d)
49233 int i, which, nelt = d->nelt;
49235 for (i = which = 0; i < nelt; ++i)
49236 which |= (d->perm[i] < nelt ? 1 : 2);
49238 d->one_operand_p = true;
49239 switch (which)
49241 default:
49242 gcc_unreachable();
49244 case 3:
49245 if (!rtx_equal_p (d->op0, d->op1))
49247 d->one_operand_p = false;
49248 break;
49250 /* The elements of PERM do not suggest that only the first operand
49251 is used, but both operands are identical. Allow easier matching
49252 of the permutation by folding the permutation into the single
49253 input vector. */
49254 /* FALLTHRU */
49256 case 2:
49257 for (i = 0; i < nelt; ++i)
49258 d->perm[i] &= nelt - 1;
49259 d->op0 = d->op1;
49260 break;
49262 case 1:
49263 d->op1 = d->op0;
49264 break;
49267 return (which == 3);
49270 bool
49271 ix86_expand_vec_perm_const (rtx operands[4])
49273 struct expand_vec_perm_d d;
49274 unsigned char perm[MAX_VECT_LEN];
49275 int i, nelt;
49276 bool two_args;
49277 rtx sel;
49279 d.target = operands[0];
49280 d.op0 = operands[1];
49281 d.op1 = operands[2];
49282 sel = operands[3];
49284 d.vmode = GET_MODE (d.target);
49285 gcc_assert (VECTOR_MODE_P (d.vmode));
49286 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49287 d.testing_p = false;
49289 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49290 gcc_assert (XVECLEN (sel, 0) == nelt);
49291 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49293 for (i = 0; i < nelt; ++i)
49295 rtx e = XVECEXP (sel, 0, i);
49296 int ei = INTVAL (e) & (2 * nelt - 1);
49297 d.perm[i] = ei;
49298 perm[i] = ei;
49301 two_args = canonicalize_perm (&d);
49303 if (ix86_expand_vec_perm_const_1 (&d))
49304 return true;
49306 /* If the selector says both arguments are needed, but the operands are the
49307 same, the above tried to expand with one_operand_p and flattened selector.
49308 If that didn't work, retry without one_operand_p; we succeeded with that
49309 during testing. */
49310 if (two_args && d.one_operand_p)
49312 d.one_operand_p = false;
49313 memcpy (d.perm, perm, sizeof (perm));
49314 return ix86_expand_vec_perm_const_1 (&d);
49317 return false;
49320 /* Implement targetm.vectorize.vec_perm_const_ok. */
49322 static bool
49323 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49324 const unsigned char *sel)
49326 struct expand_vec_perm_d d;
49327 unsigned int i, nelt, which;
49328 bool ret;
49330 d.vmode = vmode;
49331 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49332 d.testing_p = true;
49334 /* Given sufficient ISA support we can just return true here
49335 for selected vector modes. */
49336 switch (d.vmode)
49338 case V16SFmode:
49339 case V16SImode:
49340 case V8DImode:
49341 case V8DFmode:
49342 if (TARGET_AVX512F)
49343 /* All implementable with a single vpermi2 insn. */
49344 return true;
49345 break;
49346 case V32HImode:
49347 if (TARGET_AVX512BW)
49348 /* All implementable with a single vpermi2 insn. */
49349 return true;
49350 break;
49351 case V64QImode:
49352 if (TARGET_AVX512BW)
49353 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49354 return true;
49355 break;
49356 case V8SImode:
49357 case V8SFmode:
49358 case V4DFmode:
49359 case V4DImode:
49360 if (TARGET_AVX512VL)
49361 /* All implementable with a single vpermi2 insn. */
49362 return true;
49363 break;
49364 case V16HImode:
49365 if (TARGET_AVX2)
49366 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49367 return true;
49368 break;
49369 case V32QImode:
49370 if (TARGET_AVX2)
49371 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49372 return true;
49373 break;
49374 case V4SImode:
49375 case V4SFmode:
49376 case V8HImode:
49377 case V16QImode:
49378 /* All implementable with a single vpperm insn. */
49379 if (TARGET_XOP)
49380 return true;
49381 /* All implementable with 2 pshufb + 1 ior. */
49382 if (TARGET_SSSE3)
49383 return true;
49384 break;
49385 case V2DImode:
49386 case V2DFmode:
49387 /* All implementable with shufpd or unpck[lh]pd. */
49388 return true;
49389 default:
49390 return false;
49393 /* Extract the values from the vector CST into the permutation
49394 array in D. */
49395 memcpy (d.perm, sel, nelt);
49396 for (i = which = 0; i < nelt; ++i)
49398 unsigned char e = d.perm[i];
49399 gcc_assert (e < 2 * nelt);
49400 which |= (e < nelt ? 1 : 2);
49403 /* For all elements from the second vector, fold the elements to the first. */
49404 if (which == 2)
49405 for (i = 0; i < nelt; ++i)
49406 d.perm[i] -= nelt;
49408 /* Check whether the mask can be applied to the vector type. */
49409 d.one_operand_p = (which != 3);
49411 /* Implementable with shufps or pshufd. */
49412 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49413 return true;
49415 /* Otherwise we have to go through the motions and see if we can
49416 figure out how to generate the requested permutation. */
49417 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49418 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49419 if (!d.one_operand_p)
49420 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49422 start_sequence ();
49423 ret = ix86_expand_vec_perm_const_1 (&d);
49424 end_sequence ();
49426 return ret;
49429 void
49430 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49432 struct expand_vec_perm_d d;
49433 unsigned i, nelt;
49435 d.target = targ;
49436 d.op0 = op0;
49437 d.op1 = op1;
49438 d.vmode = GET_MODE (targ);
49439 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49440 d.one_operand_p = false;
49441 d.testing_p = false;
49443 for (i = 0; i < nelt; ++i)
49444 d.perm[i] = i * 2 + odd;
49446 /* We'll either be able to implement the permutation directly... */
49447 if (expand_vec_perm_1 (&d))
49448 return;
49450 /* ... or we use the special-case patterns. */
49451 expand_vec_perm_even_odd_1 (&d, odd);
49454 static void
49455 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49457 struct expand_vec_perm_d d;
49458 unsigned i, nelt, base;
49459 bool ok;
49461 d.target = targ;
49462 d.op0 = op0;
49463 d.op1 = op1;
49464 d.vmode = GET_MODE (targ);
49465 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49466 d.one_operand_p = false;
49467 d.testing_p = false;
49469 base = high_p ? nelt / 2 : 0;
49470 for (i = 0; i < nelt / 2; ++i)
49472 d.perm[i * 2] = i + base;
49473 d.perm[i * 2 + 1] = i + base + nelt;
49476 /* Note that for AVX this isn't one instruction. */
49477 ok = ix86_expand_vec_perm_const_1 (&d);
49478 gcc_assert (ok);
49482 /* Expand a vector operation CODE for a V*QImode in terms of the
49483 same operation on V*HImode. */
49485 void
49486 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49488 machine_mode qimode = GET_MODE (dest);
49489 machine_mode himode;
49490 rtx (*gen_il) (rtx, rtx, rtx);
49491 rtx (*gen_ih) (rtx, rtx, rtx);
49492 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49493 struct expand_vec_perm_d d;
49494 bool ok, full_interleave;
49495 bool uns_p = false;
49496 int i;
49498 switch (qimode)
49500 case V16QImode:
49501 himode = V8HImode;
49502 gen_il = gen_vec_interleave_lowv16qi;
49503 gen_ih = gen_vec_interleave_highv16qi;
49504 break;
49505 case V32QImode:
49506 himode = V16HImode;
49507 gen_il = gen_avx2_interleave_lowv32qi;
49508 gen_ih = gen_avx2_interleave_highv32qi;
49509 break;
49510 case V64QImode:
49511 himode = V32HImode;
49512 gen_il = gen_avx512bw_interleave_lowv64qi;
49513 gen_ih = gen_avx512bw_interleave_highv64qi;
49514 break;
49515 default:
49516 gcc_unreachable ();
49519 op2_l = op2_h = op2;
49520 switch (code)
49522 case MULT:
49523 /* Unpack data such that we've got a source byte in each low byte of
49524 each word. We don't care what goes into the high byte of each word.
49525 Rather than trying to get zero in there, most convenient is to let
49526 it be a copy of the low byte. */
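/* Editorial note (illustrative): only the low byte of each word product is
   kept when the results are merged back below, and that byte depends only
   on the low bytes of the word operands, so the duplicated high byte is
   harmless.  */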
49527 op2_l = gen_reg_rtx (qimode);
49528 op2_h = gen_reg_rtx (qimode);
49529 emit_insn (gen_il (op2_l, op2, op2));
49530 emit_insn (gen_ih (op2_h, op2, op2));
49531 /* FALLTHRU */
49533 op1_l = gen_reg_rtx (qimode);
49534 op1_h = gen_reg_rtx (qimode);
49535 emit_insn (gen_il (op1_l, op1, op1));
49536 emit_insn (gen_ih (op1_h, op1, op1));
49537 full_interleave = qimode == V16QImode;
49538 break;
49540 case ASHIFT:
49541 case LSHIFTRT:
49542 uns_p = true;
49543 /* FALLTHRU */
49544 case ASHIFTRT:
49545 op1_l = gen_reg_rtx (himode);
49546 op1_h = gen_reg_rtx (himode);
49547 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49548 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49549 full_interleave = true;
49550 break;
49551 default:
49552 gcc_unreachable ();
49555 /* Perform the operation. */
49556 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49557 1, OPTAB_DIRECT);
49558 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49559 1, OPTAB_DIRECT);
49560 gcc_assert (res_l && res_h);
49562 /* Merge the data back into the right place. */
49563 d.target = dest;
49564 d.op0 = gen_lowpart (qimode, res_l);
49565 d.op1 = gen_lowpart (qimode, res_h);
49566 d.vmode = qimode;
49567 d.nelt = GET_MODE_NUNITS (qimode);
49568 d.one_operand_p = false;
49569 d.testing_p = false;
49571 if (full_interleave)
49573 /* For SSE2, we used a full interleave, so the desired
49574 results are in the even elements. */
49575 for (i = 0; i < 64; ++i)
49576 d.perm[i] = i * 2;
49578 else
49580 /* For AVX, the interleave used above was not cross-lane. So the
49581 extraction is evens but with the second and third quarters swapped.
49582 Happily, that is even one insn shorter than even extraction. */
49583 for (i = 0; i < 64; ++i)
49584 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49587 ok = ix86_expand_vec_perm_const_1 (&d);
49588 gcc_assert (ok);
49590 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49591 gen_rtx_fmt_ee (code, qimode, op1, op2));
49594 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49595 if op is CONST_VECTOR with all odd elements equal to their
49596 preceding element. */
49598 static bool
49599 const_vector_equal_evenodd_p (rtx op)
49601 machine_mode mode = GET_MODE (op);
49602 int i, nunits = GET_MODE_NUNITS (mode);
49603 if (GET_CODE (op) != CONST_VECTOR
49604 || nunits != CONST_VECTOR_NUNITS (op))
49605 return false;
49606 for (i = 0; i < nunits; i += 2)
49607 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49608 return false;
49609 return true;
49612 void
49613 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49614 bool uns_p, bool odd_p)
49616 machine_mode mode = GET_MODE (op1);
49617 machine_mode wmode = GET_MODE (dest);
49618 rtx x;
49619 rtx orig_op1 = op1, orig_op2 = op2;
49621 if (!nonimmediate_operand (op1, mode))
49622 op1 = force_reg (mode, op1);
49623 if (!nonimmediate_operand (op2, mode))
49624 op2 = force_reg (mode, op2);
49626 /* We only play even/odd games with vectors of SImode. */
49627 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49629 /* If we're looking for the odd results, shift those members down to
49630 the even slots. For some cpus this is faster than a PSHUFD. */
49631 if (odd_p)
49633 /* For XOP use vpmacsdqh, but only for smult, as it is only
49634 signed. */
49635 if (TARGET_XOP && mode == V4SImode && !uns_p)
49637 x = force_reg (wmode, CONST0_RTX (wmode));
49638 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49639 return;
49642 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49643 if (!const_vector_equal_evenodd_p (orig_op1))
49644 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49645 x, NULL, 1, OPTAB_DIRECT);
49646 if (!const_vector_equal_evenodd_p (orig_op2))
49647 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49648 x, NULL, 1, OPTAB_DIRECT);
49649 op1 = gen_lowpart (mode, op1);
49650 op2 = gen_lowpart (mode, op2);
49653 if (mode == V16SImode)
49655 if (uns_p)
49656 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49657 else
49658 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49660 else if (mode == V8SImode)
49662 if (uns_p)
49663 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49664 else
49665 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49667 else if (uns_p)
49668 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49669 else if (TARGET_SSE4_1)
49670 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49671 else
49673 rtx s1, s2, t0, t1, t2;
49675 /* The easiest way to implement this without PMULDQ is to go through
49676 the motions as if we were performing a full 64-bit multiply, except
49677 that we need to do less shuffling of the elements. */
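/* Editorial note (illustrative): writing each signed operand as
   a = a_u - 2^32 * (a < 0), the low 64 bits of the product are
   a_u * b_u - (((a < 0 ? b_u : 0) + (b < 0 ? a_u : 0)) << 32).
   The all-ones sign masks s1/s2 below act as -1, so the two widening
   multiplies yield the negated correction term modulo 2^32; shifting it
   left by 32 and adding it to the unsigned low product gives the result.  */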
49679 /* Compute the sign-extension, aka highparts, of the two operands. */
49680 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49681 op1, pc_rtx, pc_rtx);
49682 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49683 op2, pc_rtx, pc_rtx);
49685 /* Multiply LO(A) * HI(B), and vice-versa. */
49686 t1 = gen_reg_rtx (wmode);
49687 t2 = gen_reg_rtx (wmode);
49688 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49689 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49691 /* Multiply LO(A) * LO(B). */
49692 t0 = gen_reg_rtx (wmode);
49693 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49695 /* Combine and shift the highparts into place. */
49696 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49697 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49698 1, OPTAB_DIRECT);
49700 /* Combine high and low parts. */
49701 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49702 return;
49704 emit_insn (x);
49707 void
49708 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49709 bool uns_p, bool high_p)
49711 machine_mode wmode = GET_MODE (dest);
49712 machine_mode mode = GET_MODE (op1);
49713 rtx t1, t2, t3, t4, mask;
49715 switch (mode)
49717 case V4SImode:
49718 t1 = gen_reg_rtx (mode);
49719 t2 = gen_reg_rtx (mode);
49720 if (TARGET_XOP && !uns_p)
49722 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49723 shuffle the elements once so that all elements are in the right
49724 place for immediate use: { A C B D }. */
49725 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49726 const1_rtx, GEN_INT (3)));
49727 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49728 const1_rtx, GEN_INT (3)));
49730 else
49732 /* Put the elements into place for the multiply. */
49733 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49734 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49735 high_p = false;
49737 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49738 break;
49740 case V8SImode:
49741 /* Shuffle the elements between the lanes. After this we
49742 have { A B E F | C D G H } for each operand. */
49743 t1 = gen_reg_rtx (V4DImode);
49744 t2 = gen_reg_rtx (V4DImode);
49745 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49746 const0_rtx, const2_rtx,
49747 const1_rtx, GEN_INT (3)));
49748 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49749 const0_rtx, const2_rtx,
49750 const1_rtx, GEN_INT (3)));
49752 /* Shuffle the elements within the lanes. After this we
49753 have { A A B B | C C D D } or { E E F F | G G H H }. */
49754 t3 = gen_reg_rtx (V8SImode);
49755 t4 = gen_reg_rtx (V8SImode);
49756 mask = GEN_INT (high_p
49757 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49758 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49759 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49760 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49762 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49763 break;
49765 case V8HImode:
49766 case V16HImode:
49767 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49768 uns_p, OPTAB_DIRECT);
49769 t2 = expand_binop (mode,
49770 uns_p ? umul_highpart_optab : smul_highpart_optab,
49771 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49772 gcc_assert (t1 && t2);
49774 t3 = gen_reg_rtx (mode);
49775 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49776 emit_move_insn (dest, gen_lowpart (wmode, t3));
49777 break;
49779 case V16QImode:
49780 case V32QImode:
49781 case V32HImode:
49782 case V16SImode:
49783 case V64QImode:
49784 t1 = gen_reg_rtx (wmode);
49785 t2 = gen_reg_rtx (wmode);
49786 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49787 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49789 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49790 break;
49792 default:
49793 gcc_unreachable ();
49797 void
49798 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49800 rtx res_1, res_2, res_3, res_4;
49802 res_1 = gen_reg_rtx (V4SImode);
49803 res_2 = gen_reg_rtx (V4SImode);
49804 res_3 = gen_reg_rtx (V2DImode);
49805 res_4 = gen_reg_rtx (V2DImode);
49806 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49807 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49809 /* Move the results in element 2 down to element 1; we don't care
49810 what goes in elements 2 and 3. Then we can merge the parts
49811 back together with an interleave.
49813 Note that two other sequences were tried:
49814 (1) Use interleaves at the start instead of psrldq, which allows
49815 us to use a single shufps to merge things back at the end.
49816 (2) Use shufps here to combine the two vectors, then pshufd to
49817 put the elements in the correct order.
49818 In both cases the cost of the reformatting stall was too high
49819 and the overall sequence slower. */
49821 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49822 const0_rtx, const2_rtx,
49823 const0_rtx, const0_rtx));
49824 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49825 const0_rtx, const2_rtx,
49826 const0_rtx, const0_rtx));
49827 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49829 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49832 void
49833 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49835 machine_mode mode = GET_MODE (op0);
49836 rtx t1, t2, t3, t4, t5, t6;
49838 if (TARGET_AVX512DQ && mode == V8DImode)
49839 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49840 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49841 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49842 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49843 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49844 else if (TARGET_XOP && mode == V2DImode)
49846 /* op1: A,B,C,D, op2: E,F,G,H */
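/* Editorial note (illustrative): here A/C and E/G are the low 32-bit
   halves of the two 64-bit lanes, so the wanted low 64 bits of each
   product are A*E + ((A*F + B*E) << 32); the pshufd/phadddq/shift/umult
   steps below assemble exactly that (and likewise
   C*G + ((C*H + D*G) << 32) in the other lane).  */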
49847 op1 = gen_lowpart (V4SImode, op1);
49848 op2 = gen_lowpart (V4SImode, op2);
49850 t1 = gen_reg_rtx (V4SImode);
49851 t2 = gen_reg_rtx (V4SImode);
49852 t3 = gen_reg_rtx (V2DImode);
49853 t4 = gen_reg_rtx (V2DImode);
49855 /* t1: B,A,D,C */
49856 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49857 GEN_INT (1),
49858 GEN_INT (0),
49859 GEN_INT (3),
49860 GEN_INT (2)));
49862 /* t2: (B*E),(A*F),(D*G),(C*H) */
49863 emit_insn (gen_mulv4si3 (t2, t1, op2));
49865 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49866 emit_insn (gen_xop_phadddq (t3, t2));
49868 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49869 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49871 /* Multiply lower parts and add all */
49872 t5 = gen_reg_rtx (V2DImode);
49873 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49874 gen_lowpart (V4SImode, op1),
49875 gen_lowpart (V4SImode, op2)));
49876 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49879 else
49881 machine_mode nmode;
49882 rtx (*umul) (rtx, rtx, rtx);
49884 if (mode == V2DImode)
49886 umul = gen_vec_widen_umult_even_v4si;
49887 nmode = V4SImode;
49889 else if (mode == V4DImode)
49891 umul = gen_vec_widen_umult_even_v8si;
49892 nmode = V8SImode;
49894 else if (mode == V8DImode)
49896 umul = gen_vec_widen_umult_even_v16si;
49897 nmode = V16SImode;
49899 else
49900 gcc_unreachable ();
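/* Editorial note (illustrative): per 64-bit lane the code below computes
   lo(a)*lo(b) + ((hi(a)*lo(b) + hi(b)*lo(a)) << 32), i.e. the standard
   low-64-bit decomposition of a 64x64 multiply; the hi(a)*hi(b) term only
   affects bits 64 and above and is therefore dropped.  */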
49903 /* Multiply low parts. */
49904 t1 = gen_reg_rtx (mode);
49905 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49907 /* Shift input vectors right 32 bits so we can multiply high parts. */
49908 t6 = GEN_INT (32);
49909 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49910 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49912 /* Multiply high parts by low parts. */
49913 t4 = gen_reg_rtx (mode);
49914 t5 = gen_reg_rtx (mode);
49915 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49916 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49918 /* Combine and shift the highparts back. */
49919 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49920 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49922 /* Combine high and low parts. */
49923 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49926 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49927 gen_rtx_MULT (mode, op1, op2));
49930 /* Return 1 if control transfer instruction INSN
49931 should be encoded with bnd prefix.
49932 If insn is NULL then return 1 when control
49933 transfer instructions should be prefixed with
49934 bnd by default for the current function. */
49936 bool
49937 ix86_bnd_prefixed_insn_p (rtx insn)
49939 /* For call insns check special flag. */
49940 if (insn && CALL_P (insn))
49942 rtx call = get_call_rtx_from (insn);
49943 if (call)
49944 return CALL_EXPR_WITH_BOUNDS_P (call);
49947 /* All other insns are prefixed only if function is instrumented. */
49948 return chkp_function_instrumented_p (current_function_decl);
49951 /* Calculate integer abs() using only SSE2 instructions. */
49953 void
49954 ix86_expand_sse2_abs (rtx target, rtx input)
49956 machine_mode mode = GET_MODE (target);
49957 rtx tmp0, tmp1, x;
49959 switch (mode)
49961 /* For 32-bit signed integer X, the best way to calculate the absolute
49962 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
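/* Editorial example (illustrative): for X = -5, X >> 31 = -1,
   (-5 ^ -1) = 4, and 4 - (-1) = 5; for non-negative X the shift yields 0
   and the expression reduces to X itself.  */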
49963 case V4SImode:
49964 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49965 GEN_INT (GET_MODE_BITSIZE
49966 (GET_MODE_INNER (mode)) - 1),
49967 NULL, 0, OPTAB_DIRECT);
49968 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49969 NULL, 0, OPTAB_DIRECT);
49970 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49971 target, 0, OPTAB_DIRECT);
49972 break;
49974 /* For 16-bit signed integer X, the best way to calculate the absolute
49975 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49976 case V8HImode:
49977 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49979 x = expand_simple_binop (mode, SMAX, tmp0, input,
49980 target, 0, OPTAB_DIRECT);
49981 break;
49983 /* For 8-bit signed integer X, the best way to calculate the absolute
49984 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49985 as SSE2 provides the PMINUB insn. */
49986 case V16QImode:
49987 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49989 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49990 target, 0, OPTAB_DIRECT);
49991 break;
49993 default:
49994 gcc_unreachable ();
49997 if (x != target)
49998 emit_move_insn (target, x);
50001 /* Expand an insert into a vector register through pinsr insn.
50002 Return true if successful. */
50004 bool
50005 ix86_expand_pinsr (rtx *operands)
50007 rtx dst = operands[0];
50008 rtx src = operands[3];
50010 unsigned int size = INTVAL (operands[1]);
50011 unsigned int pos = INTVAL (operands[2]);
50013 if (GET_CODE (dst) == SUBREG)
50015 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50016 dst = SUBREG_REG (dst);
50019 if (GET_CODE (src) == SUBREG)
50020 src = SUBREG_REG (src);
50022 switch (GET_MODE (dst))
50024 case V16QImode:
50025 case V8HImode:
50026 case V4SImode:
50027 case V2DImode:
50029 machine_mode srcmode, dstmode;
50030 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50032 srcmode = mode_for_size (size, MODE_INT, 0);
50034 switch (srcmode)
50036 case QImode:
50037 if (!TARGET_SSE4_1)
50038 return false;
50039 dstmode = V16QImode;
50040 pinsr = gen_sse4_1_pinsrb;
50041 break;
50043 case HImode:
50044 if (!TARGET_SSE2)
50045 return false;
50046 dstmode = V8HImode;
50047 pinsr = gen_sse2_pinsrw;
50048 break;
50050 case SImode:
50051 if (!TARGET_SSE4_1)
50052 return false;
50053 dstmode = V4SImode;
50054 pinsr = gen_sse4_1_pinsrd;
50055 break;
50057 case DImode:
50058 gcc_assert (TARGET_64BIT);
50059 if (!TARGET_SSE4_1)
50060 return false;
50061 dstmode = V2DImode;
50062 pinsr = gen_sse4_1_pinsrq;
50063 break;
50065 default:
50066 return false;
50069 rtx d = dst;
50070 if (GET_MODE (dst) != dstmode)
50071 d = gen_reg_rtx (dstmode);
50072 src = gen_lowpart (srcmode, src);
50074 pos /= size;
50076 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50077 GEN_INT (1 << pos)));
50078 if (d != dst)
50079 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50080 return true;
50083 default:
50084 return false;
50088 /* This function returns the calling-ABI-specific va_list type node.
50089 It returns the FNDECL-specific va_list type. */
50091 static tree
50092 ix86_fn_abi_va_list (tree fndecl)
50094 if (!TARGET_64BIT)
50095 return va_list_type_node;
50096 gcc_assert (fndecl != NULL_TREE);
50098 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50099 return ms_va_list_type_node;
50100 else
50101 return sysv_va_list_type_node;
50104 /* Returns the canonical va_list type specified by TYPE. If there
50105 is no valid TYPE provided, it returns NULL_TREE. */
50107 static tree
50108 ix86_canonical_va_list_type (tree type)
50110 tree wtype, htype;
50112 /* Resolve references and pointers to va_list type. */
50113 if (TREE_CODE (type) == MEM_REF)
50114 type = TREE_TYPE (type);
50115 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50116 type = TREE_TYPE (type);
50117 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50118 type = TREE_TYPE (type);
50120 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50122 wtype = va_list_type_node;
50123 gcc_assert (wtype != NULL_TREE);
50124 htype = type;
50125 if (TREE_CODE (wtype) == ARRAY_TYPE)
50127 /* If va_list is an array type, the argument may have decayed
50128 to a pointer type, e.g. by being passed to another function.
50129 In that case, unwrap both types so that we can compare the
50130 underlying records. */
50131 if (TREE_CODE (htype) == ARRAY_TYPE
50132 || POINTER_TYPE_P (htype))
50134 wtype = TREE_TYPE (wtype);
50135 htype = TREE_TYPE (htype);
50138 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50139 return va_list_type_node;
50140 wtype = sysv_va_list_type_node;
50141 gcc_assert (wtype != NULL_TREE);
50142 htype = type;
50143 if (TREE_CODE (wtype) == ARRAY_TYPE)
50145 /* If va_list is an array type, the argument may have decayed
50146 to a pointer type, e.g. by being passed to another function.
50147 In that case, unwrap both types so that we can compare the
50148 underlying records. */
50149 if (TREE_CODE (htype) == ARRAY_TYPE
50150 || POINTER_TYPE_P (htype))
50152 wtype = TREE_TYPE (wtype);
50153 htype = TREE_TYPE (htype);
50156 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50157 return sysv_va_list_type_node;
50158 wtype = ms_va_list_type_node;
50159 gcc_assert (wtype != NULL_TREE);
50160 htype = type;
50161 if (TREE_CODE (wtype) == ARRAY_TYPE)
50163 /* If va_list is an array type, the argument may have decayed
50164 to a pointer type, e.g. by being passed to another function.
50165 In that case, unwrap both types so that we can compare the
50166 underlying records. */
50167 if (TREE_CODE (htype) == ARRAY_TYPE
50168 || POINTER_TYPE_P (htype))
50170 wtype = TREE_TYPE (wtype);
50171 htype = TREE_TYPE (htype);
50174 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50175 return ms_va_list_type_node;
50176 return NULL_TREE;
50178 return std_canonical_va_list_type (type);
50181 /* Iterate through the target-specific builtin types for va_list.
50182 IDX denotes the iterator, *PTREE is set to the result type of
50183 the va_list builtin, and *PNAME to its internal type.
50184 Returns zero if there is no element for this index, otherwise
50185 IDX should be increased upon the next call.
50186 Note, do not iterate a base builtin's name like __builtin_va_list.
50187 Used from c_common_nodes_and_builtins. */
50189 static int
50190 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50192 if (TARGET_64BIT)
50194 switch (idx)
50196 default:
50197 break;
50199 case 0:
50200 *ptree = ms_va_list_type_node;
50201 *pname = "__builtin_ms_va_list";
50202 return 1;
50204 case 1:
50205 *ptree = sysv_va_list_type_node;
50206 *pname = "__builtin_sysv_va_list";
50207 return 1;
50211 return 0;
50214 #undef TARGET_SCHED_DISPATCH
50215 #define TARGET_SCHED_DISPATCH has_dispatch
50216 #undef TARGET_SCHED_DISPATCH_DO
50217 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50218 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50219 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50220 #undef TARGET_SCHED_REORDER
50221 #define TARGET_SCHED_REORDER ix86_sched_reorder
50222 #undef TARGET_SCHED_ADJUST_PRIORITY
50223 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50224 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50225 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50226 ix86_dependencies_evaluation_hook
50228 /* The size of the dispatch window is the total number of bytes of
50229 object code allowed in a window. */
50230 #define DISPATCH_WINDOW_SIZE 16
50232 /* Number of dispatch windows considered for scheduling. */
50233 #define MAX_DISPATCH_WINDOWS 3
50235 /* Maximum number of instructions in a window. */
50236 #define MAX_INSN 4
50238 /* Maximum number of immediate operands in a window. */
50239 #define MAX_IMM 4
50241 /* Maximum number of immediate bits allowed in a window. */
50242 #define MAX_IMM_SIZE 128
50244 /* Maximum number of 32 bit immediates allowed in a window. */
50245 #define MAX_IMM_32 4
50247 /* Maximum number of 64 bit immediates allowed in a window. */
50248 #define MAX_IMM_64 2
50250 /* Maximum total of loads or prefetches allowed in a window. */
50251 #define MAX_LOAD 2
50253 /* Maximum total of stores allowed in a window. */
50254 #define MAX_STORE 1
50256 #undef BIG
50257 #define BIG 100
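/* Taken together these limits mean that a single 16-byte window can hold
   at most four instructions, two loads or prefetches, one store and 128
   bits worth of immediates; two 64-bit immediates, for instance, already
   reach MAX_IMM_64.  */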
50260 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50261 enum dispatch_group {
50262 disp_no_group = 0,
50263 disp_load,
50264 disp_store,
50265 disp_load_store,
50266 disp_prefetch,
50267 disp_imm,
50268 disp_imm_32,
50269 disp_imm_64,
50270 disp_branch,
50271 disp_cmp,
50272 disp_jcc,
50273 disp_last
50276 /* Number of allowable groups in a dispatch window. It is an array
50277 indexed by the dispatch_group enum. 100 is used as a big number,
50278 because the number of these kinds of operations does not have any
50279 effect in a dispatch window, but we need them for other reasons in
50280 the table. */
50281 static unsigned int num_allowable_groups[disp_last] = {
50282 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50285 char group_name[disp_last + 1][16] = {
50286 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50287 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50288 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50291 /* Instruction path. */
50292 enum insn_path {
50293 no_path = 0,
50294 path_single, /* Single micro op. */
50295 path_double, /* Double micro op. */
50296 path_multi, /* Instructions with more than 2 micro ops. */
50297 last_path
50300 /* sched_insn_info defines a window to the instructions scheduled in
50301 the basic block. It contains a pointer to the insn_info table and
50302 the instruction scheduled.
50304 Windows are allocated for each basic block and are linked
50305 together. */
50306 typedef struct sched_insn_info_s {
50307 rtx insn;
50308 enum dispatch_group group;
50309 enum insn_path path;
50310 int byte_len;
50311 int imm_bytes;
50312 } sched_insn_info;
50314 /* Linked list of dispatch windows. This is a two way list of
50315 dispatch windows of a basic block. It contains information about
50316 the number of uops in the window and the total number of
50317 instructions and of bytes in the object code for this dispatch
50318 window. */
50319 typedef struct dispatch_windows_s {
50320 int num_insn; /* Number of insn in the window. */
50321 int num_uops; /* Number of uops in the window. */
50322 int window_size; /* Number of bytes in the window. */
50323 int window_num; /* Window number, 0 or 1. */
50324 int num_imm; /* Number of immediates in an insn. */
50325 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50326 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50327 int imm_size; /* Total immediates in the window. */
50328 int num_loads; /* Total memory loads in the window. */
50329 int num_stores; /* Total memory stores in the window. */
50330 int violation; /* Violation exists in window. */
50331 sched_insn_info *window; /* Pointer to the window. */
50332 struct dispatch_windows_s *next;
50333 struct dispatch_windows_s *prev;
50334 } dispatch_windows;
50336 /* Immediate values used in an insn. */
50337 typedef struct imm_info_s
50339 int imm;
50340 int imm32;
50341 int imm64;
50342 } imm_info;
50344 static dispatch_windows *dispatch_window_list;
50345 static dispatch_windows *dispatch_window_list1;
50347 /* Get dispatch group of insn. */
50349 static enum dispatch_group
50350 get_mem_group (rtx_insn *insn)
50352 enum attr_memory memory;
50354 if (INSN_CODE (insn) < 0)
50355 return disp_no_group;
50356 memory = get_attr_memory (insn);
50357 if (memory == MEMORY_STORE)
50358 return disp_store;
50360 if (memory == MEMORY_LOAD)
50361 return disp_load;
50363 if (memory == MEMORY_BOTH)
50364 return disp_load_store;
50366 return disp_no_group;
50369 /* Return true if insn is a compare instruction. */
50371 static bool
50372 is_cmp (rtx_insn *insn)
50374 enum attr_type type;
50376 type = get_attr_type (insn);
50377 return (type == TYPE_TEST
50378 || type == TYPE_ICMP
50379 || type == TYPE_FCMP
50380 || GET_CODE (PATTERN (insn)) == COMPARE);
50383 /* Return true if a dispatch violation encountered. */
50385 static bool
50386 dispatch_violation (void)
50388 if (dispatch_window_list->next)
50389 return dispatch_window_list->next->violation;
50390 return dispatch_window_list->violation;
50393 /* Return true if insn is a branch instruction. */
50395 static bool
50396 is_branch (rtx insn)
50398 return (CALL_P (insn) || JUMP_P (insn));
50401 /* Return true if insn is a prefetch instruction. */
50403 static bool
50404 is_prefetch (rtx insn)
50406 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50409 /* This function initializes a dispatch window and the list container holding a
50410 pointer to the window. */
50412 static void
50413 init_window (int window_num)
50415 int i;
50416 dispatch_windows *new_list;
50418 if (window_num == 0)
50419 new_list = dispatch_window_list;
50420 else
50421 new_list = dispatch_window_list1;
50423 new_list->num_insn = 0;
50424 new_list->num_uops = 0;
50425 new_list->window_size = 0;
50426 new_list->next = NULL;
50427 new_list->prev = NULL;
50428 new_list->window_num = window_num;
50429 new_list->num_imm = 0;
50430 new_list->num_imm_32 = 0;
50431 new_list->num_imm_64 = 0;
50432 new_list->imm_size = 0;
50433 new_list->num_loads = 0;
50434 new_list->num_stores = 0;
50435 new_list->violation = false;
50437 for (i = 0; i < MAX_INSN; i++)
50439 new_list->window[i].insn = NULL;
50440 new_list->window[i].group = disp_no_group;
50441 new_list->window[i].path = no_path;
50442 new_list->window[i].byte_len = 0;
50443 new_list->window[i].imm_bytes = 0;
50445 return;
50448 /* This function allocates and initializes a dispatch window and the
50449 list container holding a pointer to the window. */
50451 static dispatch_windows *
50452 allocate_window (void)
50454 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50455 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50457 return new_list;
50460 /* This routine initializes the dispatch scheduling information. It
50461 initiates building dispatch scheduler tables and constructs the
50462 first dispatch window. */
50464 static void
50465 init_dispatch_sched (void)
50467 /* Allocate a dispatch list and a window. */
50468 dispatch_window_list = allocate_window ();
50469 dispatch_window_list1 = allocate_window ();
50470 init_window (0);
50471 init_window (1);
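/* These two windows are recycled from here on: init_window merely resets
   a window's counters and allocate_next_window hands back one of the same
   two objects instead of allocating new ones.  */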
50474 /* This function returns true if a branch is detected. End of a basic block
50475 does not have to be a branch, but here we assume only branches end a
50476 window. */
50478 static bool
50479 is_end_basic_block (enum dispatch_group group)
50481 return group == disp_branch;
50484 /* This function is called when the end of a window processing is reached. */
50486 static void
50487 process_end_window (void)
50489 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50490 if (dispatch_window_list->next)
50492 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50493 gcc_assert (dispatch_window_list->window_size
50494 + dispatch_window_list1->window_size <= 48);
50495 init_window (1);
50497 init_window (0);
50500 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50501 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50502 for 48 bytes of instructions. Note that these windows are not dispatch
50503 windows whose size is DISPATCH_WINDOW_SIZE. */
50505 static dispatch_windows *
50506 allocate_next_window (int window_num)
50508 if (window_num == 0)
50510 if (dispatch_window_list->next)
50511 init_window (1);
50512 init_window (0);
50513 return dispatch_window_list;
50516 dispatch_window_list->next = dispatch_window_list1;
50517 dispatch_window_list1->prev = dispatch_window_list;
50519 return dispatch_window_list1;
50522 /* Compute number of immediate operands of an instruction. */
50524 static void
50525 find_constant (rtx in_rtx, imm_info *imm_values)
50527 if (INSN_P (in_rtx))
50528 in_rtx = PATTERN (in_rtx);
50529 subrtx_iterator::array_type array;
50530 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50531 if (const_rtx x = *iter)
50532 switch (GET_CODE (x))
50534 case CONST:
50535 case SYMBOL_REF:
50536 case CONST_INT:
50537 (imm_values->imm)++;
50538 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50539 (imm_values->imm32)++;
50540 else
50541 (imm_values->imm64)++;
50542 break;
50544 case CONST_DOUBLE:
50545 (imm_values->imm)++;
50546 (imm_values->imm64)++;
50547 break;
50549 case CODE_LABEL:
50550 if (LABEL_KIND (x) == LABEL_NORMAL)
50552 (imm_values->imm)++;
50553 (imm_values->imm32)++;
50555 break;
50557 default:
50558 break;
50562 /* Return total size of immediate operands of an instruction along with number
50563 of corresponding immediate operands. It initializes its parameters to zero
50564 before calling FIND_CONSTANT.
50565 INSN is the input instruction. IMM is the total number of immediates.
50566 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50567 bit immediates. */
50569 static int
50570 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50572 imm_info imm_values = {0, 0, 0};
50574 find_constant (insn, &imm_values);
50575 *imm = imm_values.imm;
50576 *imm32 = imm_values.imm32;
50577 *imm64 = imm_values.imm64;
50578 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
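/* For example, an insn with one 32-bit and one 64-bit immediate yields
   *IMM = 2, *IMM32 = 1, *IMM64 = 1 and a returned size of
   1*4 + 1*8 = 12 bytes.  */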
50581 /* This function indicates whether any operand of an instruction is an
50582 immediate. */
50584 static bool
50585 has_immediate (rtx insn)
50587 int num_imm_operand;
50588 int num_imm32_operand;
50589 int num_imm64_operand;
50591 if (insn)
50592 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50593 &num_imm64_operand);
50594 return false;
50597 /* Return single or double path for instructions. */
50599 static enum insn_path
50600 get_insn_path (rtx_insn *insn)
50602 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50604 if ((int)path == 0)
50605 return path_single;
50607 if ((int)path == 1)
50608 return path_double;
50610 return path_multi;
50613 /* Return insn dispatch group. */
50615 static enum dispatch_group
50616 get_insn_group (rtx_insn *insn)
50618 enum dispatch_group group = get_mem_group (insn);
50619 if (group)
50620 return group;
50622 if (is_branch (insn))
50623 return disp_branch;
50625 if (is_cmp (insn))
50626 return disp_cmp;
50628 if (has_immediate (insn))
50629 return disp_imm;
50631 if (is_prefetch (insn))
50632 return disp_prefetch;
50634 return disp_no_group;
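/* Classification precedence above: memory groups first, then branch,
   compare, immediate and prefetch; anything else falls into
   disp_no_group.  */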
50637 /* Count number of GROUP restricted instructions in a dispatch
50638 window WINDOW_LIST. */
50640 static int
50641 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50643 enum dispatch_group group = get_insn_group (insn);
50644 int imm_size;
50645 int num_imm_operand;
50646 int num_imm32_operand;
50647 int num_imm64_operand;
50649 if (group == disp_no_group)
50650 return 0;
50652 if (group == disp_imm)
50654 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50655 &num_imm64_operand);
50656 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50657 || num_imm_operand + window_list->num_imm > MAX_IMM
50658 || (num_imm32_operand > 0
50659 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50660 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50661 || (num_imm64_operand > 0
50662 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50663 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50664 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50665 && num_imm64_operand > 0
50666 && ((window_list->num_imm_64 > 0
50667 && window_list->num_insn >= 2)
50668 || window_list->num_insn >= 3)))
50669 return BIG;
50671 return 1;
50674 if ((group == disp_load_store
50675 && (window_list->num_loads >= MAX_LOAD
50676 || window_list->num_stores >= MAX_STORE))
50677 || ((group == disp_load
50678 || group == disp_prefetch)
50679 && window_list->num_loads >= MAX_LOAD)
50680 || (group == disp_store
50681 && window_list->num_stores >= MAX_STORE))
50682 return BIG;
50684 return 1;
50687 /* This function returns true if insn satisfies dispatch rules on the
50688 last window scheduled. */
50690 static bool
50691 fits_dispatch_window (rtx_insn *insn)
50693 dispatch_windows *window_list = dispatch_window_list;
50694 dispatch_windows *window_list_next = dispatch_window_list->next;
50695 unsigned int num_restrict;
50696 enum dispatch_group group = get_insn_group (insn);
50697 enum insn_path path = get_insn_path (insn);
50698 int sum;
50700 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50701 instructions should be given the lowest priority in the
50702 scheduling process in the Haifa scheduler to make sure they will be
50703 scheduled in the same dispatch window as the reference to them. */
50704 if (group == disp_jcc || group == disp_cmp)
50705 return false;
50707 /* Check nonrestricted. */
50708 if (group == disp_no_group || group == disp_branch)
50709 return true;
50711 /* Get last dispatch window. */
50712 if (window_list_next)
50713 window_list = window_list_next;
50715 if (window_list->window_num == 1)
50717 sum = window_list->prev->window_size + window_list->window_size;
50719 if (sum == 32
50720 || (min_insn_size (insn) + sum) >= 48)
50721 /* Window 1 is full. Go for next window. */
50722 return true;
50725 num_restrict = count_num_restricted (insn, window_list);
50727 if (num_restrict > num_allowable_groups[group])
50728 return false;
50730 /* See if it fits in the first window. */
50731 if (window_list->window_num == 0)
50733 /* The first window should have only single and double path
50734 uops. */
50735 if (path == path_double
50736 && (window_list->num_uops + 2) > MAX_INSN)
50737 return false;
50738 else if (path != path_single)
50739 return false;
50741 return true;
50744 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50745 dispatch window WINDOW_LIST. */
50747 static void
50748 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50750 int byte_len = min_insn_size (insn);
50751 int num_insn = window_list->num_insn;
50752 int imm_size;
50753 sched_insn_info *window = window_list->window;
50754 enum dispatch_group group = get_insn_group (insn);
50755 enum insn_path path = get_insn_path (insn);
50756 int num_imm_operand;
50757 int num_imm32_operand;
50758 int num_imm64_operand;
50760 if (!window_list->violation && group != disp_cmp
50761 && !fits_dispatch_window (insn))
50762 window_list->violation = true;
50764 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50765 &num_imm64_operand);
50767 /* Initialize window with new instruction. */
50768 window[num_insn].insn = insn;
50769 window[num_insn].byte_len = byte_len;
50770 window[num_insn].group = group;
50771 window[num_insn].path = path;
50772 window[num_insn].imm_bytes = imm_size;
50774 window_list->window_size += byte_len;
50775 window_list->num_insn = num_insn + 1;
50776 window_list->num_uops = window_list->num_uops + num_uops;
50777 window_list->imm_size += imm_size;
50778 window_list->num_imm += num_imm_operand;
50779 window_list->num_imm_32 += num_imm32_operand;
50780 window_list->num_imm_64 += num_imm64_operand;
50782 if (group == disp_store)
50783 window_list->num_stores += 1;
50784 else if (group == disp_load
50785 || group == disp_prefetch)
50786 window_list->num_loads += 1;
50787 else if (group == disp_load_store)
50789 window_list->num_stores += 1;
50790 window_list->num_loads += 1;
50794 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50795 If the total bytes of instructions or the number of instructions in
50796 the window exceed the allowed maximum, it allocates a new window. */
50798 static void
50799 add_to_dispatch_window (rtx_insn *insn)
50801 int byte_len;
50802 dispatch_windows *window_list;
50803 dispatch_windows *next_list;
50804 dispatch_windows *window0_list;
50805 enum insn_path path;
50806 enum dispatch_group insn_group;
50807 bool insn_fits;
50808 int num_insn;
50809 int num_uops;
50810 int window_num;
50811 int insn_num_uops;
50812 int sum;
50814 if (INSN_CODE (insn) < 0)
50815 return;
50817 byte_len = min_insn_size (insn);
50818 window_list = dispatch_window_list;
50819 next_list = window_list->next;
50820 path = get_insn_path (insn);
50821 insn_group = get_insn_group (insn);
50823 /* Get the last dispatch window. */
50824 if (next_list)
50825 window_list = dispatch_window_list->next;
50827 if (path == path_single)
50828 insn_num_uops = 1;
50829 else if (path == path_double)
50830 insn_num_uops = 2;
50831 else
50832 insn_num_uops = (int) path;
50834 /* If the current window is full, get a new window.
50835 Window number zero is full if MAX_INSN uops are scheduled in it.
50836 Window number one is full if window zero's bytes plus window
50837 one's bytes equal 32, or if adding the bytes of the new instruction
50838 to the total makes it greater than 48, or if it already has MAX_INSN
50839 instructions in it. */
50840 num_insn = window_list->num_insn;
50841 num_uops = window_list->num_uops;
50842 window_num = window_list->window_num;
50843 insn_fits = fits_dispatch_window (insn);
50845 if (num_insn >= MAX_INSN
50846 || num_uops + insn_num_uops > MAX_INSN
50847 || !(insn_fits))
50849 window_num = ~window_num & 1;
50850 window_list = allocate_next_window (window_num);
50853 if (window_num == 0)
50855 add_insn_window (insn, window_list, insn_num_uops);
50856 if (window_list->num_insn >= MAX_INSN
50857 && insn_group == disp_branch)
50859 process_end_window ();
50860 return;
50863 else if (window_num == 1)
50865 window0_list = window_list->prev;
50866 sum = window0_list->window_size + window_list->window_size;
50867 if (sum == 32
50868 || (byte_len + sum) >= 48)
50870 process_end_window ();
50871 window_list = dispatch_window_list;
50874 add_insn_window (insn, window_list, insn_num_uops);
50876 else
50877 gcc_unreachable ();
50879 if (is_end_basic_block (insn_group))
50881 /* End of basic block is reached; do end-of-basic-block processing. */
50882 process_end_window ();
50883 return;
50887 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50889 DEBUG_FUNCTION static void
50890 debug_dispatch_window_file (FILE *file, int window_num)
50892 dispatch_windows *list;
50893 int i;
50895 if (window_num == 0)
50896 list = dispatch_window_list;
50897 else
50898 list = dispatch_window_list1;
50900 fprintf (file, "Window #%d:\n", list->window_num);
50901 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50902 list->num_insn, list->num_uops, list->window_size);
50903 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50904 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50906 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50907 list->num_stores);
50908 fprintf (file, " insn info:\n");
50910 for (i = 0; i < MAX_INSN; i++)
50912 if (!list->window[i].insn)
50913 break;
50914 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50915 i, group_name[list->window[i].group],
50916 i, (void *)list->window[i].insn,
50917 i, list->window[i].path,
50918 i, list->window[i].byte_len,
50919 i, list->window[i].imm_bytes);
50923 /* Print to stdout a dispatch window. */
50925 DEBUG_FUNCTION void
50926 debug_dispatch_window (int window_num)
50928 debug_dispatch_window_file (stdout, window_num);
50931 /* Print INSN dispatch information to FILE. */
50933 DEBUG_FUNCTION static void
50934 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50936 int byte_len;
50937 enum insn_path path;
50938 enum dispatch_group group;
50939 int imm_size;
50940 int num_imm_operand;
50941 int num_imm32_operand;
50942 int num_imm64_operand;
50944 if (INSN_CODE (insn) < 0)
50945 return;
50947 byte_len = min_insn_size (insn);
50948 path = get_insn_path (insn);
50949 group = get_insn_group (insn);
50950 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50951 &num_imm64_operand);
50953 fprintf (file, " insn info:\n");
50954 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50955 group_name[group], path, byte_len);
50956 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50957 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50960 /* Print to stdout the status of the ready list with respect to
50961 dispatch windows. */
50963 DEBUG_FUNCTION void
50964 debug_ready_dispatch (void)
50966 int i;
50967 int no_ready = number_in_ready ();
50969 fprintf (stdout, "Number of ready: %d\n", no_ready);
50971 for (i = 0; i < no_ready; i++)
50972 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50975 /* This routine is the driver of the dispatch scheduler. */
50977 static void
50978 do_dispatch (rtx_insn *insn, int mode)
50980 if (mode == DISPATCH_INIT)
50981 init_dispatch_sched ();
50982 else if (mode == ADD_TO_DISPATCH_WINDOW)
50983 add_to_dispatch_window (insn);
50986 /* Return TRUE if Dispatch Scheduling is supported. */
50988 static bool
50989 has_dispatch (rtx_insn *insn, int action)
50991 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
50992 && flag_dispatch_scheduler)
50993 switch (action)
50995 default:
50996 return false;
50998 case IS_DISPATCH_ON:
50999 return true;
51000 break;
51002 case IS_CMP:
51003 return is_cmp (insn);
51005 case DISPATCH_VIOLATION:
51006 return dispatch_violation ();
51008 case FITS_DISPATCH_WINDOW:
51009 return fits_dispatch_window (insn);
51012 return false;
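/* Dispatch scheduling is consulted only for the bdver1..bdver4 targets
   and only when flag_dispatch_scheduler is set; every other query
   answers false.  */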
51015 /* Implementation of reassociation_width target hook used by
51016 reassoc phase to identify parallelism level in reassociated
51017 tree. The statement's tree_code is passed in OPC. The argument's type
51018 is passed in MODE.
51020 Currently parallel reassociation is enabled for Atom
51021 processors only and we set reassociation width to be 2
51022 because Atom may issue up to 2 instructions per cycle.
51024 Return value should be fixed if parallel reassociation is
51025 enabled for other processors. */
51027 static int
51028 ix86_reassociation_width (unsigned int, machine_mode mode)
51030 int res = 1;
51032 /* Vector part. */
51033 if (VECTOR_MODE_P (mode))
51035 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51036 return 2;
51037 else
51038 return 1;
51041 /* Scalar part. */
51042 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51043 res = 2;
51044 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51045 res = 2;
51047 return res;
51050 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51051 place emms and femms instructions. */
51053 static machine_mode
51054 ix86_preferred_simd_mode (machine_mode mode)
51056 if (!TARGET_SSE)
51057 return word_mode;
51059 switch (mode)
51061 case QImode:
51062 return TARGET_AVX512BW ? V64QImode :
51063 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51064 case HImode:
51065 return TARGET_AVX512BW ? V32HImode :
51066 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51067 case SImode:
51068 return TARGET_AVX512F ? V16SImode :
51069 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51070 case DImode:
51071 return TARGET_AVX512F ? V8DImode :
51072 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51074 case SFmode:
51075 if (TARGET_AVX512F)
51076 return V16SFmode;
51077 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51078 return V8SFmode;
51079 else
51080 return V4SFmode;
51082 case DFmode:
51083 if (!TARGET_VECTORIZE_DOUBLE)
51084 return word_mode;
51085 else if (TARGET_AVX512F)
51086 return V8DFmode;
51087 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51088 return V4DFmode;
51089 else if (TARGET_SSE2)
51090 return V2DFmode;
51091 /* FALLTHRU */
51093 default:
51094 return word_mode;
51098 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51099 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51100 256bit and 128bit vectors. */
51102 static unsigned int
51103 ix86_autovectorize_vector_sizes (void)
51105 return TARGET_AVX512F ? 64 | 32 | 16 :
51106 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51111 /* Return class of registers which could be used for pseudo of MODE
51112 and of class RCLASS for spilling instead of memory. Return NO_REGS
51113 if it is not possible or not profitable. */
51114 static reg_class_t
51115 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51117 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51118 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51119 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51120 return ALL_SSE_REGS;
51121 return NO_REGS;
51124 /* Implement targetm.vectorize.init_cost. */
51126 static void *
51127 ix86_init_cost (struct loop *)
51129 unsigned *cost = XNEWVEC (unsigned, 3);
51130 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51131 return cost;
51134 /* Implement targetm.vectorize.add_stmt_cost. */
51136 static unsigned
51137 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51138 struct _stmt_vec_info *stmt_info, int misalign,
51139 enum vect_cost_model_location where)
51141 unsigned *cost = (unsigned *) data;
51142 unsigned retval = 0;
51144 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51145 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51147 /* Statements in an inner loop relative to the loop being
51148 vectorized are weighted more heavily. The value here is
51149 arbitrary and could potentially be improved with analysis. */
51150 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51151 count *= 50; /* FIXME. */
51153 retval = (unsigned) (count * stmt_cost);
51155 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51156 for Silvermont as it has an out-of-order integer pipeline and can execute
51157 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51158 if (TARGET_SILVERMONT || TARGET_INTEL)
51159 if (stmt_info && stmt_info->stmt)
51161 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51162 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51163 retval = (retval * 17) / 10;
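/* The integer arithmetic approximates the 1.7 factor: a cost of 10
   becomes 17 and a cost of 4 becomes 6.  */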
51166 cost[where] += retval;
51168 return retval;
51171 /* Implement targetm.vectorize.finish_cost. */
51173 static void
51174 ix86_finish_cost (void *data, unsigned *prologue_cost,
51175 unsigned *body_cost, unsigned *epilogue_cost)
51177 unsigned *cost = (unsigned *) data;
51178 *prologue_cost = cost[vect_prologue];
51179 *body_cost = cost[vect_body];
51180 *epilogue_cost = cost[vect_epilogue];
51183 /* Implement targetm.vectorize.destroy_cost_data. */
51185 static void
51186 ix86_destroy_cost_data (void *data)
51188 free (data);
51191 /* Validate target specific memory model bits in VAL. */
51193 static unsigned HOST_WIDE_INT
51194 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51196 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51197 bool strong;
51199 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51200 |MEMMODEL_MASK)
51201 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51203 warning (OPT_Winvalid_memory_model,
51204 "Unknown architecture specific memory model");
51205 return MEMMODEL_SEQ_CST;
51207 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51208 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51210 warning (OPT_Winvalid_memory_model,
51211 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51212 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51214 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51216 warning (OPT_Winvalid_memory_model,
51217 "HLE_RELEASE not used with RELEASE or stronger memory model");
51218 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51220 return val;
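/* The accepted encodings are a C11 memory model in MEMMODEL_MASK,
   optionally combined with exactly one of IX86_HLE_ACQUIRE or
   IX86_HLE_RELEASE; misuses are diagnosed above and replaced by a
   sequentially consistent model.  */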
51223 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51224 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51225 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51226 or number of vecsize_mangle variants that should be emitted. */
51228 static int
51229 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51230 struct cgraph_simd_clone *clonei,
51231 tree base_type, int num)
51233 int ret = 1;
51235 if (clonei->simdlen
51236 && (clonei->simdlen < 2
51237 || clonei->simdlen > 16
51238 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51240 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51241 "unsupported simdlen %d", clonei->simdlen);
51242 return 0;
51245 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51246 if (TREE_CODE (ret_type) != VOID_TYPE)
51247 switch (TYPE_MODE (ret_type))
51249 case QImode:
51250 case HImode:
51251 case SImode:
51252 case DImode:
51253 case SFmode:
51254 case DFmode:
51255 /* case SCmode: */
51256 /* case DCmode: */
51257 break;
51258 default:
51259 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51260 "unsupported return type %qT for simd\n", ret_type);
51261 return 0;
51264 tree t;
51265 int i;
51267 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51268 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51269 switch (TYPE_MODE (TREE_TYPE (t)))
51271 case QImode:
51272 case HImode:
51273 case SImode:
51274 case DImode:
51275 case SFmode:
51276 case DFmode:
51277 /* case SCmode: */
51278 /* case DCmode: */
51279 break;
51280 default:
51281 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51282 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51283 return 0;
51286 if (clonei->cilk_elemental)
51288 /* Parse the processor clause here. If not present, default to 'b'. */
51289 clonei->vecsize_mangle = 'b';
51291 else if (!TREE_PUBLIC (node->decl))
51293 /* If the function isn't exported, we can pick up just one ISA
51294 for the clones. */
51295 if (TARGET_AVX2)
51296 clonei->vecsize_mangle = 'd';
51297 else if (TARGET_AVX)
51298 clonei->vecsize_mangle = 'c';
51299 else
51300 clonei->vecsize_mangle = 'b';
51301 ret = 1;
51303 else
51305 clonei->vecsize_mangle = "bcd"[num];
51306 ret = 3;
51308 switch (clonei->vecsize_mangle)
51310 case 'b':
51311 clonei->vecsize_int = 128;
51312 clonei->vecsize_float = 128;
51313 break;
51314 case 'c':
51315 clonei->vecsize_int = 128;
51316 clonei->vecsize_float = 256;
51317 break;
51318 case 'd':
51319 clonei->vecsize_int = 256;
51320 clonei->vecsize_float = 256;
51321 break;
51323 if (clonei->simdlen == 0)
51325 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51326 clonei->simdlen = clonei->vecsize_int;
51327 else
51328 clonei->simdlen = clonei->vecsize_float;
51329 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51330 if (clonei->simdlen > 16)
51331 clonei->simdlen = 16;
51333 return ret;
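/* As an example, a 'c' (AVX) clone of a function whose base type is float
   gets vecsize_float = 256, so the default simdlen is 256 / 32 = 8.  */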
51336 /* Add target attribute to SIMD clone NODE if needed. */
51338 static void
51339 ix86_simd_clone_adjust (struct cgraph_node *node)
51341 const char *str = NULL;
51342 gcc_assert (node->decl == cfun->decl);
51343 switch (node->simdclone->vecsize_mangle)
51345 case 'b':
51346 if (!TARGET_SSE2)
51347 str = "sse2";
51348 break;
51349 case 'c':
51350 if (!TARGET_AVX)
51351 str = "avx";
51352 break;
51353 case 'd':
51354 if (!TARGET_AVX2)
51355 str = "avx2";
51356 break;
51357 default:
51358 gcc_unreachable ();
51360 if (str == NULL)
51361 return;
51362 push_cfun (NULL);
51363 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51364 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51365 gcc_assert (ok);
51366 pop_cfun ();
51367 ix86_reset_previous_fndecl ();
51368 ix86_set_current_function (node->decl);
51371 /* If SIMD clone NODE can't be used in a vectorized loop
51372 in current function, return -1, otherwise return a badness of using it
51373 (0 if it is most desirable from vecsize_mangle point of view, 1
51374 slightly less desirable, etc.). */
51376 static int
51377 ix86_simd_clone_usable (struct cgraph_node *node)
51379 switch (node->simdclone->vecsize_mangle)
51381 case 'b':
51382 if (!TARGET_SSE2)
51383 return -1;
51384 if (!TARGET_AVX)
51385 return 0;
51386 return TARGET_AVX2 ? 2 : 1;
51387 case 'c':
51388 if (!TARGET_AVX)
51389 return -1;
51390 return TARGET_AVX2 ? 1 : 0;
51391 break;
51392 case 'd':
51393 if (!TARGET_AVX2)
51394 return -1;
51395 return 0;
51396 default:
51397 gcc_unreachable ();
51401 /* This function adjusts the unroll factor based on
51402 the hardware capabilities. For example, bdver3 has
51403 a loop buffer which makes unrolling of smaller
51404 loops less important. This function decides the
51405 unroll factor using the number of memory references
51406 (the value 32 is used) as a heuristic. */
51408 static unsigned
51409 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51411 basic_block *bbs;
51412 rtx_insn *insn;
51413 unsigned i;
51414 unsigned mem_count = 0;
51416 if (!TARGET_ADJUST_UNROLL)
51417 return nunroll;
51419 /* Count the number of memory references within the loop body.
51420 This value determines the unrolling factor for bdver3 and bdver4
51421 architectures. */
51422 subrtx_iterator::array_type array;
51423 bbs = get_loop_body (loop);
51424 for (i = 0; i < loop->num_nodes; i++)
51425 FOR_BB_INSNS (bbs[i], insn)
51426 if (NONDEBUG_INSN_P (insn))
51427 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51428 if (const_rtx x = *iter)
51429 if (MEM_P (x))
51431 machine_mode mode = GET_MODE (x);
51432 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51433 if (n_words > 4)
51434 mem_count += 2;
51435 else
51436 mem_count += 1;
51438 free (bbs);
51440 if (mem_count && mem_count <= 32)
51441 return 32 / mem_count;
51443 return nunroll;
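/* A loop body with, say, 8 counted memory references is therefore unrolled
   32 / 8 = 4 times, while more than 32 references leaves NUNROLL
   unchanged.  */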
51447 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51449 static bool
51450 ix86_float_exceptions_rounding_supported_p (void)
51452 /* For x87 floating point with standard excess precision handling,
51453 there is no adddf3 pattern (since x87 floating point only has
51454 XFmode operations) so the default hook implementation gets this
51455 wrong. */
51456 return TARGET_80387 || TARGET_SSE_MATH;
51459 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51461 static void
51462 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51464 if (!TARGET_80387 && !TARGET_SSE_MATH)
51465 return;
51466 tree exceptions_var = create_tmp_var (integer_type_node);
51467 if (TARGET_80387)
51469 tree fenv_index_type = build_index_type (size_int (6));
51470 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51471 tree fenv_var = create_tmp_var (fenv_type);
51472 mark_addressable (fenv_var);
51473 tree fenv_ptr = build_pointer_type (fenv_type);
51474 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51475 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51476 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51477 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51478 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51479 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51480 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51481 tree hold_fnclex = build_call_expr (fnclex, 0);
51482 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51483 hold_fnclex);
51484 *clear = build_call_expr (fnclex, 0);
51485 tree sw_var = create_tmp_var (short_unsigned_type_node);
51486 tree fnstsw_call = build_call_expr (fnstsw, 0);
51487 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51488 sw_var, fnstsw_call);
51489 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51490 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51491 exceptions_var, exceptions_x87);
51492 *update = build2 (COMPOUND_EXPR, integer_type_node,
51493 sw_mod, update_mod);
51494 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51495 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51497 if (TARGET_SSE_MATH)
51499 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51500 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51501 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51502 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51503 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51504 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51505 mxcsr_orig_var, stmxcsr_hold_call);
51506 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51507 mxcsr_orig_var,
51508 build_int_cst (unsigned_type_node, 0x1f80));
51509 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51510 build_int_cst (unsigned_type_node, 0xffffffc0));
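/* ORing in 0x1f80 sets the six MXCSR exception mask bits, and the
   0xffffffc0 mask clears the six exception flag bits, so the value loaded
   for the hold state has all exceptions masked and no flags raised.  */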
51511 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51512 mxcsr_mod_var, hold_mod_val);
51513 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51514 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51515 hold_assign_orig, hold_assign_mod);
51516 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51517 ldmxcsr_hold_call);
51518 if (*hold)
51519 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51520 else
51521 *hold = hold_all;
51522 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51523 if (*clear)
51524 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51525 ldmxcsr_clear_call);
51526 else
51527 *clear = ldmxcsr_clear_call;
51528 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51529 tree exceptions_sse = fold_convert (integer_type_node,
51530 stxmcsr_update_call);
51531 if (*update)
51533 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51534 exceptions_var, exceptions_sse);
51535 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51536 exceptions_var, exceptions_mod);
51537 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51538 exceptions_assign);
51540 else
51541 *update = build2 (MODIFY_EXPR, integer_type_node,
51542 exceptions_var, exceptions_sse);
51543 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51544 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51545 ldmxcsr_update_call);
51547 tree atomic_feraiseexcept
51548 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51549 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51550 1, exceptions_var);
51551 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51552 atomic_feraiseexcept_call);
51555 /* Return mode to be used for bounds or VOIDmode
51556 if bounds are not supported. */
51558 static enum machine_mode
51559 ix86_mpx_bound_mode ()
51561 /* Do not support pointer checker if MPX
51562 is not enabled. */
51563 if (!TARGET_MPX)
51565 if (flag_check_pointer_bounds)
51566 warning (0, "Pointer Checker requires MPX support on this target."
51567 " Use -mmpx options to enable MPX.");
51568 return VOIDmode;
51571 return BNDmode;
51574 /* Return constant used to statically initialize constant bounds.
51576 This function is used to create special bound values. For now
51577 only INIT bounds and NONE bounds are expected. More special
51578 values may be added later. */
51580 static tree
51581 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51583 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51584 : build_zero_cst (pointer_sized_int_node);
51585 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51586 : build_minus_one_cst (pointer_sized_int_node);
51588 /* This function is supposed to be used to create INIT and
51589 NONE bounds only. */
51590 gcc_assert ((lb == 0 && ub == -1)
51591 || (lb == -1 && ub == 0));
51593 return build_complex (NULL, low, high);
51596 /* Generate a list of statements STMTS to initialize pointer bounds
51597 variable VAR with bounds LB and UB. Return the number of generated
51598 statements. */
51600 static int
51601 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51603 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51604 tree lhs, modify, var_p;
51606 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
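/* The upper bound is stored in inverted (one's-complement) form: LB goes
   into the first pointer-sized slot below and ~UB into the second.  */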
51607 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51609 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51610 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51611 append_to_statement_list (modify, stmts);
51613 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51614 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51615 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51616 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51617 append_to_statement_list (modify, stmts);
51619 return 2;
51622 /* Initialize the GCC target structure. */
51623 #undef TARGET_RETURN_IN_MEMORY
51624 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51626 #undef TARGET_LEGITIMIZE_ADDRESS
51627 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51629 #undef TARGET_ATTRIBUTE_TABLE
51630 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51631 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51632 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51633 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51634 # undef TARGET_MERGE_DECL_ATTRIBUTES
51635 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51636 #endif
51638 #undef TARGET_COMP_TYPE_ATTRIBUTES
51639 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51641 #undef TARGET_INIT_BUILTINS
51642 #define TARGET_INIT_BUILTINS ix86_init_builtins
51643 #undef TARGET_BUILTIN_DECL
51644 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51645 #undef TARGET_EXPAND_BUILTIN
51646 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51648 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51649 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51650 ix86_builtin_vectorized_function
51652 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51653 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51655 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51656 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51658 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51659 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51661 #undef TARGET_BUILTIN_RECIPROCAL
51662 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51664 #undef TARGET_ASM_FUNCTION_EPILOGUE
51665 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51667 #undef TARGET_ENCODE_SECTION_INFO
51668 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51669 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51670 #else
51671 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51672 #endif
51674 #undef TARGET_ASM_OPEN_PAREN
51675 #define TARGET_ASM_OPEN_PAREN ""
51676 #undef TARGET_ASM_CLOSE_PAREN
51677 #define TARGET_ASM_CLOSE_PAREN ""
51679 #undef TARGET_ASM_BYTE_OP
51680 #define TARGET_ASM_BYTE_OP ASM_BYTE
51682 #undef TARGET_ASM_ALIGNED_HI_OP
51683 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51684 #undef TARGET_ASM_ALIGNED_SI_OP
51685 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51686 #ifdef ASM_QUAD
51687 #undef TARGET_ASM_ALIGNED_DI_OP
51688 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51689 #endif
51691 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51692 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51694 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51695 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51697 #undef TARGET_ASM_UNALIGNED_HI_OP
51698 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51699 #undef TARGET_ASM_UNALIGNED_SI_OP
51700 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51701 #undef TARGET_ASM_UNALIGNED_DI_OP
51702 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51704 #undef TARGET_PRINT_OPERAND
51705 #define TARGET_PRINT_OPERAND ix86_print_operand
51706 #undef TARGET_PRINT_OPERAND_ADDRESS
51707 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51708 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51709 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51710 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51711 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51713 #undef TARGET_SCHED_INIT_GLOBAL
51714 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51715 #undef TARGET_SCHED_ADJUST_COST
51716 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51717 #undef TARGET_SCHED_ISSUE_RATE
51718 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51719 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51720 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51721 ia32_multipass_dfa_lookahead
51722 #undef TARGET_SCHED_MACRO_FUSION_P
51723 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51724 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51725 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51727 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51728 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51730 #undef TARGET_MEMMODEL_CHECK
51731 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51733 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51734 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51736 #ifdef HAVE_AS_TLS
51737 #undef TARGET_HAVE_TLS
51738 #define TARGET_HAVE_TLS true
51739 #endif
51740 #undef TARGET_CANNOT_FORCE_CONST_MEM
51741 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51742 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51743 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51745 #undef TARGET_DELEGITIMIZE_ADDRESS
51746 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51748 #undef TARGET_MS_BITFIELD_LAYOUT_P
51749 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51751 #if TARGET_MACHO
51752 #undef TARGET_BINDS_LOCAL_P
51753 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51754 #endif
51755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51756 #undef TARGET_BINDS_LOCAL_P
51757 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51758 #endif
51760 #undef TARGET_ASM_OUTPUT_MI_THUNK
51761 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51762 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51763 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51765 #undef TARGET_ASM_FILE_START
51766 #define TARGET_ASM_FILE_START x86_file_start
51768 #undef TARGET_OPTION_OVERRIDE
51769 #define TARGET_OPTION_OVERRIDE ix86_option_override
51771 #undef TARGET_REGISTER_MOVE_COST
51772 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51773 #undef TARGET_MEMORY_MOVE_COST
51774 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51775 #undef TARGET_RTX_COSTS
51776 #define TARGET_RTX_COSTS ix86_rtx_costs
51777 #undef TARGET_ADDRESS_COST
51778 #define TARGET_ADDRESS_COST ix86_address_cost
51780 #undef TARGET_FIXED_CONDITION_CODE_REGS
51781 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51782 #undef TARGET_CC_MODES_COMPATIBLE
51783 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51785 #undef TARGET_MACHINE_DEPENDENT_REORG
51786 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51788 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51789 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51791 #undef TARGET_BUILD_BUILTIN_VA_LIST
51792 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51794 #undef TARGET_FOLD_BUILTIN
51795 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51797 #undef TARGET_COMPARE_VERSION_PRIORITY
51798 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51800 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51801 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51802 ix86_generate_version_dispatcher_body
51804 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51805 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51806 ix86_get_function_versions_dispatcher
51808 #undef TARGET_ENUM_VA_LIST_P
51809 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51811 #undef TARGET_FN_ABI_VA_LIST
51812 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51814 #undef TARGET_CANONICAL_VA_LIST_TYPE
51815 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51817 #undef TARGET_EXPAND_BUILTIN_VA_START
51818 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51820 #undef TARGET_MD_ASM_CLOBBERS
51821 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51823 #undef TARGET_PROMOTE_PROTOTYPES
51824 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51825 #undef TARGET_SETUP_INCOMING_VARARGS
51826 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51827 #undef TARGET_MUST_PASS_IN_STACK
51828 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51829 #undef TARGET_FUNCTION_ARG_ADVANCE
51830 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51831 #undef TARGET_FUNCTION_ARG
51832 #define TARGET_FUNCTION_ARG ix86_function_arg
51833 #undef TARGET_INIT_PIC_REG
51834 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51835 #undef TARGET_USE_PSEUDO_PIC_REG
51836 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51837 #undef TARGET_FUNCTION_ARG_BOUNDARY
51838 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51839 #undef TARGET_PASS_BY_REFERENCE
51840 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51841 #undef TARGET_INTERNAL_ARG_POINTER
51842 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51843 #undef TARGET_UPDATE_STACK_BOUNDARY
51844 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51845 #undef TARGET_GET_DRAP_RTX
51846 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51847 #undef TARGET_STRICT_ARGUMENT_NAMING
51848 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51849 #undef TARGET_STATIC_CHAIN
51850 #define TARGET_STATIC_CHAIN ix86_static_chain
51851 #undef TARGET_TRAMPOLINE_INIT
51852 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51853 #undef TARGET_RETURN_POPS_ARGS
51854 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51856 #undef TARGET_LEGITIMATE_COMBINED_INSN
51857 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51859 #undef TARGET_ASAN_SHADOW_OFFSET
51860 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51862 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51863 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51865 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51866 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51868 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51869 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51871 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51872 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51873 ix86_libgcc_floating_mode_supported_p
51875 #undef TARGET_C_MODE_FOR_SUFFIX
51876 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51878 #ifdef HAVE_AS_TLS
51879 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51880 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51881 #endif
51883 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51884 #undef TARGET_INSERT_ATTRIBUTES
51885 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51886 #endif
51888 #undef TARGET_MANGLE_TYPE
51889 #define TARGET_MANGLE_TYPE ix86_mangle_type
51891 #if !TARGET_MACHO
51892 #undef TARGET_STACK_PROTECT_FAIL
51893 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51894 #endif
51896 #undef TARGET_FUNCTION_VALUE
51897 #define TARGET_FUNCTION_VALUE ix86_function_value
51899 #undef TARGET_FUNCTION_VALUE_REGNO_P
51900 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51902 #undef TARGET_PROMOTE_FUNCTION_MODE
51903 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51905 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51906 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51908 #undef TARGET_INSTANTIATE_DECLS
51909 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51911 #undef TARGET_SECONDARY_RELOAD
51912 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51914 #undef TARGET_CLASS_MAX_NREGS
51915 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51917 #undef TARGET_PREFERRED_RELOAD_CLASS
51918 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51919 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51920 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51921 #undef TARGET_CLASS_LIKELY_SPILLED_P
51922 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51924 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51925 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51926 ix86_builtin_vectorization_cost
51927 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51928 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51929 ix86_vectorize_vec_perm_const_ok
51930 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51931 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51932 ix86_preferred_simd_mode
51933 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51934 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51935 ix86_autovectorize_vector_sizes
51936 #undef TARGET_VECTORIZE_INIT_COST
51937 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51938 #undef TARGET_VECTORIZE_ADD_STMT_COST
51939 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51940 #undef TARGET_VECTORIZE_FINISH_COST
51941 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51942 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51943 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51945 #undef TARGET_SET_CURRENT_FUNCTION
51946 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
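
/* Hooks used to create, adjust and validate SIMD clones (e.g. for
   OpenMP "declare simd" functions).  */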
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
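
/* Hooks for the mode-switching pass (optimize_mode_switching).  */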
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
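
/* Pointer Bounds Checker (Intel MPX) instrumentation hooks.  */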
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options
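
/* Build the exported target hook vector.  TARGET_INITIALIZER (defined in
   target-def.h) expands to an initializer that picks up all of the
   TARGET_* macros overridden above.  */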
struct gcc_target targetm = TARGET_INITIALIZER;
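
/* Garbage-collection root tables for this file, generated by gengtype.  */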
#include "gt-i386.h"