/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "hash-map.h"
#include "is-a.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "wide-int.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "tree-chkp.h"
#include "rtl-chkp.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
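/* Illustration only (not part of the original file): a minimal sketch of how
   MODE_INDEX is meant to be consumed.  It maps a scalar integer mode to an
   index into the five-element per-mode arrays (QI, HI, SI, DI, other) of the
   processor_costs tables below.  The field name mult_init is assumed to
   match the struct processor_costs declaration in i386.h.  */
static inline int
example_mult_init_cost (const struct processor_costs *cost,
			enum machine_mode mode)
{
  /* mult_init[] holds the cost of starting a multiply, one entry per mode.  */
  return cost->mult_init[MODE_INDEX (mode)];
}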
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so COSTS_N_BYTES (2) == COSTS_N_INSNS (1): a two-byte add scores the same
   as a single generic instruction.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
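/* Illustration only (not part of the original file): how a stringop_algs
   table is read.  Each entry pairs a maximum block size with the algorithm
   to use for blocks up to that size; max == -1 marks the catch-all bucket,
   and the [2] array index selects the 32-bit (0) or 64-bit (1) variant of
   the table.  The field names (size[], max, alg, unknown_size) are assumed
   to follow the struct stringop_algs declaration in i386.h; the real
   selection logic lives in decide_alg further down in this file.  */
static enum stringop_alg
example_pick_stringop_alg (const stringop_algs *algs,
			   unsigned HOST_WIDE_INT count)
{
  for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
	|| count <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;
  return algs->unknown_size;
}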
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
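/* Illustration only (not part of the original file): the cost tables above
   and below are consumed through a pointer to the currently active table.
   A hypothetical selector, preferring the byte-based table when optimizing
   for size, might look like the following; the real choice is made in the
   option-override and RTX cost code, not here.  */
static const struct processor_costs *
example_active_costs (bool for_size, const struct processor_costs *tune_cost)
{
  return for_size ? &ix86_size_cost : tune_cost;
}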
203 /* Processor costs (relative to an add) */
204 static stringop_algs i386_memcpy[2] = {
205 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
206 DUMMY_STRINGOP_ALGS};
207 static stringop_algs i386_memset[2] = {
208 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
209 DUMMY_STRINGOP_ALGS};
211 static const
212 struct processor_costs i386_cost = { /* 386 specific costs */
213 COSTS_N_INSNS (1), /* cost of an add instruction */
214 COSTS_N_INSNS (1), /* cost of a lea instruction */
215 COSTS_N_INSNS (3), /* variable shift costs */
216 COSTS_N_INSNS (2), /* constant shift costs */
217 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
218 COSTS_N_INSNS (6), /* HI */
219 COSTS_N_INSNS (6), /* SI */
220 COSTS_N_INSNS (6), /* DI */
221 COSTS_N_INSNS (6)}, /* other */
222 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
223 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
224 COSTS_N_INSNS (23), /* HI */
225 COSTS_N_INSNS (23), /* SI */
226 COSTS_N_INSNS (23), /* DI */
227 COSTS_N_INSNS (23)}, /* other */
228 COSTS_N_INSNS (3), /* cost of movsx */
229 COSTS_N_INSNS (2), /* cost of movzx */
230 15, /* "large" insn */
231 3, /* MOVE_RATIO */
232 4, /* cost for loading QImode using movzbl */
233 {2, 4, 2}, /* cost of loading integer registers
234 in QImode, HImode and SImode.
235 Relative to reg-reg move (2). */
236 {2, 4, 2}, /* cost of storing integer registers */
237 2, /* cost of reg,reg fld/fst */
238 {8, 8, 8}, /* cost of loading fp registers
239 in SFmode, DFmode and XFmode */
240 {8, 8, 8}, /* cost of storing fp registers
241 in SFmode, DFmode and XFmode */
242 2, /* cost of moving MMX register */
243 {4, 8}, /* cost of loading MMX registers
244 in SImode and DImode */
245 {4, 8}, /* cost of storing MMX registers
246 in SImode and DImode */
247 2, /* cost of moving SSE register */
248 {4, 8, 16}, /* cost of loading SSE registers
249 in SImode, DImode and TImode */
250 {4, 8, 16}, /* cost of storing SSE registers
251 in SImode, DImode and TImode */
252 3, /* MMX or SSE register to integer */
253 0, /* size of l1 cache */
254 0, /* size of l2 cache */
255 0, /* size of prefetch block */
256 0, /* number of parallel prefetches */
257 1, /* Branch cost */
258 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
259 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
260 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
261 COSTS_N_INSNS (22), /* cost of FABS instruction. */
262 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
263 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
264 i386_memcpy,
265 i386_memset,
266 1, /* scalar_stmt_cost. */
267 1, /* scalar load_cost. */
268 1, /* scalar_store_cost. */
269 1, /* vec_stmt_cost. */
270 1, /* vec_to_scalar_cost. */
271 1, /* scalar_to_vec_cost. */
272 1, /* vec_align_load_cost. */
273 2, /* vec_unalign_load_cost. */
274 1, /* vec_store_cost. */
275 3, /* cond_taken_branch_cost. */
276 1, /* cond_not_taken_branch_cost. */
279 static stringop_algs i486_memcpy[2] = {
280 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
281 DUMMY_STRINGOP_ALGS};
282 static stringop_algs i486_memset[2] = {
283 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
284 DUMMY_STRINGOP_ALGS};
286 static const
287 struct processor_costs i486_cost = { /* 486 specific costs */
288 COSTS_N_INSNS (1), /* cost of an add instruction */
289 COSTS_N_INSNS (1), /* cost of a lea instruction */
290 COSTS_N_INSNS (3), /* variable shift costs */
291 COSTS_N_INSNS (2), /* constant shift costs */
292 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
293 COSTS_N_INSNS (12), /* HI */
294 COSTS_N_INSNS (12), /* SI */
295 COSTS_N_INSNS (12), /* DI */
296 COSTS_N_INSNS (12)}, /* other */
297 1, /* cost of multiply per each bit set */
298 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
299 COSTS_N_INSNS (40), /* HI */
300 COSTS_N_INSNS (40), /* SI */
301 COSTS_N_INSNS (40), /* DI */
302 COSTS_N_INSNS (40)}, /* other */
303 COSTS_N_INSNS (3), /* cost of movsx */
304 COSTS_N_INSNS (2), /* cost of movzx */
305 15, /* "large" insn */
306 3, /* MOVE_RATIO */
307 4, /* cost for loading QImode using movzbl */
308 {2, 4, 2}, /* cost of loading integer registers
309 in QImode, HImode and SImode.
310 Relative to reg-reg move (2). */
311 {2, 4, 2}, /* cost of storing integer registers */
312 2, /* cost of reg,reg fld/fst */
313 {8, 8, 8}, /* cost of loading fp registers
314 in SFmode, DFmode and XFmode */
315 {8, 8, 8}, /* cost of storing fp registers
316 in SFmode, DFmode and XFmode */
317 2, /* cost of moving MMX register */
318 {4, 8}, /* cost of loading MMX registers
319 in SImode and DImode */
320 {4, 8}, /* cost of storing MMX registers
321 in SImode and DImode */
322 2, /* cost of moving SSE register */
323 {4, 8, 16}, /* cost of loading SSE registers
324 in SImode, DImode and TImode */
325 {4, 8, 16}, /* cost of storing SSE registers
326 in SImode, DImode and TImode */
327 3, /* MMX or SSE register to integer */
328 4, /* size of l1 cache. 486 has 8kB cache
329 shared for code and data, so 4kB is
330 not really precise. */
331 4, /* size of l2 cache */
332 0, /* size of prefetch block */
333 0, /* number of parallel prefetches */
334 1, /* Branch cost */
335 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
336 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
337 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
338 COSTS_N_INSNS (3), /* cost of FABS instruction. */
339 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
340 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
341 i486_memcpy,
342 i486_memset,
343 1, /* scalar_stmt_cost. */
344 1, /* scalar load_cost. */
345 1, /* scalar_store_cost. */
346 1, /* vec_stmt_cost. */
347 1, /* vec_to_scalar_cost. */
348 1, /* scalar_to_vec_cost. */
349 1, /* vec_align_load_cost. */
350 2, /* vec_unalign_load_cost. */
351 1, /* vec_store_cost. */
352 3, /* cond_taken_branch_cost. */
353 1, /* cond_not_taken_branch_cost. */
356 static stringop_algs pentium_memcpy[2] = {
357 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
358 DUMMY_STRINGOP_ALGS};
359 static stringop_algs pentium_memset[2] = {
360 {libcall, {{-1, rep_prefix_4_byte, false}}},
361 DUMMY_STRINGOP_ALGS};
363 static const
364 struct processor_costs pentium_cost = {
365 COSTS_N_INSNS (1), /* cost of an add instruction */
366 COSTS_N_INSNS (1), /* cost of a lea instruction */
367 COSTS_N_INSNS (4), /* variable shift costs */
368 COSTS_N_INSNS (1), /* constant shift costs */
369 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
370 COSTS_N_INSNS (11), /* HI */
371 COSTS_N_INSNS (11), /* SI */
372 COSTS_N_INSNS (11), /* DI */
373 COSTS_N_INSNS (11)}, /* other */
374 0, /* cost of multiply per each bit set */
375 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
376 COSTS_N_INSNS (25), /* HI */
377 COSTS_N_INSNS (25), /* SI */
378 COSTS_N_INSNS (25), /* DI */
379 COSTS_N_INSNS (25)}, /* other */
380 COSTS_N_INSNS (3), /* cost of movsx */
381 COSTS_N_INSNS (2), /* cost of movzx */
382 8, /* "large" insn */
383 6, /* MOVE_RATIO */
384 6, /* cost for loading QImode using movzbl */
385 {2, 4, 2}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {2, 4, 2}, /* cost of storing integer registers */
389 2, /* cost of reg,reg fld/fst */
390 {2, 2, 6}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {4, 4, 6}, /* cost of storing fp registers
393 in SFmode, DFmode and XFmode */
394 8, /* cost of moving MMX register */
395 {8, 8}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {8, 8}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 8, 16}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 8, 16}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 3, /* MMX or SSE register to integer */
405 8, /* size of l1 cache. */
406 8, /* size of l2 cache */
407 0, /* size of prefetch block */
408 0, /* number of parallel prefetches */
409 2, /* Branch cost */
410 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
411 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
412 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
413 COSTS_N_INSNS (1), /* cost of FABS instruction. */
414 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
415 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
416 pentium_memcpy,
417 pentium_memset,
418 1, /* scalar_stmt_cost. */
419 1, /* scalar load_cost. */
420 1, /* scalar_store_cost. */
421 1, /* vec_stmt_cost. */
422 1, /* vec_to_scalar_cost. */
423 1, /* scalar_to_vec_cost. */
424 1, /* vec_align_load_cost. */
425 2, /* vec_unalign_load_cost. */
426 1, /* vec_store_cost. */
427 3, /* cond_taken_branch_cost. */
428 1, /* cond_not_taken_branch_cost. */
431 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
432 (we ensure the alignment). For small blocks inline loop is still a
433 noticeable win, for bigger blocks either rep movsl or rep movsb is
434 way to go. Rep movsb has apparently more expensive startup time in CPU,
435 but after 4K the difference is down in the noise. */
436 static stringop_algs pentiumpro_memcpy[2] = {
437 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
438 {8192, rep_prefix_4_byte, false},
439 {-1, rep_prefix_1_byte, false}}},
440 DUMMY_STRINGOP_ALGS};
441 static stringop_algs pentiumpro_memset[2] = {
442 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
443 {8192, rep_prefix_4_byte, false},
444 {-1, libcall, false}}},
445 DUMMY_STRINGOP_ALGS};
446 static const
447 struct processor_costs pentiumpro_cost = {
448 COSTS_N_INSNS (1), /* cost of an add instruction */
449 COSTS_N_INSNS (1), /* cost of a lea instruction */
450 COSTS_N_INSNS (1), /* variable shift costs */
451 COSTS_N_INSNS (1), /* constant shift costs */
452 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
453 COSTS_N_INSNS (4), /* HI */
454 COSTS_N_INSNS (4), /* SI */
455 COSTS_N_INSNS (4), /* DI */
456 COSTS_N_INSNS (4)}, /* other */
457 0, /* cost of multiply per each bit set */
458 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
459 COSTS_N_INSNS (17), /* HI */
460 COSTS_N_INSNS (17), /* SI */
461 COSTS_N_INSNS (17), /* DI */
462 COSTS_N_INSNS (17)}, /* other */
463 COSTS_N_INSNS (1), /* cost of movsx */
464 COSTS_N_INSNS (1), /* cost of movzx */
465 8, /* "large" insn */
466 6, /* MOVE_RATIO */
467 2, /* cost for loading QImode using movzbl */
468 {4, 4, 4}, /* cost of loading integer registers
469 in QImode, HImode and SImode.
470 Relative to reg-reg move (2). */
471 {2, 2, 2}, /* cost of storing integer registers */
472 2, /* cost of reg,reg fld/fst */
473 {2, 2, 6}, /* cost of loading fp registers
474 in SFmode, DFmode and XFmode */
475 {4, 4, 6}, /* cost of storing fp registers
476 in SFmode, DFmode and XFmode */
477 2, /* cost of moving MMX register */
478 {2, 2}, /* cost of loading MMX registers
479 in SImode and DImode */
480 {2, 2}, /* cost of storing MMX registers
481 in SImode and DImode */
482 2, /* cost of moving SSE register */
483 {2, 2, 8}, /* cost of loading SSE registers
484 in SImode, DImode and TImode */
485 {2, 2, 8}, /* cost of storing SSE registers
486 in SImode, DImode and TImode */
487 3, /* MMX or SSE register to integer */
488 8, /* size of l1 cache. */
489 256, /* size of l2 cache */
490 32, /* size of prefetch block */
491 6, /* number of parallel prefetches */
492 2, /* Branch cost */
493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
495 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
496 COSTS_N_INSNS (2), /* cost of FABS instruction. */
497 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
498 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
499 pentiumpro_memcpy,
500 pentiumpro_memset,
501 1, /* scalar_stmt_cost. */
502 1, /* scalar load_cost. */
503 1, /* scalar_store_cost. */
504 1, /* vec_stmt_cost. */
505 1, /* vec_to_scalar_cost. */
506 1, /* scalar_to_vec_cost. */
507 1, /* vec_align_load_cost. */
508 2, /* vec_unalign_load_cost. */
509 1, /* vec_store_cost. */
510 3, /* cond_taken_branch_cost. */
511 1, /* cond_not_taken_branch_cost. */
514 static stringop_algs geode_memcpy[2] = {
515 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
516 DUMMY_STRINGOP_ALGS};
517 static stringop_algs geode_memset[2] = {
518 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
519 DUMMY_STRINGOP_ALGS};
520 static const
521 struct processor_costs geode_cost = {
522 COSTS_N_INSNS (1), /* cost of an add instruction */
523 COSTS_N_INSNS (1), /* cost of a lea instruction */
524 COSTS_N_INSNS (2), /* variable shift costs */
525 COSTS_N_INSNS (1), /* constant shift costs */
526 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
527 COSTS_N_INSNS (4), /* HI */
528 COSTS_N_INSNS (7), /* SI */
529 COSTS_N_INSNS (7), /* DI */
530 COSTS_N_INSNS (7)}, /* other */
531 0, /* cost of multiply per each bit set */
532 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
533 COSTS_N_INSNS (23), /* HI */
534 COSTS_N_INSNS (39), /* SI */
535 COSTS_N_INSNS (39), /* DI */
536 COSTS_N_INSNS (39)}, /* other */
537 COSTS_N_INSNS (1), /* cost of movsx */
538 COSTS_N_INSNS (1), /* cost of movzx */
539 8, /* "large" insn */
540 4, /* MOVE_RATIO */
541 1, /* cost for loading QImode using movzbl */
542 {1, 1, 1}, /* cost of loading integer registers
543 in QImode, HImode and SImode.
544 Relative to reg-reg move (2). */
545 {1, 1, 1}, /* cost of storing integer registers */
546 1, /* cost of reg,reg fld/fst */
547 {1, 1, 1}, /* cost of loading fp registers
548 in SFmode, DFmode and XFmode */
549 {4, 6, 6}, /* cost of storing fp registers
550 in SFmode, DFmode and XFmode */
552 1, /* cost of moving MMX register */
553 {1, 1}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {1, 1}, /* cost of storing MMX registers
556 in SImode and DImode */
557 1, /* cost of moving SSE register */
558 {1, 1, 1}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {1, 1, 1}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 1, /* MMX or SSE register to integer */
563 64, /* size of l1 cache. */
564 128, /* size of l2 cache. */
565 32, /* size of prefetch block */
566 1, /* number of parallel prefetches */
567 1, /* Branch cost */
568 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
569 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
570 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
571 COSTS_N_INSNS (1), /* cost of FABS instruction. */
572 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
573 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
574 geode_memcpy,
575 geode_memset,
576 1, /* scalar_stmt_cost. */
577 1, /* scalar load_cost. */
578 1, /* scalar_store_cost. */
579 1, /* vec_stmt_cost. */
580 1, /* vec_to_scalar_cost. */
581 1, /* scalar_to_vec_cost. */
582 1, /* vec_align_load_cost. */
583 2, /* vec_unalign_load_cost. */
584 1, /* vec_store_cost. */
585 3, /* cond_taken_branch_cost. */
586 1, /* cond_not_taken_branch_cost. */
589 static stringop_algs k6_memcpy[2] = {
590 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
591 DUMMY_STRINGOP_ALGS};
592 static stringop_algs k6_memset[2] = {
593 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
594 DUMMY_STRINGOP_ALGS};
595 static const
596 struct processor_costs k6_cost = {
597 COSTS_N_INSNS (1), /* cost of an add instruction */
598 COSTS_N_INSNS (2), /* cost of a lea instruction */
599 COSTS_N_INSNS (1), /* variable shift costs */
600 COSTS_N_INSNS (1), /* constant shift costs */
601 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
602 COSTS_N_INSNS (3), /* HI */
603 COSTS_N_INSNS (3), /* SI */
604 COSTS_N_INSNS (3), /* DI */
605 COSTS_N_INSNS (3)}, /* other */
606 0, /* cost of multiply per each bit set */
607 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
608 COSTS_N_INSNS (18), /* HI */
609 COSTS_N_INSNS (18), /* SI */
610 COSTS_N_INSNS (18), /* DI */
611 COSTS_N_INSNS (18)}, /* other */
612 COSTS_N_INSNS (2), /* cost of movsx */
613 COSTS_N_INSNS (2), /* cost of movzx */
614 8, /* "large" insn */
615 4, /* MOVE_RATIO */
616 3, /* cost for loading QImode using movzbl */
617 {4, 5, 4}, /* cost of loading integer registers
618 in QImode, HImode and SImode.
619 Relative to reg-reg move (2). */
620 {2, 3, 2}, /* cost of storing integer registers */
621 4, /* cost of reg,reg fld/fst */
622 {6, 6, 6}, /* cost of loading fp registers
623 in SFmode, DFmode and XFmode */
624 {4, 4, 4}, /* cost of storing fp registers
625 in SFmode, DFmode and XFmode */
626 2, /* cost of moving MMX register */
627 {2, 2}, /* cost of loading MMX registers
628 in SImode and DImode */
629 {2, 2}, /* cost of storing MMX registers
630 in SImode and DImode */
631 2, /* cost of moving SSE register */
632 {2, 2, 8}, /* cost of loading SSE registers
633 in SImode, DImode and TImode */
634 {2, 2, 8}, /* cost of storing SSE registers
635 in SImode, DImode and TImode */
636 6, /* MMX or SSE register to integer */
637 32, /* size of l1 cache. */
638 32, /* size of l2 cache. Some models
639 have integrated l2 cache, but
640 optimizing for k6 is not important
641 enough to worry about that. */
642 32, /* size of prefetch block */
643 1, /* number of parallel prefetches */
644 1, /* Branch cost */
645 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
646 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
647 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
648 COSTS_N_INSNS (2), /* cost of FABS instruction. */
649 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
650 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
651 k6_memcpy,
652 k6_memset,
653 1, /* scalar_stmt_cost. */
654 1, /* scalar load_cost. */
655 1, /* scalar_store_cost. */
656 1, /* vec_stmt_cost. */
657 1, /* vec_to_scalar_cost. */
658 1, /* scalar_to_vec_cost. */
659 1, /* vec_align_load_cost. */
660 2, /* vec_unalign_load_cost. */
661 1, /* vec_store_cost. */
662 3, /* cond_taken_branch_cost. */
663 1, /* cond_not_taken_branch_cost. */
666 /* For some reason, Athlon deals better with REP prefix (relative to loops)
667 compared to K8. Alignment becomes important after 8 bytes for memcpy and
668 128 bytes for memset. */
669 static stringop_algs athlon_memcpy[2] = {
670 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
671 DUMMY_STRINGOP_ALGS};
672 static stringop_algs athlon_memset[2] = {
673 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
674 DUMMY_STRINGOP_ALGS};
675 static const
676 struct processor_costs athlon_cost = {
677 COSTS_N_INSNS (1), /* cost of an add instruction */
678 COSTS_N_INSNS (2), /* cost of a lea instruction */
679 COSTS_N_INSNS (1), /* variable shift costs */
680 COSTS_N_INSNS (1), /* constant shift costs */
681 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
682 COSTS_N_INSNS (5), /* HI */
683 COSTS_N_INSNS (5), /* SI */
684 COSTS_N_INSNS (5), /* DI */
685 COSTS_N_INSNS (5)}, /* other */
686 0, /* cost of multiply per each bit set */
687 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
688 COSTS_N_INSNS (26), /* HI */
689 COSTS_N_INSNS (42), /* SI */
690 COSTS_N_INSNS (74), /* DI */
691 COSTS_N_INSNS (74)}, /* other */
692 COSTS_N_INSNS (1), /* cost of movsx */
693 COSTS_N_INSNS (1), /* cost of movzx */
694 8, /* "large" insn */
695 9, /* MOVE_RATIO */
696 4, /* cost for loading QImode using movzbl */
697 {3, 4, 3}, /* cost of loading integer registers
698 in QImode, HImode and SImode.
699 Relative to reg-reg move (2). */
700 {3, 4, 3}, /* cost of storing integer registers */
701 4, /* cost of reg,reg fld/fst */
702 {4, 4, 12}, /* cost of loading fp registers
703 in SFmode, DFmode and XFmode */
704 {6, 6, 8}, /* cost of storing fp registers
705 in SFmode, DFmode and XFmode */
706 2, /* cost of moving MMX register */
707 {4, 4}, /* cost of loading MMX registers
708 in SImode and DImode */
709 {4, 4}, /* cost of storing MMX registers
710 in SImode and DImode */
711 2, /* cost of moving SSE register */
712 {4, 4, 6}, /* cost of loading SSE registers
713 in SImode, DImode and TImode */
714 {4, 4, 5}, /* cost of storing SSE registers
715 in SImode, DImode and TImode */
716 5, /* MMX or SSE register to integer */
717 64, /* size of l1 cache. */
718 256, /* size of l2 cache. */
719 64, /* size of prefetch block */
720 6, /* number of parallel prefetches */
721 5, /* Branch cost */
722 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
723 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
724 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
725 COSTS_N_INSNS (2), /* cost of FABS instruction. */
726 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
727 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
728 athlon_memcpy,
729 athlon_memset,
730 1, /* scalar_stmt_cost. */
731 1, /* scalar load_cost. */
732 1, /* scalar_store_cost. */
733 1, /* vec_stmt_cost. */
734 1, /* vec_to_scalar_cost. */
735 1, /* scalar_to_vec_cost. */
736 1, /* vec_align_load_cost. */
737 2, /* vec_unalign_load_cost. */
738 1, /* vec_store_cost. */
739 3, /* cond_taken_branch_cost. */
740 1, /* cond_not_taken_branch_cost. */
743 /* K8 has optimized REP instruction for medium sized blocks, but for very
744 small blocks it is better to use loop. For large blocks, libcall can
745 do nontemporary accesses and beat inline considerably. */
746 static stringop_algs k8_memcpy[2] = {
747 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
748 {-1, rep_prefix_4_byte, false}}},
749 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
750 {-1, libcall, false}}}};
751 static stringop_algs k8_memset[2] = {
752 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 {libcall, {{48, unrolled_loop, false},
755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
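/* Illustration only (not part of the original file): each *_memcpy/_memset
   array in this file has two elements, the strategy table for 32-bit code
   (index 0) and the one for 64-bit code (index 1); CPUs without 64-bit mode
   use DUMMY_STRINGOP_ALGS in the second slot.  A hypothetical caller would
   pick the variant like this; the real selection happens in the
   string-operation expansion code.  */
static inline const stringop_algs *
example_k8_memcpy_strategy (void)
{
  return &k8_memcpy[TARGET_64BIT ? 1 : 0];
}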
756 static const
757 struct processor_costs k8_cost = {
758 COSTS_N_INSNS (1), /* cost of an add instruction */
759 COSTS_N_INSNS (2), /* cost of a lea instruction */
760 COSTS_N_INSNS (1), /* variable shift costs */
761 COSTS_N_INSNS (1), /* constant shift costs */
762 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
763 COSTS_N_INSNS (4), /* HI */
764 COSTS_N_INSNS (3), /* SI */
765 COSTS_N_INSNS (4), /* DI */
766 COSTS_N_INSNS (5)}, /* other */
767 0, /* cost of multiply per each bit set */
768 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
769 COSTS_N_INSNS (26), /* HI */
770 COSTS_N_INSNS (42), /* SI */
771 COSTS_N_INSNS (74), /* DI */
772 COSTS_N_INSNS (74)}, /* other */
773 COSTS_N_INSNS (1), /* cost of movsx */
774 COSTS_N_INSNS (1), /* cost of movzx */
775 8, /* "large" insn */
776 9, /* MOVE_RATIO */
777 4, /* cost for loading QImode using movzbl */
778 {3, 4, 3}, /* cost of loading integer registers
779 in QImode, HImode and SImode.
780 Relative to reg-reg move (2). */
781 {3, 4, 3}, /* cost of storing integer registers */
782 4, /* cost of reg,reg fld/fst */
783 {4, 4, 12}, /* cost of loading fp registers
784 in SFmode, DFmode and XFmode */
785 {6, 6, 8}, /* cost of storing fp registers
786 in SFmode, DFmode and XFmode */
787 2, /* cost of moving MMX register */
788 {3, 3}, /* cost of loading MMX registers
789 in SImode and DImode */
790 {4, 4}, /* cost of storing MMX registers
791 in SImode and DImode */
792 2, /* cost of moving SSE register */
793 {4, 3, 6}, /* cost of loading SSE registers
794 in SImode, DImode and TImode */
795 {4, 4, 5}, /* cost of storing SSE registers
796 in SImode, DImode and TImode */
797 5, /* MMX or SSE register to integer */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
801 /* New AMD processors never drop prefetches; if they cannot be performed
802 immediately, they are queued. We set number of simultaneous prefetches
803 to a large constant to reflect this (it probably is not a good idea not
804 to limit number of prefetches at all, as their execution also takes some
805 time). */
806 100, /* number of parallel prefetches */
807 3, /* Branch cost */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
815 k8_memcpy,
816 k8_memset,
817 4, /* scalar_stmt_cost. */
818 2, /* scalar load_cost. */
819 2, /* scalar_store_cost. */
820 5, /* vec_stmt_cost. */
821 0, /* vec_to_scalar_cost. */
822 2, /* scalar_to_vec_cost. */
823 2, /* vec_align_load_cost. */
824 3, /* vec_unalign_load_cost. */
825 3, /* vec_store_cost. */
826 3, /* cond_taken_branch_cost. */
827 2, /* cond_not_taken_branch_cost. */
830 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
831 very small blocks it is better to use loop. For large blocks, libcall can
832 do nontemporary accesses and beat inline considerably. */
833 static stringop_algs amdfam10_memcpy[2] = {
834 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
835 {-1, rep_prefix_4_byte, false}}},
836 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
837 {-1, libcall, false}}}};
838 static stringop_algs amdfam10_memset[2] = {
839 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
840 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
841 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
842 {-1, libcall, false}}}};
843 struct processor_costs amdfam10_cost = {
844 COSTS_N_INSNS (1), /* cost of an add instruction */
845 COSTS_N_INSNS (2), /* cost of a lea instruction */
846 COSTS_N_INSNS (1), /* variable shift costs */
847 COSTS_N_INSNS (1), /* constant shift costs */
848 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
849 COSTS_N_INSNS (4), /* HI */
850 COSTS_N_INSNS (3), /* SI */
851 COSTS_N_INSNS (4), /* DI */
852 COSTS_N_INSNS (5)}, /* other */
853 0, /* cost of multiply per each bit set */
854 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
855 COSTS_N_INSNS (35), /* HI */
856 COSTS_N_INSNS (51), /* SI */
857 COSTS_N_INSNS (83), /* DI */
858 COSTS_N_INSNS (83)}, /* other */
859 COSTS_N_INSNS (1), /* cost of movsx */
860 COSTS_N_INSNS (1), /* cost of movzx */
861 8, /* "large" insn */
862 9, /* MOVE_RATIO */
863 4, /* cost for loading QImode using movzbl */
864 {3, 4, 3}, /* cost of loading integer registers
865 in QImode, HImode and SImode.
866 Relative to reg-reg move (2). */
867 {3, 4, 3}, /* cost of storing integer registers */
868 4, /* cost of reg,reg fld/fst */
869 {4, 4, 12}, /* cost of loading fp registers
870 in SFmode, DFmode and XFmode */
871 {6, 6, 8}, /* cost of storing fp registers
872 in SFmode, DFmode and XFmode */
873 2, /* cost of moving MMX register */
874 {3, 3}, /* cost of loading MMX registers
875 in SImode and DImode */
876 {4, 4}, /* cost of storing MMX registers
877 in SImode and DImode */
878 2, /* cost of moving SSE register */
879 {4, 4, 3}, /* cost of loading SSE registers
880 in SImode, DImode and TImode */
881 {4, 4, 5}, /* cost of storing SSE registers
882 in SImode, DImode and TImode */
883 3, /* MMX or SSE register to integer */
884 /* On K8:
885 MOVD reg64, xmmreg Double FSTORE 4
886 MOVD reg32, xmmreg Double FSTORE 4
887 On AMDFAM10:
888 MOVD reg64, xmmreg Double FADD 3
889 1/1 1/1
890 MOVD reg32, xmmreg Double FADD 3
891 1/1 1/1 */
892 64, /* size of l1 cache. */
893 512, /* size of l2 cache. */
894 64, /* size of prefetch block */
895 /* New AMD processors never drop prefetches; if they cannot be performed
896 immediately, they are queued. We set number of simultaneous prefetches
897 to a large constant to reflect this (it probably is not a good idea not
898 to limit number of prefetches at all, as their execution also takes some
899 time). */
900 100, /* number of parallel prefetches */
901 2, /* Branch cost */
902 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
903 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
904 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
905 COSTS_N_INSNS (2), /* cost of FABS instruction. */
906 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
907 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
909 amdfam10_memcpy,
910 amdfam10_memset,
911 4, /* scalar_stmt_cost. */
912 2, /* scalar load_cost. */
913 2, /* scalar_store_cost. */
914 6, /* vec_stmt_cost. */
915 0, /* vec_to_scalar_cost. */
916 2, /* scalar_to_vec_cost. */
917 2, /* vec_align_load_cost. */
918 2, /* vec_unalign_load_cost. */
919 2, /* vec_store_cost. */
920 2, /* cond_taken_branch_cost. */
921 1, /* cond_not_taken_branch_cost. */
924 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
925 very small blocks it is better to use loop. For large blocks, libcall
926 can do nontemporary accesses and beat inline considerably. */
927 static stringop_algs bdver1_memcpy[2] = {
928 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
929 {-1, rep_prefix_4_byte, false}}},
930 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
931 {-1, libcall, false}}}};
932 static stringop_algs bdver1_memset[2] = {
933 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
934 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
935 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
936 {-1, libcall, false}}}};
938 const struct processor_costs bdver1_cost = {
939 COSTS_N_INSNS (1), /* cost of an add instruction */
940 COSTS_N_INSNS (1), /* cost of a lea instruction */
941 COSTS_N_INSNS (1), /* variable shift costs */
942 COSTS_N_INSNS (1), /* constant shift costs */
943 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
944 COSTS_N_INSNS (4), /* HI */
945 COSTS_N_INSNS (4), /* SI */
946 COSTS_N_INSNS (6), /* DI */
947 COSTS_N_INSNS (6)}, /* other */
948 0, /* cost of multiply per each bit set */
949 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
950 COSTS_N_INSNS (35), /* HI */
951 COSTS_N_INSNS (51), /* SI */
952 COSTS_N_INSNS (83), /* DI */
953 COSTS_N_INSNS (83)}, /* other */
954 COSTS_N_INSNS (1), /* cost of movsx */
955 COSTS_N_INSNS (1), /* cost of movzx */
956 8, /* "large" insn */
957 9, /* MOVE_RATIO */
958 4, /* cost for loading QImode using movzbl */
959 {5, 5, 4}, /* cost of loading integer registers
960 in QImode, HImode and SImode.
961 Relative to reg-reg move (2). */
962 {4, 4, 4}, /* cost of storing integer registers */
963 2, /* cost of reg,reg fld/fst */
964 {5, 5, 12}, /* cost of loading fp registers
965 in SFmode, DFmode and XFmode */
966 {4, 4, 8}, /* cost of storing fp registers
967 in SFmode, DFmode and XFmode */
968 2, /* cost of moving MMX register */
969 {4, 4}, /* cost of loading MMX registers
970 in SImode and DImode */
971 {4, 4}, /* cost of storing MMX registers
972 in SImode and DImode */
973 2, /* cost of moving SSE register */
974 {4, 4, 4}, /* cost of loading SSE registers
975 in SImode, DImode and TImode */
976 {4, 4, 4}, /* cost of storing SSE registers
977 in SImode, DImode and TImode */
978 2, /* MMX or SSE register to integer */
979 /* On K8:
980 MOVD reg64, xmmreg Double FSTORE 4
981 MOVD reg32, xmmreg Double FSTORE 4
982 On AMDFAM10:
983 MOVD reg64, xmmreg Double FADD 3
984 1/1 1/1
985 MOVD reg32, xmmreg Double FADD 3
986 1/1 1/1 */
987 16, /* size of l1 cache. */
988 2048, /* size of l2 cache. */
989 64, /* size of prefetch block */
990 /* New AMD processors never drop prefetches; if they cannot be performed
991 immediately, they are queued. We set number of simultaneous prefetches
992 to a large constant to reflect this (it probably is not a good idea not
993 to limit number of prefetches at all, as their execution also takes some
994 time). */
995 100, /* number of parallel prefetches */
996 2, /* Branch cost */
997 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
998 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
999 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1000 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1001 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1002 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1004 bdver1_memcpy,
1005 bdver1_memset,
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
1016 1, /* cond_not_taken_branch_cost. */
1019 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1020 very small blocks it is better to use loop. For large blocks, libcall
1021 can do nontemporary accesses and beat inline considerably. */
1023 static stringop_algs bdver2_memcpy[2] = {
1024 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1025 {-1, rep_prefix_4_byte, false}}},
1026 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1027 {-1, libcall, false}}}};
1028 static stringop_algs bdver2_memset[2] = {
1029 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1030 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1031 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1032 {-1, libcall, false}}}};
1034 const struct processor_costs bdver2_cost = {
1035 COSTS_N_INSNS (1), /* cost of an add instruction */
1036 COSTS_N_INSNS (1), /* cost of a lea instruction */
1037 COSTS_N_INSNS (1), /* variable shift costs */
1038 COSTS_N_INSNS (1), /* constant shift costs */
1039 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1040 COSTS_N_INSNS (4), /* HI */
1041 COSTS_N_INSNS (4), /* SI */
1042 COSTS_N_INSNS (6), /* DI */
1043 COSTS_N_INSNS (6)}, /* other */
1044 0, /* cost of multiply per each bit set */
1045 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1046 COSTS_N_INSNS (35), /* HI */
1047 COSTS_N_INSNS (51), /* SI */
1048 COSTS_N_INSNS (83), /* DI */
1049 COSTS_N_INSNS (83)}, /* other */
1050 COSTS_N_INSNS (1), /* cost of movsx */
1051 COSTS_N_INSNS (1), /* cost of movzx */
1052 8, /* "large" insn */
1053 9, /* MOVE_RATIO */
1054 4, /* cost for loading QImode using movzbl */
1055 {5, 5, 4}, /* cost of loading integer registers
1056 in QImode, HImode and SImode.
1057 Relative to reg-reg move (2). */
1058 {4, 4, 4}, /* cost of storing integer registers */
1059 2, /* cost of reg,reg fld/fst */
1060 {5, 5, 12}, /* cost of loading fp registers
1061 in SFmode, DFmode and XFmode */
1062 {4, 4, 8}, /* cost of storing fp registers
1063 in SFmode, DFmode and XFmode */
1064 2, /* cost of moving MMX register */
1065 {4, 4}, /* cost of loading MMX registers
1066 in SImode and DImode */
1067 {4, 4}, /* cost of storing MMX registers
1068 in SImode and DImode */
1069 2, /* cost of moving SSE register */
1070 {4, 4, 4}, /* cost of loading SSE registers
1071 in SImode, DImode and TImode */
1072 {4, 4, 4}, /* cost of storing SSE registers
1073 in SImode, DImode and TImode */
1074 2, /* MMX or SSE register to integer */
1075 /* On K8:
1076 MOVD reg64, xmmreg Double FSTORE 4
1077 MOVD reg32, xmmreg Double FSTORE 4
1078 On AMDFAM10:
1079 MOVD reg64, xmmreg Double FADD 3
1080 1/1 1/1
1081 MOVD reg32, xmmreg Double FADD 3
1082 1/1 1/1 */
1083 16, /* size of l1 cache. */
1084 2048, /* size of l2 cache. */
1085 64, /* size of prefetch block */
1086 /* New AMD processors never drop prefetches; if they cannot be performed
1087 immediately, they are queued. We set number of simultaneous prefetches
1088 to a large constant to reflect this (it probably is not a good idea not
1089 to limit number of prefetches at all, as their execution also takes some
1090 time). */
1091 100, /* number of parallel prefetches */
1092 2, /* Branch cost */
1093 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1100 bdver2_memcpy,
1101 bdver2_memset,
1102 6, /* scalar_stmt_cost. */
1103 4, /* scalar load_cost. */
1104 4, /* scalar_store_cost. */
1105 6, /* vec_stmt_cost. */
1106 0, /* vec_to_scalar_cost. */
1107 2, /* scalar_to_vec_cost. */
1108 4, /* vec_align_load_cost. */
1109 4, /* vec_unalign_load_cost. */
1110 4, /* vec_store_cost. */
1111 2, /* cond_taken_branch_cost. */
1112 1, /* cond_not_taken_branch_cost. */
1116 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1117 very small blocks it is better to use loop. For large blocks, libcall
1118 can do nontemporary accesses and beat inline considerably. */
1119 static stringop_algs bdver3_memcpy[2] = {
1120 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1121 {-1, rep_prefix_4_byte, false}}},
1122 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1123 {-1, libcall, false}}}};
1124 static stringop_algs bdver3_memset[2] = {
1125 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1126 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1127 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1128 {-1, libcall, false}}}};
1129 struct processor_costs bdver3_cost = {
1130 COSTS_N_INSNS (1), /* cost of an add instruction */
1131 COSTS_N_INSNS (1), /* cost of a lea instruction */
1132 COSTS_N_INSNS (1), /* variable shift costs */
1133 COSTS_N_INSNS (1), /* constant shift costs */
1134 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1135 COSTS_N_INSNS (4), /* HI */
1136 COSTS_N_INSNS (4), /* SI */
1137 COSTS_N_INSNS (6), /* DI */
1138 COSTS_N_INSNS (6)}, /* other */
1139 0, /* cost of multiply per each bit set */
1140 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1141 COSTS_N_INSNS (35), /* HI */
1142 COSTS_N_INSNS (51), /* SI */
1143 COSTS_N_INSNS (83), /* DI */
1144 COSTS_N_INSNS (83)}, /* other */
1145 COSTS_N_INSNS (1), /* cost of movsx */
1146 COSTS_N_INSNS (1), /* cost of movzx */
1147 8, /* "large" insn */
1148 9, /* MOVE_RATIO */
1149 4, /* cost for loading QImode using movzbl */
1150 {5, 5, 4}, /* cost of loading integer registers
1151 in QImode, HImode and SImode.
1152 Relative to reg-reg move (2). */
1153 {4, 4, 4}, /* cost of storing integer registers */
1154 2, /* cost of reg,reg fld/fst */
1155 {5, 5, 12}, /* cost of loading fp registers
1156 in SFmode, DFmode and XFmode */
1157 {4, 4, 8}, /* cost of storing fp registers
1158 in SFmode, DFmode and XFmode */
1159 2, /* cost of moving MMX register */
1160 {4, 4}, /* cost of loading MMX registers
1161 in SImode and DImode */
1162 {4, 4}, /* cost of storing MMX registers
1163 in SImode and DImode */
1164 2, /* cost of moving SSE register */
1165 {4, 4, 4}, /* cost of loading SSE registers
1166 in SImode, DImode and TImode */
1167 {4, 4, 4}, /* cost of storing SSE registers
1168 in SImode, DImode and TImode */
1169 2, /* MMX or SSE register to integer */
1170 16, /* size of l1 cache. */
1171 2048, /* size of l2 cache. */
1172 64, /* size of prefetch block */
1173 /* New AMD processors never drop prefetches; if they cannot be performed
1174 immediately, they are queued. We set number of simultaneous prefetches
1175 to a large constant to reflect this (it probably is not a good idea not
1176 to limit number of prefetches at all, as their execution also takes some
1177 time). */
1178 100, /* number of parallel prefetches */
1179 2, /* Branch cost */
1180 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1181 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1182 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1183 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1184 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1185 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1187 bdver3_memcpy,
1188 bdver3_memset,
1189 6, /* scalar_stmt_cost. */
1190 4, /* scalar load_cost. */
1191 4, /* scalar_store_cost. */
1192 6, /* vec_stmt_cost. */
1193 0, /* vec_to_scalar_cost. */
1194 2, /* scalar_to_vec_cost. */
1195 4, /* vec_align_load_cost. */
1196 4, /* vec_unalign_load_cost. */
1197 4, /* vec_store_cost. */
1198 2, /* cond_taken_branch_cost. */
1199 1, /* cond_not_taken_branch_cost. */
1202 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1203 very small blocks it is better to use loop. For large blocks, libcall
1204 can do nontemporary accesses and beat inline considerably. */
1205 static stringop_algs bdver4_memcpy[2] = {
1206 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1207 {-1, rep_prefix_4_byte, false}}},
1208 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1209 {-1, libcall, false}}}};
1210 static stringop_algs bdver4_memset[2] = {
1211 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1212 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1213 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1214 {-1, libcall, false}}}};
1215 struct processor_costs bdver4_cost = {
1216 COSTS_N_INSNS (1), /* cost of an add instruction */
1217 COSTS_N_INSNS (1), /* cost of a lea instruction */
1218 COSTS_N_INSNS (1), /* variable shift costs */
1219 COSTS_N_INSNS (1), /* constant shift costs */
1220 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1221 COSTS_N_INSNS (4), /* HI */
1222 COSTS_N_INSNS (4), /* SI */
1223 COSTS_N_INSNS (6), /* DI */
1224 COSTS_N_INSNS (6)}, /* other */
1225 0, /* cost of multiply per each bit set */
1226 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1227 COSTS_N_INSNS (35), /* HI */
1228 COSTS_N_INSNS (51), /* SI */
1229 COSTS_N_INSNS (83), /* DI */
1230 COSTS_N_INSNS (83)}, /* other */
1231 COSTS_N_INSNS (1), /* cost of movsx */
1232 COSTS_N_INSNS (1), /* cost of movzx */
1233 8, /* "large" insn */
1234 9, /* MOVE_RATIO */
1235 4, /* cost for loading QImode using movzbl */
1236 {5, 5, 4}, /* cost of loading integer registers
1237 in QImode, HImode and SImode.
1238 Relative to reg-reg move (2). */
1239 {4, 4, 4}, /* cost of storing integer registers */
1240 2, /* cost of reg,reg fld/fst */
1241 {5, 5, 12}, /* cost of loading fp registers
1242 in SFmode, DFmode and XFmode */
1243 {4, 4, 8}, /* cost of storing fp registers
1244 in SFmode, DFmode and XFmode */
1245 2, /* cost of moving MMX register */
1246 {4, 4}, /* cost of loading MMX registers
1247 in SImode and DImode */
1248 {4, 4}, /* cost of storing MMX registers
1249 in SImode and DImode */
1250 2, /* cost of moving SSE register */
1251 {4, 4, 4}, /* cost of loading SSE registers
1252 in SImode, DImode and TImode */
1253 {4, 4, 4}, /* cost of storing SSE registers
1254 in SImode, DImode and TImode */
1255 2, /* MMX or SSE register to integer */
1256 16, /* size of l1 cache. */
1257 2048, /* size of l2 cache. */
1258 64, /* size of prefetch block */
1259 /* New AMD processors never drop prefetches; if they cannot be performed
1260 immediately, they are queued. We set number of simultaneous prefetches
1261 to a large constant to reflect this (it probably is not a good idea not
1262 to limit number of prefetches at all, as their execution also takes some
1263 time). */
1264 100, /* number of parallel prefetches */
1265 2, /* Branch cost */
1266 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1267 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1268 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1269 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1270 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1271 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1273 bdver4_memcpy,
1274 bdver4_memset,
1275 6, /* scalar_stmt_cost. */
1276 4, /* scalar load_cost. */
1277 4, /* scalar_store_cost. */
1278 6, /* vec_stmt_cost. */
1279 0, /* vec_to_scalar_cost. */
1280 2, /* scalar_to_vec_cost. */
1281 4, /* vec_align_load_cost. */
1282 4, /* vec_unalign_load_cost. */
1283 4, /* vec_store_cost. */
1284 2, /* cond_taken_branch_cost. */
1285 1, /* cond_not_taken_branch_cost. */
1288 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1289 very small blocks it is better to use loop. For large blocks, libcall can
1290 do nontemporary accesses and beat inline considerably. */
1291 static stringop_algs btver1_memcpy[2] = {
1292 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1293 {-1, rep_prefix_4_byte, false}}},
1294 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1295 {-1, libcall, false}}}};
1296 static stringop_algs btver1_memset[2] = {
1297 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1298 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1299 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1300 {-1, libcall, false}}}};
1301 const struct processor_costs btver1_cost = {
1302 COSTS_N_INSNS (1), /* cost of an add instruction */
1303 COSTS_N_INSNS (2), /* cost of a lea instruction */
1304 COSTS_N_INSNS (1), /* variable shift costs */
1305 COSTS_N_INSNS (1), /* constant shift costs */
1306 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1307 COSTS_N_INSNS (4), /* HI */
1308 COSTS_N_INSNS (3), /* SI */
1309 COSTS_N_INSNS (4), /* DI */
1310 COSTS_N_INSNS (5)}, /* other */
1311 0, /* cost of multiply per each bit set */
1312 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1313 COSTS_N_INSNS (35), /* HI */
1314 COSTS_N_INSNS (51), /* SI */
1315 COSTS_N_INSNS (83), /* DI */
1316 COSTS_N_INSNS (83)}, /* other */
1317 COSTS_N_INSNS (1), /* cost of movsx */
1318 COSTS_N_INSNS (1), /* cost of movzx */
1319 8, /* "large" insn */
1320 9, /* MOVE_RATIO */
1321 4, /* cost for loading QImode using movzbl */
1322 {3, 4, 3}, /* cost of loading integer registers
1323 in QImode, HImode and SImode.
1324 Relative to reg-reg move (2). */
1325 {3, 4, 3}, /* cost of storing integer registers */
1326 4, /* cost of reg,reg fld/fst */
1327 {4, 4, 12}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode */
1329 {6, 6, 8}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode */
1331 2, /* cost of moving MMX register */
1332 {3, 3}, /* cost of loading MMX registers
1333 in SImode and DImode */
1334 {4, 4}, /* cost of storing MMX registers
1335 in SImode and DImode */
1336 2, /* cost of moving SSE register */
1337 {4, 4, 3}, /* cost of loading SSE registers
1338 in SImode, DImode and TImode */
1339 {4, 4, 5}, /* cost of storing SSE registers
1340 in SImode, DImode and TImode */
1341 3, /* MMX or SSE register to integer */
1342 /* On K8:
1343 MOVD reg64, xmmreg Double FSTORE 4
1344 MOVD reg32, xmmreg Double FSTORE 4
1345 On AMDFAM10:
1346 MOVD reg64, xmmreg Double FADD 3
1347 1/1 1/1
1348 MOVD reg32, xmmreg Double FADD 3
1349 1/1 1/1 */
1350 32, /* size of l1 cache. */
1351 512, /* size of l2 cache. */
1352 64, /* size of prefetch block */
1353 100, /* number of parallel prefetches */
1354 2, /* Branch cost */
1355 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1356 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1357 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1358 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1359 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1360 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1362 btver1_memcpy,
1363 btver1_memset,
1364 4, /* scalar_stmt_cost. */
1365 2, /* scalar load_cost. */
1366 2, /* scalar_store_cost. */
1367 6, /* vec_stmt_cost. */
1368 0, /* vec_to_scalar_cost. */
1369 2, /* scalar_to_vec_cost. */
1370 2, /* vec_align_load_cost. */
1371 2, /* vec_unalign_load_cost. */
1372 2, /* vec_store_cost. */
1373 2, /* cond_taken_branch_cost. */
1374 1, /* cond_not_taken_branch_cost. */
1377 static stringop_algs btver2_memcpy[2] = {
1378 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1379 {-1, rep_prefix_4_byte, false}}},
1380 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1381 {-1, libcall, false}}}};
1382 static stringop_algs btver2_memset[2] = {
1383 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1384 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1385 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1386 {-1, libcall, false}}}};
1387 const struct processor_costs btver2_cost = {
1388 COSTS_N_INSNS (1), /* cost of an add instruction */
1389 COSTS_N_INSNS (2), /* cost of a lea instruction */
1390 COSTS_N_INSNS (1), /* variable shift costs */
1391 COSTS_N_INSNS (1), /* constant shift costs */
1392 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1393 COSTS_N_INSNS (4), /* HI */
1394 COSTS_N_INSNS (3), /* SI */
1395 COSTS_N_INSNS (4), /* DI */
1396 COSTS_N_INSNS (5)}, /* other */
1397 0, /* cost of multiply per each bit set */
1398 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1399 COSTS_N_INSNS (35), /* HI */
1400 COSTS_N_INSNS (51), /* SI */
1401 COSTS_N_INSNS (83), /* DI */
1402 COSTS_N_INSNS (83)}, /* other */
1403 COSTS_N_INSNS (1), /* cost of movsx */
1404 COSTS_N_INSNS (1), /* cost of movzx */
1405 8, /* "large" insn */
1406 9, /* MOVE_RATIO */
1407 4, /* cost for loading QImode using movzbl */
1408 {3, 4, 3}, /* cost of loading integer registers
1409 in QImode, HImode and SImode.
1410 Relative to reg-reg move (2). */
1411 {3, 4, 3}, /* cost of storing integer registers */
1412 4, /* cost of reg,reg fld/fst */
1413 {4, 4, 12}, /* cost of loading fp registers
1414 in SFmode, DFmode and XFmode */
1415 {6, 6, 8}, /* cost of storing fp registers
1416 in SFmode, DFmode and XFmode */
1417 2, /* cost of moving MMX register */
1418 {3, 3}, /* cost of loading MMX registers
1419 in SImode and DImode */
1420 {4, 4}, /* cost of storing MMX registers
1421 in SImode and DImode */
1422 2, /* cost of moving SSE register */
1423 {4, 4, 3}, /* cost of loading SSE registers
1424 in SImode, DImode and TImode */
1425 {4, 4, 5}, /* cost of storing SSE registers
1426 in SImode, DImode and TImode */
1427 3, /* MMX or SSE register to integer */
1428 /* On K8:
1429 MOVD reg64, xmmreg Double FSTORE 4
1430 MOVD reg32, xmmreg Double FSTORE 4
1431 On AMDFAM10:
1432 MOVD reg64, xmmreg Double FADD 3
1433 1/1 1/1
1434 MOVD reg32, xmmreg Double FADD 3
1435 1/1 1/1 */
1436 32, /* size of l1 cache. */
1437 2048, /* size of l2 cache. */
1438 64, /* size of prefetch block */
1439 100, /* number of parallel prefetches */
1440 2, /* Branch cost */
1441 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1442 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1443 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1444 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1445 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1446 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1447 btver2_memcpy,
1448 btver2_memset,
1449 4, /* scalar_stmt_cost. */
1450 2, /* scalar load_cost. */
1451 2, /* scalar_store_cost. */
1452 6, /* vec_stmt_cost. */
1453 0, /* vec_to_scalar_cost. */
1454 2, /* scalar_to_vec_cost. */
1455 2, /* vec_align_load_cost. */
1456 2, /* vec_unalign_load_cost. */
1457 2, /* vec_store_cost. */
1458 2, /* cond_taken_branch_cost. */
1459 1, /* cond_not_taken_branch_cost. */
1462 static stringop_algs pentium4_memcpy[2] = {
1463 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1464 DUMMY_STRINGOP_ALGS};
1465 static stringop_algs pentium4_memset[2] = {
1466 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1467 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 DUMMY_STRINGOP_ALGS};
1470 static const
1471 struct processor_costs pentium4_cost = {
1472 COSTS_N_INSNS (1), /* cost of an add instruction */
1473 COSTS_N_INSNS (3), /* cost of a lea instruction */
1474 COSTS_N_INSNS (4), /* variable shift costs */
1475 COSTS_N_INSNS (4), /* constant shift costs */
1476 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1477 COSTS_N_INSNS (15), /* HI */
1478 COSTS_N_INSNS (15), /* SI */
1479 COSTS_N_INSNS (15), /* DI */
1480 COSTS_N_INSNS (15)}, /* other */
1481 0, /* cost of multiply per each bit set */
1482 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1483 COSTS_N_INSNS (56), /* HI */
1484 COSTS_N_INSNS (56), /* SI */
1485 COSTS_N_INSNS (56), /* DI */
1486 COSTS_N_INSNS (56)}, /* other */
1487 COSTS_N_INSNS (1), /* cost of movsx */
1488 COSTS_N_INSNS (1), /* cost of movzx */
1489 16, /* "large" insn */
1490 6, /* MOVE_RATIO */
1491 2, /* cost for loading QImode using movzbl */
1492 {4, 5, 4}, /* cost of loading integer registers
1493 in QImode, HImode and SImode.
1494 Relative to reg-reg move (2). */
1495 {2, 3, 2}, /* cost of storing integer registers */
1496 2, /* cost of reg,reg fld/fst */
1497 {2, 2, 6}, /* cost of loading fp registers
1498 in SFmode, DFmode and XFmode */
1499 {4, 4, 6}, /* cost of storing fp registers
1500 in SFmode, DFmode and XFmode */
1501 2, /* cost of moving MMX register */
1502 {2, 2}, /* cost of loading MMX registers
1503 in SImode and DImode */
1504 {2, 2}, /* cost of storing MMX registers
1505 in SImode and DImode */
1506 12, /* cost of moving SSE register */
1507 {12, 12, 12}, /* cost of loading SSE registers
1508 in SImode, DImode and TImode */
1509 {2, 2, 8}, /* cost of storing SSE registers
1510 in SImode, DImode and TImode */
1511 10, /* MMX or SSE register to integer */
1512 8, /* size of l1 cache. */
1513 256, /* size of l2 cache. */
1514 64, /* size of prefetch block */
1515 6, /* number of parallel prefetches */
1516 2, /* Branch cost */
1517 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1518 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1519 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1520 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1521 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1522 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1523 pentium4_memcpy,
1524 pentium4_memset,
1525 1, /* scalar_stmt_cost. */
1526 1, /* scalar load_cost. */
1527 1, /* scalar_store_cost. */
1528 1, /* vec_stmt_cost. */
1529 1, /* vec_to_scalar_cost. */
1530 1, /* scalar_to_vec_cost. */
1531 1, /* vec_align_load_cost. */
1532 2, /* vec_unalign_load_cost. */
1533 1, /* vec_store_cost. */
1534 3, /* cond_taken_branch_cost. */
1535 1, /* cond_not_taken_branch_cost. */
1538 static stringop_algs nocona_memcpy[2] = {
1539 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1540 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1541 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1543 static stringop_algs nocona_memset[2] = {
1544 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1545 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1546 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1547 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1549 static const
1550 struct processor_costs nocona_cost = {
1551 COSTS_N_INSNS (1), /* cost of an add instruction */
1552 COSTS_N_INSNS (1), /* cost of a lea instruction */
1553 COSTS_N_INSNS (1), /* variable shift costs */
1554 COSTS_N_INSNS (1), /* constant shift costs */
1555 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1556 COSTS_N_INSNS (10), /* HI */
1557 COSTS_N_INSNS (10), /* SI */
1558 COSTS_N_INSNS (10), /* DI */
1559 COSTS_N_INSNS (10)}, /* other */
1560 0, /* cost of multiply per each bit set */
1561 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1562 COSTS_N_INSNS (66), /* HI */
1563 COSTS_N_INSNS (66), /* SI */
1564 COSTS_N_INSNS (66), /* DI */
1565 COSTS_N_INSNS (66)}, /* other */
1566 COSTS_N_INSNS (1), /* cost of movsx */
1567 COSTS_N_INSNS (1), /* cost of movzx */
1568 16, /* "large" insn */
1569 17, /* MOVE_RATIO */
1570 4, /* cost for loading QImode using movzbl */
1571 {4, 4, 4}, /* cost of loading integer registers
1572 in QImode, HImode and SImode.
1573 Relative to reg-reg move (2). */
1574 {4, 4, 4}, /* cost of storing integer registers */
1575 3, /* cost of reg,reg fld/fst */
1576 {12, 12, 12}, /* cost of loading fp registers
1577 in SFmode, DFmode and XFmode */
1578 {4, 4, 4}, /* cost of storing fp registers
1579 in SFmode, DFmode and XFmode */
1580 6, /* cost of moving MMX register */
1581 {12, 12}, /* cost of loading MMX registers
1582 in SImode and DImode */
1583 {12, 12}, /* cost of storing MMX registers
1584 in SImode and DImode */
1585 6, /* cost of moving SSE register */
1586 {12, 12, 12}, /* cost of loading SSE registers
1587 in SImode, DImode and TImode */
1588 {12, 12, 12}, /* cost of storing SSE registers
1589 in SImode, DImode and TImode */
1590 8, /* MMX or SSE register to integer */
1591 8, /* size of l1 cache. */
1592 1024, /* size of l2 cache. */
1593 64, /* size of prefetch block */
1594 8, /* number of parallel prefetches */
1595 1, /* Branch cost */
1596 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1597 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1598 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1599 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1600 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1601 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1602 nocona_memcpy,
1603 nocona_memset,
1604 1, /* scalar_stmt_cost. */
1605 1, /* scalar load_cost. */
1606 1, /* scalar_store_cost. */
1607 1, /* vec_stmt_cost. */
1608 1, /* vec_to_scalar_cost. */
1609 1, /* scalar_to_vec_cost. */
1610 1, /* vec_align_load_cost. */
1611 2, /* vec_unalign_load_cost. */
1612 1, /* vec_store_cost. */
1613 3, /* cond_taken_branch_cost. */
1614 1, /* cond_not_taken_branch_cost. */
1617 static stringop_algs atom_memcpy[2] = {
1618 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1619 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1620 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1621 static stringop_algs atom_memset[2] = {
1622 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1623 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1624 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1625 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1626 static const
1627 struct processor_costs atom_cost = {
1628 COSTS_N_INSNS (1), /* cost of an add instruction */
1629 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1630 COSTS_N_INSNS (1), /* variable shift costs */
1631 COSTS_N_INSNS (1), /* constant shift costs */
1632 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1633 COSTS_N_INSNS (4), /* HI */
1634 COSTS_N_INSNS (3), /* SI */
1635 COSTS_N_INSNS (4), /* DI */
1636 COSTS_N_INSNS (2)}, /* other */
1637 0, /* cost of multiply per each bit set */
1638 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1639 COSTS_N_INSNS (26), /* HI */
1640 COSTS_N_INSNS (42), /* SI */
1641 COSTS_N_INSNS (74), /* DI */
1642 COSTS_N_INSNS (74)}, /* other */
1643 COSTS_N_INSNS (1), /* cost of movsx */
1644 COSTS_N_INSNS (1), /* cost of movzx */
1645 8, /* "large" insn */
1646 17, /* MOVE_RATIO */
1647 4, /* cost for loading QImode using movzbl */
1648 {4, 4, 4}, /* cost of loading integer registers
1649 in QImode, HImode and SImode.
1650 Relative to reg-reg move (2). */
1651 {4, 4, 4}, /* cost of storing integer registers */
1652 4, /* cost of reg,reg fld/fst */
1653 {12, 12, 12}, /* cost of loading fp registers
1654 in SFmode, DFmode and XFmode */
1655 {6, 6, 8}, /* cost of storing fp registers
1656 in SFmode, DFmode and XFmode */
1657 2, /* cost of moving MMX register */
1658 {8, 8}, /* cost of loading MMX registers
1659 in SImode and DImode */
1660 {8, 8}, /* cost of storing MMX registers
1661 in SImode and DImode */
1662 2, /* cost of moving SSE register */
1663 {8, 8, 8}, /* cost of loading SSE registers
1664 in SImode, DImode and TImode */
1665 {8, 8, 8}, /* cost of storing SSE registers
1666 in SImode, DImode and TImode */
1667 5, /* MMX or SSE register to integer */
1668 32, /* size of l1 cache. */
1669 256, /* size of l2 cache. */
1670 64, /* size of prefetch block */
1671 6, /* number of parallel prefetches */
1672 3, /* Branch cost */
1673 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1674 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1675 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1676 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1677 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1678 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1679 atom_memcpy,
1680 atom_memset,
1681 1, /* scalar_stmt_cost. */
1682 1, /* scalar load_cost. */
1683 1, /* scalar_store_cost. */
1684 1, /* vec_stmt_cost. */
1685 1, /* vec_to_scalar_cost. */
1686 1, /* scalar_to_vec_cost. */
1687 1, /* vec_align_load_cost. */
1688 2, /* vec_unalign_load_cost. */
1689 1, /* vec_store_cost. */
1690 3, /* cond_taken_branch_cost. */
1691 1, /* cond_not_taken_branch_cost. */
1694 static stringop_algs slm_memcpy[2] = {
1695 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1696 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1697 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1698 static stringop_algs slm_memset[2] = {
1699 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1700 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1701 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1702 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1703 static const
1704 struct processor_costs slm_cost = {
1705 COSTS_N_INSNS (1), /* cost of an add instruction */
1706 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1707 COSTS_N_INSNS (1), /* variable shift costs */
1708 COSTS_N_INSNS (1), /* constant shift costs */
1709 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1710 COSTS_N_INSNS (3), /* HI */
1711 COSTS_N_INSNS (3), /* SI */
1712 COSTS_N_INSNS (4), /* DI */
1713 COSTS_N_INSNS (2)}, /* other */
1714 0, /* cost of multiply per each bit set */
1715 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1716 COSTS_N_INSNS (26), /* HI */
1717 COSTS_N_INSNS (42), /* SI */
1718 COSTS_N_INSNS (74), /* DI */
1719 COSTS_N_INSNS (74)}, /* other */
1720 COSTS_N_INSNS (1), /* cost of movsx */
1721 COSTS_N_INSNS (1), /* cost of movzx */
1722 8, /* "large" insn */
1723 17, /* MOVE_RATIO */
1724 4, /* cost for loading QImode using movzbl */
1725 {4, 4, 4}, /* cost of loading integer registers
1726 in QImode, HImode and SImode.
1727 Relative to reg-reg move (2). */
1728 {4, 4, 4}, /* cost of storing integer registers */
1729 4, /* cost of reg,reg fld/fst */
1730 {12, 12, 12}, /* cost of loading fp registers
1731 in SFmode, DFmode and XFmode */
1732 {6, 6, 8}, /* cost of storing fp registers
1733 in SFmode, DFmode and XFmode */
1734 2, /* cost of moving MMX register */
1735 {8, 8}, /* cost of loading MMX registers
1736 in SImode and DImode */
1737 {8, 8}, /* cost of storing MMX registers
1738 in SImode and DImode */
1739 2, /* cost of moving SSE register */
1740 {8, 8, 8}, /* cost of loading SSE registers
1741 in SImode, DImode and TImode */
1742 {8, 8, 8}, /* cost of storing SSE registers
1743 in SImode, DImode and TImode */
1744 5, /* MMX or SSE register to integer */
1745 32, /* size of l1 cache. */
1746 256, /* size of l2 cache. */
1747 64, /* size of prefetch block */
1748 6, /* number of parallel prefetches */
1749 3, /* Branch cost */
1750 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1751 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1752 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1753 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1754 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1755 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1756 slm_memcpy,
1757 slm_memset,
1758 1, /* scalar_stmt_cost. */
1759 1, /* scalar load_cost. */
1760 1, /* scalar_store_cost. */
1761 1, /* vec_stmt_cost. */
1762 4, /* vec_to_scalar_cost. */
1763 1, /* scalar_to_vec_cost. */
1764 1, /* vec_align_load_cost. */
1765 2, /* vec_unalign_load_cost. */
1766 1, /* vec_store_cost. */
1767 3, /* cond_taken_branch_cost. */
1768 1, /* cond_not_taken_branch_cost. */
1771 static stringop_algs intel_memcpy[2] = {
1772 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1773 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1774 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1775 static stringop_algs intel_memset[2] = {
1776 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1777 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1778 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1779 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1780 static const
1781 struct processor_costs intel_cost = {
1782 COSTS_N_INSNS (1), /* cost of an add instruction */
1783 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1784 COSTS_N_INSNS (1), /* variable shift costs */
1785 COSTS_N_INSNS (1), /* constant shift costs */
1786 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1787 COSTS_N_INSNS (3), /* HI */
1788 COSTS_N_INSNS (3), /* SI */
1789 COSTS_N_INSNS (4), /* DI */
1790 COSTS_N_INSNS (2)}, /* other */
1791 0, /* cost of multiply per each bit set */
1792 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1793 COSTS_N_INSNS (26), /* HI */
1794 COSTS_N_INSNS (42), /* SI */
1795 COSTS_N_INSNS (74), /* DI */
1796 COSTS_N_INSNS (74)}, /* other */
1797 COSTS_N_INSNS (1), /* cost of movsx */
1798 COSTS_N_INSNS (1), /* cost of movzx */
1799 8, /* "large" insn */
1800 17, /* MOVE_RATIO */
1801 4, /* cost for loading QImode using movzbl */
1802 {4, 4, 4}, /* cost of loading integer registers
1803 in QImode, HImode and SImode.
1804 Relative to reg-reg move (2). */
1805 {4, 4, 4}, /* cost of storing integer registers */
1806 4, /* cost of reg,reg fld/fst */
1807 {12, 12, 12}, /* cost of loading fp registers
1808 in SFmode, DFmode and XFmode */
1809 {6, 6, 8}, /* cost of storing fp registers
1810 in SFmode, DFmode and XFmode */
1811 2, /* cost of moving MMX register */
1812 {8, 8}, /* cost of loading MMX registers
1813 in SImode and DImode */
1814 {8, 8}, /* cost of storing MMX registers
1815 in SImode and DImode */
1816 2, /* cost of moving SSE register */
1817 {8, 8, 8}, /* cost of loading SSE registers
1818 in SImode, DImode and TImode */
1819 {8, 8, 8}, /* cost of storing SSE registers
1820 in SImode, DImode and TImode */
1821 5, /* MMX or SSE register to integer */
1822 32, /* size of l1 cache. */
1823 256, /* size of l2 cache. */
1824 64, /* size of prefetch block */
1825 6, /* number of parallel prefetches */
1826 3, /* Branch cost */
1827 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1828 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1829 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1830 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1831 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1832 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1833 intel_memcpy,
1834 intel_memset,
1835 1, /* scalar_stmt_cost. */
1836 1, /* scalar load_cost. */
1837 1, /* scalar_store_cost. */
1838 1, /* vec_stmt_cost. */
1839 4, /* vec_to_scalar_cost. */
1840 1, /* scalar_to_vec_cost. */
1841 1, /* vec_align_load_cost. */
1842 2, /* vec_unalign_load_cost. */
1843 1, /* vec_store_cost. */
1844 3, /* cond_taken_branch_cost. */
1845 1, /* cond_not_taken_branch_cost. */
1848 /* Generic should produce code tuned for Core-i7 (and newer chips)
1849 and btver1 (and newer chips). */
1851 static stringop_algs generic_memcpy[2] = {
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1853 {-1, libcall, false}}},
1854 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1855 {-1, libcall, false}}}};
1856 static stringop_algs generic_memset[2] = {
1857 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1858 {-1, libcall, false}}},
1859 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1860 {-1, libcall, false}}}};
1861 static const
1862 struct processor_costs generic_cost = {
1863 COSTS_N_INSNS (1), /* cost of an add instruction */
1864 	 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1865 	    this cost, however, our current implementation of synth_mult results in
1866 	    the use of unnecessary temporary registers, causing regressions on
1867 	    several SPECfp benchmarks.  */
1868 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1869 COSTS_N_INSNS (1), /* variable shift costs */
1870 COSTS_N_INSNS (1), /* constant shift costs */
1871 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1872 COSTS_N_INSNS (4), /* HI */
1873 COSTS_N_INSNS (3), /* SI */
1874 COSTS_N_INSNS (4), /* DI */
1875 COSTS_N_INSNS (2)}, /* other */
1876 0, /* cost of multiply per each bit set */
1877 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1878 COSTS_N_INSNS (26), /* HI */
1879 COSTS_N_INSNS (42), /* SI */
1880 COSTS_N_INSNS (74), /* DI */
1881 COSTS_N_INSNS (74)}, /* other */
1882 COSTS_N_INSNS (1), /* cost of movsx */
1883 COSTS_N_INSNS (1), /* cost of movzx */
1884 8, /* "large" insn */
1885 17, /* MOVE_RATIO */
1886 4, /* cost for loading QImode using movzbl */
1887 {4, 4, 4}, /* cost of loading integer registers
1888 in QImode, HImode and SImode.
1889 Relative to reg-reg move (2). */
1890 {4, 4, 4}, /* cost of storing integer registers */
1891 4, /* cost of reg,reg fld/fst */
1892 {12, 12, 12}, /* cost of loading fp registers
1893 in SFmode, DFmode and XFmode */
1894 {6, 6, 8}, /* cost of storing fp registers
1895 in SFmode, DFmode and XFmode */
1896 2, /* cost of moving MMX register */
1897 {8, 8}, /* cost of loading MMX registers
1898 in SImode and DImode */
1899 {8, 8}, /* cost of storing MMX registers
1900 in SImode and DImode */
1901 2, /* cost of moving SSE register */
1902 {8, 8, 8}, /* cost of loading SSE registers
1903 in SImode, DImode and TImode */
1904 {8, 8, 8}, /* cost of storing SSE registers
1905 in SImode, DImode and TImode */
1906 5, /* MMX or SSE register to integer */
1907 32, /* size of l1 cache. */
1908 512, /* size of l2 cache. */
1909 64, /* size of prefetch block */
1910 6, /* number of parallel prefetches */
1911 	 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1912 	    value is increased to the perhaps more appropriate value of 5.  */
1913 3, /* Branch cost */
1914 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1915 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1916 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1917 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1918 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1919 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1920 generic_memcpy,
1921 generic_memset,
1922 1, /* scalar_stmt_cost. */
1923 1, /* scalar load_cost. */
1924 1, /* scalar_store_cost. */
1925 1, /* vec_stmt_cost. */
1926 1, /* vec_to_scalar_cost. */
1927 1, /* scalar_to_vec_cost. */
1928 1, /* vec_align_load_cost. */
1929 2, /* vec_unalign_load_cost. */
1930 1, /* vec_store_cost. */
1931 3, /* cond_taken_branch_cost. */
1932 1, /* cond_not_taken_branch_cost. */
1935 /* core_cost should produce code tuned for the Core family of CPUs.  */
1936 static stringop_algs core_memcpy[2] = {
1937 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940 static stringop_algs core_memset[2] = {
1941 {libcall, {{6, loop_1_byte, true},
1942 {24, loop, true},
1943 {8192, rep_prefix_4_byte, true},
1944 {-1, libcall, false}}},
1945 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1946 {-1, libcall, false}}}};
1948 static const
1949 struct processor_costs core_cost = {
1950 COSTS_N_INSNS (1), /* cost of an add instruction */
1951 	 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1952 	    this cost, however, our current implementation of synth_mult results in
1953 	    the use of unnecessary temporary registers, causing regressions on
1954 	    several SPECfp benchmarks.  */
1955 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1956 COSTS_N_INSNS (1), /* variable shift costs */
1957 COSTS_N_INSNS (1), /* constant shift costs */
1958 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1959 COSTS_N_INSNS (4), /* HI */
1960 COSTS_N_INSNS (3), /* SI */
1961 COSTS_N_INSNS (4), /* DI */
1962 COSTS_N_INSNS (2)}, /* other */
1963 0, /* cost of multiply per each bit set */
1964 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1965 COSTS_N_INSNS (26), /* HI */
1966 COSTS_N_INSNS (42), /* SI */
1967 COSTS_N_INSNS (74), /* DI */
1968 COSTS_N_INSNS (74)}, /* other */
1969 COSTS_N_INSNS (1), /* cost of movsx */
1970 COSTS_N_INSNS (1), /* cost of movzx */
1971 8, /* "large" insn */
1972 17, /* MOVE_RATIO */
1973 4, /* cost for loading QImode using movzbl */
1974 {4, 4, 4}, /* cost of loading integer registers
1975 in QImode, HImode and SImode.
1976 Relative to reg-reg move (2). */
1977 {4, 4, 4}, /* cost of storing integer registers */
1978 4, /* cost of reg,reg fld/fst */
1979 {12, 12, 12}, /* cost of loading fp registers
1980 in SFmode, DFmode and XFmode */
1981 {6, 6, 8}, /* cost of storing fp registers
1982 in SFmode, DFmode and XFmode */
1983 2, /* cost of moving MMX register */
1984 {8, 8}, /* cost of loading MMX registers
1985 in SImode and DImode */
1986 {8, 8}, /* cost of storing MMX registers
1987 in SImode and DImode */
1988 2, /* cost of moving SSE register */
1989 {8, 8, 8}, /* cost of loading SSE registers
1990 in SImode, DImode and TImode */
1991 {8, 8, 8}, /* cost of storing SSE registers
1992 in SImode, DImode and TImode */
1993 5, /* MMX or SSE register to integer */
1994 64, /* size of l1 cache. */
1995 512, /* size of l2 cache. */
1996 64, /* size of prefetch block */
1997 6, /* number of parallel prefetches */
1998 	 /* FIXME: perhaps a more appropriate value is 5.  */
1999 3, /* Branch cost */
2000 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2001 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2002 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2003 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2004 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2005 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2006 core_memcpy,
2007 core_memset,
2008 1, /* scalar_stmt_cost. */
2009 1, /* scalar load_cost. */
2010 1, /* scalar_store_cost. */
2011 1, /* vec_stmt_cost. */
2012 1, /* vec_to_scalar_cost. */
2013 1, /* scalar_to_vec_cost. */
2014 1, /* vec_align_load_cost. */
2015 2, /* vec_unalign_load_cost. */
2016 1, /* vec_store_cost. */
2017 3, /* cond_taken_branch_cost. */
2018 1, /* cond_not_taken_branch_cost. */
2022 /* Set by -mtune. */
2023 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2025 /* Set by -mtune or -Os. */
2026 const struct processor_costs *ix86_cost = &pentium_cost;
2028 /* Processor feature/optimization bitmasks. */
2029 #define m_386 (1<<PROCESSOR_I386)
2030 #define m_486 (1<<PROCESSOR_I486)
2031 #define m_PENT (1<<PROCESSOR_PENTIUM)
2032 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2033 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2034 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2035 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2036 #define m_CORE2 (1<<PROCESSOR_CORE2)
2037 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2038 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2039 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2040 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2041 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2042 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2043 #define m_INTEL (1<<PROCESSOR_INTEL)
2045 #define m_GEODE (1<<PROCESSOR_GEODE)
2046 #define m_K6 (1<<PROCESSOR_K6)
2047 #define m_K6_GEODE (m_K6 | m_GEODE)
2048 #define m_K8 (1<<PROCESSOR_K8)
2049 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2050 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2051 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2052 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2053 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2054 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2055 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2056 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2057 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2058 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2059 #define m_BTVER (m_BTVER1 | m_BTVER2)
2060 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2062 #define m_GENERIC (1<<PROCESSOR_GENERIC)
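/* Illustrative sketch only (not part of GCC): how the m_* masks above are
   consumed.  A selector in x86-tune.def is an OR of these masks, and a
   tuning flag applies to the current CPU when that CPU's bit is present in
   the selector; the real logic lives in set_ix86_tune_features below.  */
static inline bool
tune_selector_applies_p (unsigned int selector, enum processor_type cpu)
{
  unsigned int cpu_mask = 1u << cpu;   /* e.g. m_HASWELL for PROCESSOR_HASWELL */
  return (selector & cpu_mask) != 0;   /* e.g. selector = m_CORE_ALL | m_GENERIC */
}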
2064 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2065 #undef DEF_TUNE
2066 #define DEF_TUNE(tune, name, selector) name,
2067 #include "x86-tune.def"
2068 #undef DEF_TUNE
2071 /* Feature tests against the various tunings. */
2072 unsigned char ix86_tune_features[X86_TUNE_LAST];
2074 /* Feature tests against the various tunings used to create ix86_tune_features
2075 based on the processor mask. */
2076 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2077 #undef DEF_TUNE
2078 #define DEF_TUNE(tune, name, selector) selector,
2079 #include "x86-tune.def"
2080 #undef DEF_TUNE
2083 /* Feature tests against the various architecture variations. */
2084 unsigned char ix86_arch_features[X86_ARCH_LAST];
2086 /* Feature tests against the various architecture variations, used to create
2087 ix86_arch_features based on the processor mask. */
2088 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2089 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2090 ~(m_386 | m_486 | m_PENT | m_K6),
2092 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2093 ~m_386,
2095 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2096 ~(m_386 | m_486),
2098 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2099 ~m_386,
2101 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2102 ~m_386,
2105 /* If the average insn count for a single function invocation is
2106 	 lower than this constant, emit fast (but longer) prologue and
2107 	 epilogue code.  */
2108 #define FAST_PROLOGUE_INSN_COUNT 20
2110 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2111 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2112 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2113 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2115 /* Array of the smallest class containing reg number REGNO, indexed by
2116 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2118 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2120 /* ax, dx, cx, bx */
2121 AREG, DREG, CREG, BREG,
2122 /* si, di, bp, sp */
2123 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2124 /* FP registers */
2125 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2126 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2127 /* arg pointer */
2128 NON_Q_REGS,
2129 /* flags, fpsr, fpcr, frame */
2130 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2131 /* SSE registers */
2132 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2133 SSE_REGS, SSE_REGS,
2134 /* MMX registers */
2135 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2136 MMX_REGS, MMX_REGS,
2137 /* REX registers */
2138 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2139 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2140 /* SSE REX registers */
2141 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2142 SSE_REGS, SSE_REGS,
2143 /* AVX-512 SSE registers */
2144 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2145 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2146 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2147 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2148 /* Mask registers. */
2149 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2150 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2151 /* MPX bound registers */
2152 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2155 /* The "default" register map used in 32bit mode. */
2157 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2159 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2160 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2161 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2162 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2163 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2166 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2167 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2168 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2169 101, 102, 103, 104, /* bound registers */
2172 /* The "default" register map used in 64bit mode. */
2174 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2176 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2177 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2178 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2179 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2180 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2181 8,9,10,11,12,13,14,15, /* extended integer registers */
2182 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2183 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2184 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2185 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2186 126, 127, 128, 129, /* bound registers */
2189 /* Define the register numbers to be used in Dwarf debugging information.
2190 The SVR4 reference port C compiler uses the following register numbers
2191 in its Dwarf output code:
2192 0 for %eax (gcc regno = 0)
2193 1 for %ecx (gcc regno = 2)
2194 2 for %edx (gcc regno = 1)
2195 3 for %ebx (gcc regno = 3)
2196 4 for %esp (gcc regno = 7)
2197 5 for %ebp (gcc regno = 6)
2198 6 for %esi (gcc regno = 4)
2199 7 for %edi (gcc regno = 5)
2200 The following three DWARF register numbers are never generated by
2201 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2202 believes these numbers have these meanings.
2203 8 for %eip (no gcc equivalent)
2204 9 for %eflags (gcc regno = 17)
2205 10 for %trapno (no gcc equivalent)
2206 It is not at all clear how we should number the FP stack registers
2207 for the x86 architecture. If the version of SDB on x86/svr4 were
2208 a bit less brain dead with respect to floating-point then we would
2209 have a precedent to follow with respect to DWARF register numbers
2210 for x86 FP registers, but the SDB on x86/svr4 is so completely
2211 broken with respect to FP registers that it is hardly worth thinking
2212 of it as something to strive for compatibility with.
2213 The version of x86/svr4 SDB I have at the moment does (partially)
2214 seem to believe that DWARF register number 11 is associated with
2215 the x86 register %st(0), but that's about all. Higher DWARF
2216 register numbers don't seem to be associated with anything in
2217 particular, and even for DWARF regno 11, SDB only seems to under-
2218 stand that it should say that a variable lives in %st(0) (when
2219 asked via an `=' command) if we said it was in DWARF regno 11,
2220 but SDB still prints garbage when asked for the value of the
2221 variable in question (via a `/' command).
2222 (Also note that the labels SDB prints for various FP stack regs
2223 when doing an `x' command are all wrong.)
2224 Note that these problems generally don't affect the native SVR4
2225 C compiler because it doesn't allow the use of -O with -g and
2226 because when it is *not* optimizing, it allocates a memory
2227 location for each floating-point variable, and the memory
2228 location is what gets described in the DWARF AT_location
2229 attribute for the variable in question.
2230 Regardless of the severe mental illness of the x86/svr4 SDB, we
2231 do something sensible here and we use the following DWARF
2232 register numbers. Note that these are all stack-top-relative
2233 numbers.
2234 11 for %st(0) (gcc regno = 8)
2235 12 for %st(1) (gcc regno = 9)
2236 13 for %st(2) (gcc regno = 10)
2237 14 for %st(3) (gcc regno = 11)
2238 15 for %st(4) (gcc regno = 12)
2239 16 for %st(5) (gcc regno = 13)
2240 17 for %st(6) (gcc regno = 14)
2241 18 for %st(7) (gcc regno = 15)
2243 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2245 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2246 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2247 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2248 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2249 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2250 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2251 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2252 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2253 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2254 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2255 101, 102, 103, 104, /* bound registers */
2258 /* Define parameter passing and return registers. */
2260 static int const x86_64_int_parameter_registers[6] =
2262 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2265 static int const x86_64_ms_abi_int_parameter_registers[4] =
2267 CX_REG, DX_REG, R8_REG, R9_REG
2270 static int const x86_64_int_return_registers[4] =
2272 AX_REG, DX_REG, DI_REG, SI_REG
2275 /* Additional registers that are clobbered by SYSV calls. */
2277 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2279 SI_REG, DI_REG,
2280 XMM6_REG, XMM7_REG,
2281 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2282 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
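/* Worked example (psABI behaviour, for illustration only): for a call such as
     extern int f (int a, long b, double x);
   the integer arguments consume x86_64_int_parameter_registers in order, so
   A is passed in %edi and B in %rsi, while X goes in %xmm0; the int return
   value comes back in %eax (AX_REG, the first x86_64_int_return_registers
   entry).  Under the MS ABI the first integer argument would instead use
   %ecx, per x86_64_ms_abi_int_parameter_registers.  */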
2285 /* Define the structure for the machine field in struct function. */
2287 struct GTY(()) stack_local_entry {
2288 unsigned short mode;
2289 unsigned short n;
2290 rtx rtl;
2291 struct stack_local_entry *next;
2294 /* Structure describing stack frame layout.
2295 Stack grows downward:
2297 [arguments]
2298 <- ARG_POINTER
2299 saved pc
2301 saved static chain if ix86_static_chain_on_stack
2303 saved frame pointer if frame_pointer_needed
2304 <- HARD_FRAME_POINTER
2305 [saved regs]
2306 <- regs_save_offset
2307 [padding0]
2309 [saved SSE regs]
2310 <- sse_regs_save_offset
2311 [padding1] |
2312 | <- FRAME_POINTER
2313 [va_arg registers] |
2315 [frame] |
2317 [padding2] | = to_allocate
2318 <- STACK_POINTER
2320 struct ix86_frame
2322 int nsseregs;
2323 int nregs;
2324 int va_arg_size;
2325 int red_zone_size;
2326 int outgoing_arguments_size;
2328 /* The offsets relative to ARG_POINTER. */
2329 HOST_WIDE_INT frame_pointer_offset;
2330 HOST_WIDE_INT hard_frame_pointer_offset;
2331 HOST_WIDE_INT stack_pointer_offset;
2332 HOST_WIDE_INT hfp_save_offset;
2333 HOST_WIDE_INT reg_save_offset;
2334 HOST_WIDE_INT sse_reg_save_offset;
2336 /* When save_regs_using_mov is set, emit prologue using
2337 move instead of push instructions. */
2338 bool save_regs_using_mov;
2341 /* Which cpu we are scheduling for.  */
2342 enum attr_cpu ix86_schedule;
2344 /* Which cpu we are optimizing for.  */
2345 enum processor_type ix86_tune;
2347 /* Which instruction set architecture to use. */
2348 enum processor_type ix86_arch;
2350 /* True if processor has SSE prefetch instruction. */
2351 unsigned char x86_prefetch_sse;
2353 /* -mstackrealign option */
2354 static const char ix86_force_align_arg_pointer_string[]
2355 = "force_align_arg_pointer";
2357 static rtx (*ix86_gen_leave) (void);
2358 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2359 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2360 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2361 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2362 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2363 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2364 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2365 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2366 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2367 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2368 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2370 /* Preferred alignment for stack boundary in bits. */
2371 unsigned int ix86_preferred_stack_boundary;
2373 /* Alignment for incoming stack boundary in bits specified at
2374 command line. */
2375 static unsigned int ix86_user_incoming_stack_boundary;
2377 /* Default alignment for incoming stack boundary in bits. */
2378 static unsigned int ix86_default_incoming_stack_boundary;
2380 /* Alignment for incoming stack boundary in bits. */
2381 unsigned int ix86_incoming_stack_boundary;
2383 /* Calling abi specific va_list type nodes. */
2384 static GTY(()) tree sysv_va_list_type_node;
2385 static GTY(()) tree ms_va_list_type_node;
2387 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2388 char internal_label_prefix[16];
2389 int internal_label_prefix_len;
2391 /* Fence to use after loop using movnt. */
2392 tree x86_mfence;
2394 /* Register class used for passing a given 64bit part of the argument.
2395 	 These represent the classes documented by the psABI, with the exception
2396 	 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2397 	 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2399 	 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2400 	 whenever possible (the upper half does contain padding).  */
2401 enum x86_64_reg_class
2403 X86_64_NO_CLASS,
2404 X86_64_INTEGER_CLASS,
2405 X86_64_INTEGERSI_CLASS,
2406 X86_64_SSE_CLASS,
2407 X86_64_SSESF_CLASS,
2408 X86_64_SSEDF_CLASS,
2409 X86_64_SSEUP_CLASS,
2410 X86_64_X87_CLASS,
2411 X86_64_X87UP_CLASS,
2412 X86_64_COMPLEX_X87_CLASS,
2413 X86_64_MEMORY_CLASS
2416 #define MAX_CLASSES 8
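/* Worked example (an interpretation of the psABI rules, for illustration
   only): a parameter of type
     struct { double d; long l; };
   spans two eightbytes; the first is classified X86_64_SSEDF_CLASS (the
   double) and the second X86_64_INTEGER_CLASS (the long), so the aggregate
   is passed in one SSE register and one general-purpose register.  */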
2418 /* Table of constants used by fldpi, fldln2, etc.... */
2419 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2420 static bool ext_80387_constants_init = 0;
2423 static struct machine_function * ix86_init_machine_status (void);
2424 static rtx ix86_function_value (const_tree, const_tree, bool);
2425 static bool ix86_function_value_regno_p (const unsigned int);
2426 static unsigned int ix86_function_arg_boundary (machine_mode,
2427 const_tree);
2428 static rtx ix86_static_chain (const_tree, bool);
2429 static int ix86_function_regparm (const_tree, const_tree);
2430 static void ix86_compute_frame_layout (struct ix86_frame *);
2431 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2432 rtx, rtx, int);
2433 static void ix86_add_new_builtins (HOST_WIDE_INT);
2434 static tree ix86_canonical_va_list_type (tree);
2435 static void predict_jump (int);
2436 static unsigned int split_stack_prologue_scratch_regno (void);
2437 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2439 enum ix86_function_specific_strings
2441 IX86_FUNCTION_SPECIFIC_ARCH,
2442 IX86_FUNCTION_SPECIFIC_TUNE,
2443 IX86_FUNCTION_SPECIFIC_MAX
2446 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2447 const char *, enum fpmath_unit, bool);
2448 static void ix86_function_specific_save (struct cl_target_option *,
2449 struct gcc_options *opts);
2450 static void ix86_function_specific_restore (struct gcc_options *opts,
2451 struct cl_target_option *);
2452 static void ix86_function_specific_print (FILE *, int,
2453 struct cl_target_option *);
2454 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2455 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2456 struct gcc_options *,
2457 struct gcc_options *,
2458 struct gcc_options *);
2459 static bool ix86_can_inline_p (tree, tree);
2460 static void ix86_set_current_function (tree);
2461 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2463 static enum calling_abi ix86_function_abi (const_tree);
2466 #ifndef SUBTARGET32_DEFAULT_CPU
2467 #define SUBTARGET32_DEFAULT_CPU "i386"
2468 #endif
2470 /* Whether -mtune= or -march= were specified */
2471 static int ix86_tune_defaulted;
2472 static int ix86_arch_specified;
2474 /* Vectorization library interface and handlers. */
2475 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2477 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2478 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2480 /* Processor target table, indexed by processor number */
2481 struct ptt
2483 const char *const name; /* processor name */
2484 const struct processor_costs *cost; /* Processor costs */
2485 const int align_loop; /* Default alignments. */
2486 const int align_loop_max_skip;
2487 const int align_jump;
2488 const int align_jump_max_skip;
2489 const int align_func;
2492 /* This table must be in sync with enum processor_type in i386.h. */
2493 static const struct ptt processor_target_table[PROCESSOR_max] =
2495 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2496 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2497 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2498 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2499 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2500 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2501 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2502 {"core2", &core_cost, 16, 10, 16, 10, 16},
2503 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2504 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2505 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2506 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2507 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2508 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2509 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2510 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2511 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2512 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2513 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2514 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2515 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2516 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2517 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2518 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2519 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2522 static unsigned int
2523 rest_of_handle_insert_vzeroupper (void)
2525 int i;
2527 	 /* vzeroupper instructions are inserted immediately after reload to
2528 	    account for possible spills from 256bit registers.  The pass
2529 	    reuses the mode switching infrastructure by re-running the mode
2530 	    insertion pass, so disable entities that have already been processed.  */
2531 for (i = 0; i < MAX_386_ENTITIES; i++)
2532 ix86_optimize_mode_switching[i] = 0;
2534 ix86_optimize_mode_switching[AVX_U128] = 1;
2536 /* Call optimize_mode_switching. */
2537 g->get_passes ()->execute_pass_mode_switching ();
2538 return 0;
2541 namespace {
2543 const pass_data pass_data_insert_vzeroupper =
2545 RTL_PASS, /* type */
2546 "vzeroupper", /* name */
2547 OPTGROUP_NONE, /* optinfo_flags */
2548 TV_NONE, /* tv_id */
2549 0, /* properties_required */
2550 0, /* properties_provided */
2551 0, /* properties_destroyed */
2552 0, /* todo_flags_start */
2553 TODO_df_finish, /* todo_flags_finish */
2556 class pass_insert_vzeroupper : public rtl_opt_pass
2558 public:
2559 pass_insert_vzeroupper(gcc::context *ctxt)
2560 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2563 /* opt_pass methods: */
2564 virtual bool gate (function *)
2566 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2569 virtual unsigned int execute (function *)
2571 return rest_of_handle_insert_vzeroupper ();
2574 }; // class pass_insert_vzeroupper
2576 } // anon namespace
2578 rtl_opt_pass *
2579 make_pass_insert_vzeroupper (gcc::context *ctxt)
2581 return new pass_insert_vzeroupper (ctxt);
2584 /* Return true if a red-zone is in use. */
2586 static inline bool
2587 ix86_using_red_zone (void)
2589 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2592 /* Return a string that documents the current -m options. The caller is
2593 responsible for freeing the string. */
2595 static char *
2596 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2597 const char *tune, enum fpmath_unit fpmath,
2598 bool add_nl_p)
2600 struct ix86_target_opts
2602 const char *option; /* option string */
2603 HOST_WIDE_INT mask; /* isa mask options */
2606 	 /* This table is ordered so that options like -msse4.2, which imply the
2607 	    preceding options, are matched first.  */
2608 static struct ix86_target_opts isa_opts[] =
2610 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2611 { "-mfma", OPTION_MASK_ISA_FMA },
2612 { "-mxop", OPTION_MASK_ISA_XOP },
2613 { "-mlwp", OPTION_MASK_ISA_LWP },
2614 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2615 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2616 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2617 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2618 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2619 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2620 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2621 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2622 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2623 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2624 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2625 { "-msse3", OPTION_MASK_ISA_SSE3 },
2626 { "-msse2", OPTION_MASK_ISA_SSE2 },
2627 { "-msse", OPTION_MASK_ISA_SSE },
2628 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2629 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2630 { "-mmmx", OPTION_MASK_ISA_MMX },
2631 { "-mabm", OPTION_MASK_ISA_ABM },
2632 { "-mbmi", OPTION_MASK_ISA_BMI },
2633 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2634 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2635 { "-mhle", OPTION_MASK_ISA_HLE },
2636 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2637 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2638 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2639 { "-madx", OPTION_MASK_ISA_ADX },
2640 { "-mtbm", OPTION_MASK_ISA_TBM },
2641 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2642 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2643 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2644 { "-maes", OPTION_MASK_ISA_AES },
2645 { "-msha", OPTION_MASK_ISA_SHA },
2646 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2647 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2648 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2649 { "-mf16c", OPTION_MASK_ISA_F16C },
2650 { "-mrtm", OPTION_MASK_ISA_RTM },
2651 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2652 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2653 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2654 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2655 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2656 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2657 { "-mmpx", OPTION_MASK_ISA_MPX },
2660 /* Flag options. */
2661 static struct ix86_target_opts flag_opts[] =
2663 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2664 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2665 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2666 { "-m80387", MASK_80387 },
2667 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2668 { "-malign-double", MASK_ALIGN_DOUBLE },
2669 { "-mcld", MASK_CLD },
2670 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2671 { "-mieee-fp", MASK_IEEE_FP },
2672 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2673 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2674 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2675 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2676 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2677 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2678 { "-mno-red-zone", MASK_NO_RED_ZONE },
2679 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2680 { "-mrecip", MASK_RECIP },
2681 { "-mrtd", MASK_RTD },
2682 { "-msseregparm", MASK_SSEREGPARM },
2683 { "-mstack-arg-probe", MASK_STACK_PROBE },
2684 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2685 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2686 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2687 { "-mvzeroupper", MASK_VZEROUPPER },
2688 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2689 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2690 { "-mprefer-avx128", MASK_PREFER_AVX128},
2693 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2695 char isa_other[40];
2696 char target_other[40];
2697 unsigned num = 0;
2698 unsigned i, j;
2699 char *ret;
2700 char *ptr;
2701 size_t len;
2702 size_t line_len;
2703 size_t sep_len;
2704 const char *abi;
2706 memset (opts, '\0', sizeof (opts));
2708 /* Add -march= option. */
2709 if (arch)
2711 opts[num][0] = "-march=";
2712 opts[num++][1] = arch;
2715 /* Add -mtune= option. */
2716 if (tune)
2718 opts[num][0] = "-mtune=";
2719 opts[num++][1] = tune;
2722 /* Add -m32/-m64/-mx32. */
2723 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2725 if ((isa & OPTION_MASK_ABI_64) != 0)
2726 abi = "-m64";
2727 else
2728 abi = "-mx32";
2729 isa &= ~ (OPTION_MASK_ISA_64BIT
2730 | OPTION_MASK_ABI_64
2731 | OPTION_MASK_ABI_X32);
2733 else
2734 abi = "-m32";
2735 opts[num++][0] = abi;
2737 /* Pick out the options in isa options. */
2738 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2740 if ((isa & isa_opts[i].mask) != 0)
2742 opts[num++][0] = isa_opts[i].option;
2743 isa &= ~ isa_opts[i].mask;
2747 if (isa && add_nl_p)
2749 opts[num++][0] = isa_other;
2750 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2751 isa);
2754 /* Add flag options. */
2755 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2757 if ((flags & flag_opts[i].mask) != 0)
2759 opts[num++][0] = flag_opts[i].option;
2760 flags &= ~ flag_opts[i].mask;
2764 if (flags && add_nl_p)
2766 opts[num++][0] = target_other;
2767 sprintf (target_other, "(other flags: %#x)", flags);
2770 /* Add -fpmath= option. */
2771 if (fpmath)
2773 opts[num][0] = "-mfpmath=";
2774 switch ((int) fpmath)
2776 case FPMATH_387:
2777 opts[num++][1] = "387";
2778 break;
2780 case FPMATH_SSE:
2781 opts[num++][1] = "sse";
2782 break;
2784 case FPMATH_387 | FPMATH_SSE:
2785 opts[num++][1] = "sse+387";
2786 break;
2788 default:
2789 gcc_unreachable ();
2793 /* Any options? */
2794 if (num == 0)
2795 return NULL;
2797 gcc_assert (num < ARRAY_SIZE (opts));
2799 /* Size the string. */
2800 len = 0;
2801 sep_len = (add_nl_p) ? 3 : 1;
2802 for (i = 0; i < num; i++)
2804 len += sep_len;
2805 for (j = 0; j < 2; j++)
2806 if (opts[i][j])
2807 len += strlen (opts[i][j]);
2810 /* Build the string. */
2811 ret = ptr = (char *) xmalloc (len);
2812 line_len = 0;
2814 for (i = 0; i < num; i++)
2816 size_t len2[2];
2818 for (j = 0; j < 2; j++)
2819 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2821 if (i != 0)
2823 *ptr++ = ' ';
2824 line_len++;
2826 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2828 *ptr++ = '\\';
2829 *ptr++ = '\n';
2830 line_len = 0;
2834 for (j = 0; j < 2; j++)
2835 if (opts[i][j])
2837 memcpy (ptr, opts[i][j], len2[j]);
2838 ptr += len2[j];
2839 line_len += len2[j];
2843 *ptr = '\0';
2844 gcc_assert (ret + len >= ptr);
2846 return ret;
2849 /* Return true if profiling code should be emitted before the
2850 	 prologue; otherwise return false.
2851 	 Note: for x86 the "hotfix" case is sorried (i.e. rejected with sorry ()).  */
2852 static bool
2853 ix86_profile_before_prologue (void)
2855 return flag_fentry != 0;
2858 /* Function that is callable from the debugger to print the current
2859 options. */
2860 void ATTRIBUTE_UNUSED
2861 ix86_debug_options (void)
2863 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2864 ix86_arch_string, ix86_tune_string,
2865 ix86_fpmath, true);
2867 if (opts)
2869 fprintf (stderr, "%s\n\n", opts);
2870 free (opts);
2872 else
2873 fputs ("<no options>\n\n", stderr);
2875 return;
2878 static const char *stringop_alg_names[] = {
2879 #define DEF_ENUM
2880 #define DEF_ALG(alg, name) #name,
2881 #include "stringop.def"
2882 #undef DEF_ENUM
2883 #undef DEF_ALG
2886 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2887 	 The string has the following form (or is a comma-separated list of them):
2889 strategy_alg:max_size:[align|noalign]
2891 where the full size range for the strategy is either [0, max_size] or
2892 [min_size, max_size], in which min_size is the max_size + 1 of the
2893 preceding range. The last size range must have max_size == -1.
2895 Examples:
2898 -mmemcpy-strategy=libcall:-1:noalign
2900 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2904 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2906 	 This tells the compiler to use the following strategy for memset:
2907 1) when the expected size is between [1, 16], use rep_8byte strategy;
2908 2) when the size is between [17, 2048], use vector_loop;
2909 3) when the size is > 2048, use libcall. */
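/* Worked illustration only (not part of GCC): the memset example above
   parses into three (max_size, alg, noalign) ranges, which
   ix86_parse_stringop_strategy_string below copies over the default
   stringop_algs entries:
     {   16, rep_8byte,   noalign }   sizes [1, 16]
     { 2048, vector_loop, align   }   sizes [17, 2048]
     {   -1, libcall,     noalign }   sizes > 2048  */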
2911 struct stringop_size_range
2913 int max;
2914 stringop_alg alg;
2915 bool noalign;
2918 static void
2919 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2921 const struct stringop_algs *default_algs;
2922 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2923 char *curr_range_str, *next_range_str;
2924 int i = 0, n = 0;
2926 if (is_memset)
2927 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2928 else
2929 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2931 curr_range_str = strategy_str;
2935 int maxs;
2936 char alg_name[128];
2937 char align[16];
2938 next_range_str = strchr (curr_range_str, ',');
2939 if (next_range_str)
2940 *next_range_str++ = '\0';
2942 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2943 alg_name, &maxs, align))
2945 error ("wrong arg %s to option %s", curr_range_str,
2946 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2947 return;
2950 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2952 error ("size ranges of option %s should be increasing",
2953 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2954 return;
2957 for (i = 0; i < last_alg; i++)
2958 if (!strcmp (alg_name, stringop_alg_names[i]))
2959 break;
2961 if (i == last_alg)
2963 error ("wrong stringop strategy name %s specified for option %s",
2964 alg_name,
2965 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2966 return;
2969 input_ranges[n].max = maxs;
2970 input_ranges[n].alg = (stringop_alg) i;
2971 if (!strcmp (align, "align"))
2972 input_ranges[n].noalign = false;
2973 else if (!strcmp (align, "noalign"))
2974 input_ranges[n].noalign = true;
2975 else
2977 error ("unknown alignment %s specified for option %s",
2978 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2979 return;
2981 n++;
2982 curr_range_str = next_range_str;
2984 while (curr_range_str);
2986 if (input_ranges[n - 1].max != -1)
2988 error ("the max value for the last size range should be -1"
2989 " for option %s",
2990 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2991 return;
2994 if (n > MAX_STRINGOP_ALGS)
2996 error ("too many size ranges specified in option %s",
2997 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2998 return;
3001 /* Now override the default algs array. */
3002 for (i = 0; i < n; i++)
3004 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3005 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3006 = input_ranges[i].alg;
3007 *const_cast<int *>(&default_algs->size[i].noalign)
3008 = input_ranges[i].noalign;
3013 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3014 	 print the features that are explicitly set.  */
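/* For illustration only (the feature names below are placeholders; the real
   names come from x86-tune.def via ix86_tune_feature_names):
     -mtune-ctrl=some_feature,^other_feature
   turns some_feature on and other_feature off -- a leading '^' clears the
   named feature.  */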
3016 static void
3017 parse_mtune_ctrl_str (bool dump)
3019 if (!ix86_tune_ctrl_string)
3020 return;
3022 char *next_feature_string = NULL;
3023 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3024 char *orig = curr_feature_string;
3025 int i;
3028 bool clear = false;
3030 next_feature_string = strchr (curr_feature_string, ',');
3031 if (next_feature_string)
3032 *next_feature_string++ = '\0';
3033 if (*curr_feature_string == '^')
3035 curr_feature_string++;
3036 clear = true;
3038 for (i = 0; i < X86_TUNE_LAST; i++)
3040 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3042 ix86_tune_features[i] = !clear;
3043 if (dump)
3044 fprintf (stderr, "Explicitly %s feature %s\n",
3045 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3046 break;
3049 if (i == X86_TUNE_LAST)
3050 error ("Unknown parameter to option -mtune-ctrl: %s",
3051 clear ? curr_feature_string - 1 : curr_feature_string);
3052 curr_feature_string = next_feature_string;
3054 while (curr_feature_string);
3055 free (orig);
3058 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3059 processor type. */
3061 static void
3062 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3064 unsigned int ix86_tune_mask = 1u << ix86_tune;
3065 int i;
3067 for (i = 0; i < X86_TUNE_LAST; ++i)
3069 if (ix86_tune_no_default)
3070 ix86_tune_features[i] = 0;
3071 else
3072 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3075 if (dump)
3077 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3078 for (i = 0; i < X86_TUNE_LAST; i++)
3079 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3080 ix86_tune_features[i] ? "on" : "off");
3083 parse_mtune_ctrl_str (dump);
3087 /* Override various settings based on options. If MAIN_ARGS_P, the
3088 options are from the command line, otherwise they are from
3089 attributes. */
3091 static void
3092 ix86_option_override_internal (bool main_args_p,
3093 struct gcc_options *opts,
3094 struct gcc_options *opts_set)
3096 int i;
3097 unsigned int ix86_arch_mask;
3098 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3099 const char *prefix;
3100 const char *suffix;
3101 const char *sw;
3103 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3104 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3105 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3106 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3107 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3108 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3109 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3110 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3111 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3112 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3113 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3114 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3115 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3116 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3117 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3118 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3119 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3120 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3121 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3122 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3123 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3124 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3125 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3126 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3127 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3128 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3129 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3130 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3131 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3132 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3133 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3134 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3135 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3136 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3137 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3138 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3139 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3140 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3141 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3142 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3143 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3144 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3145 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3146 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3147 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3148 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3149 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3150 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3151 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3152 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3153 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3154 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3155 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3157 #define PTA_CORE2 \
3158 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3159 | PTA_CX16 | PTA_FXSR)
3160 #define PTA_NEHALEM \
3161 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3162 #define PTA_WESTMERE \
3163 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3164 #define PTA_SANDYBRIDGE \
3165 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3166 #define PTA_IVYBRIDGE \
3167 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3168 #define PTA_HASWELL \
3169 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3170 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3171 #define PTA_BROADWELL \
3172 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3173 #define PTA_BONNELL \
3174 (PTA_CORE2 | PTA_MOVBE)
3175 #define PTA_SILVERMONT \
3176 (PTA_WESTMERE | PTA_MOVBE)
3178 /* if this reaches 64, need to widen struct pta flags below */
3180 static struct pta
3182 const char *const name; /* processor name or nickname. */
3183 const enum processor_type processor;
3184 const enum attr_cpu schedule;
3185 const unsigned HOST_WIDE_INT flags;
3187 const processor_alias_table[] =
3189 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3190 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3191 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3192 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3193 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3194 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3195 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3196 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3197 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3198 PTA_MMX | PTA_SSE | PTA_FXSR},
3199 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3200 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3201 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3202 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3203 PTA_MMX | PTA_SSE | PTA_FXSR},
3204 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3205 PTA_MMX | PTA_SSE | PTA_FXSR},
3206 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3207 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3208 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3209 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3210 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3211 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3212 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3213 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3214 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3215 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3216 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3217 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3218 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3219 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3220 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3221 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3222 PTA_SANDYBRIDGE},
3223 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3224 PTA_SANDYBRIDGE},
3225 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3226 PTA_IVYBRIDGE},
3227 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3228 PTA_IVYBRIDGE},
3229 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3230 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3231 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3232 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3233 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3234 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3235 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3236 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3237 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3238 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3239 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3240 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3241 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3242 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3243 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3244 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3245 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3246 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3247 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3248 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3249 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3250 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3251 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3252 {"x86-64", PROCESSOR_K8, CPU_K8,
3253 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3254 {"k8", PROCESSOR_K8, CPU_K8,
3255 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3256 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3257 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3258 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3259 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3260 {"opteron", PROCESSOR_K8, CPU_K8,
3261 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3262 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3263 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3264 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3265 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3266 {"athlon64", PROCESSOR_K8, CPU_K8,
3267 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3268 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3269 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3270 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3271 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3272 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3273 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3274 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3275 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3276 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3277 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3278 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3279 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3280 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3281 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3283 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3284 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3285 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3286 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3290 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3291 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3292 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3293 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3294 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3295 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3296 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3297 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3298 | PTA_XSAVEOPT | PTA_FSGSBASE},
3299 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3300 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3301 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3302 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3303 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3304 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3305 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3306 | PTA_MOVBE},
3307 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3308 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3309 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3310 | PTA_FXSR | PTA_XSAVE},
3311 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3312 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3313 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3314 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3315 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3316 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3318 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3319 PTA_64BIT
3320 | PTA_HLE /* flags are only used for -march switch. */ },
3323 /* -mrecip options. */
3324 static struct
3326 const char *string; /* option name */
3327 unsigned int mask; /* mask bits to set */
3329 const recip_options[] =
3331 { "all", RECIP_MASK_ALL },
3332 { "none", RECIP_MASK_NONE },
3333 { "div", RECIP_MASK_DIV },
3334 { "sqrt", RECIP_MASK_SQRT },
3335 { "vec-div", RECIP_MASK_VEC_DIV },
3336 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3339 int const pta_size = ARRAY_SIZE (processor_alias_table);
3341 /* Set up prefix/suffix so the error messages refer to either the command
3342 line argument, or the attribute(target). */
3343 if (main_args_p)
3345 prefix = "-m";
3346 suffix = "";
3347 sw = "switch";
3349 else
3351 prefix = "option(\"";
3352 suffix = "\")";
3353 sw = "attribute";
3356 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3357 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3358 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3359 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3360 #ifdef TARGET_BI_ARCH
3361 else
3363 #if TARGET_BI_ARCH == 1
3364 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3365 is on and OPTION_MASK_ABI_X32 is off. We turn off
3366 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3367 -mx32. */
3368 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3369 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3370 #else
3371 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3372 on and OPTION_MASK_ABI_64 is off. We turn off
3373 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3374 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3375 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3376 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3377 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3378 #endif
3380 #endif
3382 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3384 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3385 OPTION_MASK_ABI_64 for TARGET_X32. */
3386 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3387 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3389 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3390 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3391 | OPTION_MASK_ABI_X32
3392 | OPTION_MASK_ABI_64);
3393 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3395 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3396 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3397 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3398 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3401 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3402 SUBTARGET_OVERRIDE_OPTIONS;
3403 #endif
3405 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3406 SUBSUBTARGET_OVERRIDE_OPTIONS;
3407 #endif
3409 /* -fPIC is the default for x86_64. */
3410 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3411 opts->x_flag_pic = 2;
3413 /* Need to check -mtune=generic first. */
3414 if (opts->x_ix86_tune_string)
3416 /* As special support for cross compilers we read -mtune=native
3417 as -mtune=generic. With native compilers we won't see the
3418 -mtune=native, as it was changed by the driver. */
3419 if (!strcmp (opts->x_ix86_tune_string, "native"))
3421 opts->x_ix86_tune_string = "generic";
3423 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3424 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3425 "%stune=k8%s or %stune=generic%s instead as appropriate",
3426 prefix, suffix, prefix, suffix, prefix, suffix);
3428 else
3430 if (opts->x_ix86_arch_string)
3431 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3432 if (!opts->x_ix86_tune_string)
3434 opts->x_ix86_tune_string
3435 = processor_target_table[TARGET_CPU_DEFAULT].name;
3436 ix86_tune_defaulted = 1;
3439 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3440 or defaulted. We need to use a sensible tune option. */
3441 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3443 opts->x_ix86_tune_string = "generic";
3447 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3448 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3450 /* rep; movq isn't available in 32-bit code. */
3451 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3452 opts->x_ix86_stringop_alg = no_stringop;
3455 if (!opts->x_ix86_arch_string)
3456 opts->x_ix86_arch_string
3457 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3458 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3459 else
3460 ix86_arch_specified = 1;
3462 if (opts_set->x_ix86_pmode)
3464 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3465 && opts->x_ix86_pmode == PMODE_SI)
3466 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3467 && opts->x_ix86_pmode == PMODE_DI))
3468 error ("address mode %qs not supported in the %s bit mode",
3469 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3470 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3472 else
3473 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3474 ? PMODE_DI : PMODE_SI;
3476 if (!opts_set->x_ix86_abi)
3477 opts->x_ix86_abi = DEFAULT_ABI;
3479 /* For targets using ms ABI enable ms-extensions, if not
3480 explicitly turned off. For non-ms ABI we turn off this
3481 option. */
3482 if (!opts_set->x_flag_ms_extensions)
3483 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3485 if (opts_set->x_ix86_cmodel)
3487 switch (opts->x_ix86_cmodel)
3489 case CM_SMALL:
3490 case CM_SMALL_PIC:
3491 if (opts->x_flag_pic)
3492 opts->x_ix86_cmodel = CM_SMALL_PIC;
3493 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3494 error ("code model %qs not supported in the %s bit mode",
3495 "small", "32");
3496 break;
3498 case CM_MEDIUM:
3499 case CM_MEDIUM_PIC:
3500 if (opts->x_flag_pic)
3501 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3502 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3503 error ("code model %qs not supported in the %s bit mode",
3504 "medium", "32");
3505 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3506 error ("code model %qs not supported in x32 mode",
3507 "medium");
3508 break;
3510 case CM_LARGE:
3511 case CM_LARGE_PIC:
3512 if (opts->x_flag_pic)
3513 opts->x_ix86_cmodel = CM_LARGE_PIC;
3514 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3515 error ("code model %qs not supported in the %s bit mode",
3516 "large", "32");
3517 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3518 error ("code model %qs not supported in x32 mode",
3519 "large");
3520 break;
3522 case CM_32:
3523 if (opts->x_flag_pic)
3524 error ("code model %s does not support PIC mode", "32");
3525 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3526 error ("code model %qs not supported in the %s bit mode",
3527 "32", "64");
3528 break;
3530 case CM_KERNEL:
3531 if (opts->x_flag_pic)
3533 error ("code model %s does not support PIC mode", "kernel");
3534 opts->x_ix86_cmodel = CM_32;
3536 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3537 error ("code model %qs not supported in the %s bit mode",
3538 "kernel", "32");
3539 break;
3541 default:
3542 gcc_unreachable ();
3545 else
3547 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3548 use of rip-relative addressing. This eliminates fixups that
3549 would otherwise be needed if this object is to be placed in a
3550 DLL, and is essentially just as efficient as direct addressing. */
3551 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3552 && (TARGET_RDOS || TARGET_PECOFF))
3553 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3554 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3556 else
3557 opts->x_ix86_cmodel = CM_32;
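/* For example (illustrative): with no explicit -mcmodel=, a 64-bit ELF
   compilation with -fpic gets CM_SMALL_PIC from the branch above, while
   32-bit code always defaults to CM_32.  */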
3559 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3561 error ("-masm=intel not supported in this configuration");
3562 opts->x_ix86_asm_dialect = ASM_ATT;
3564 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3565 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3566 sorry ("%i-bit mode not compiled in",
3567 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3569 for (i = 0; i < pta_size; i++)
3570 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3572 ix86_schedule = processor_alias_table[i].schedule;
3573 ix86_arch = processor_alias_table[i].processor;
3574 /* Default cpu tuning to the architecture. */
3575 ix86_tune = ix86_arch;
3577 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3578 && !(processor_alias_table[i].flags & PTA_64BIT))
3579 error ("CPU you selected does not support x86-64 "
3580 "instruction set");
3582 if (processor_alias_table[i].flags & PTA_MMX
3583 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3584 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3585 if (processor_alias_table[i].flags & PTA_3DNOW
3586 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3587 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3588 if (processor_alias_table[i].flags & PTA_3DNOW_A
3589 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3590 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3591 if (processor_alias_table[i].flags & PTA_SSE
3592 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3593 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3594 if (processor_alias_table[i].flags & PTA_SSE2
3595 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3596 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3597 if (processor_alias_table[i].flags & PTA_SSE3
3598 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3599 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3600 if (processor_alias_table[i].flags & PTA_SSSE3
3601 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3602 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3603 if (processor_alias_table[i].flags & PTA_SSE4_1
3604 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3605 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3606 if (processor_alias_table[i].flags & PTA_SSE4_2
3607 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3608 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3609 if (processor_alias_table[i].flags & PTA_AVX
3610 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3611 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3612 if (processor_alias_table[i].flags & PTA_AVX2
3613 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3614 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3615 if (processor_alias_table[i].flags & PTA_FMA
3616 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3617 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3618 if (processor_alias_table[i].flags & PTA_SSE4A
3619 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3620 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3621 if (processor_alias_table[i].flags & PTA_FMA4
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3624 if (processor_alias_table[i].flags & PTA_XOP
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3627 if (processor_alias_table[i].flags & PTA_LWP
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3630 if (processor_alias_table[i].flags & PTA_ABM
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3633 if (processor_alias_table[i].flags & PTA_BMI
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3636 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3639 if (processor_alias_table[i].flags & PTA_TBM
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3642 if (processor_alias_table[i].flags & PTA_BMI2
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3645 if (processor_alias_table[i].flags & PTA_CX16
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3648 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3651 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3652 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3653 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3654 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3655 if (processor_alias_table[i].flags & PTA_MOVBE
3656 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3657 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3658 if (processor_alias_table[i].flags & PTA_AES
3659 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3660 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3661 if (processor_alias_table[i].flags & PTA_SHA
3662 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3663 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3664 if (processor_alias_table[i].flags & PTA_PCLMUL
3665 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3666 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3667 if (processor_alias_table[i].flags & PTA_FSGSBASE
3668 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3669 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3670 if (processor_alias_table[i].flags & PTA_RDRND
3671 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3672 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3673 if (processor_alias_table[i].flags & PTA_F16C
3674 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3675 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3676 if (processor_alias_table[i].flags & PTA_RTM
3677 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3678 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3679 if (processor_alias_table[i].flags & PTA_HLE
3680 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3681 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3682 if (processor_alias_table[i].flags & PTA_PRFCHW
3683 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3684 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3685 if (processor_alias_table[i].flags & PTA_RDSEED
3686 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3687 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3688 if (processor_alias_table[i].flags & PTA_ADX
3689 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3690 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3691 if (processor_alias_table[i].flags & PTA_FXSR
3692 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3693 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3694 if (processor_alias_table[i].flags & PTA_XSAVE
3695 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3696 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3697 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3698 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3699 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3700 if (processor_alias_table[i].flags & PTA_AVX512F
3701 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3702 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3703 if (processor_alias_table[i].flags & PTA_AVX512ER
3704 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3705 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3706 if (processor_alias_table[i].flags & PTA_AVX512PF
3707 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3708 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3709 if (processor_alias_table[i].flags & PTA_AVX512CD
3710 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3711 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3712 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3713 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3714 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3715 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3716 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3717 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3718 if (processor_alias_table[i].flags & PTA_XSAVEC
3719 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3720 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3721 if (processor_alias_table[i].flags & PTA_XSAVES
3722 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3723 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3724 if (processor_alias_table[i].flags & PTA_AVX512DQ
3725 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3726 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3727 if (processor_alias_table[i].flags & PTA_AVX512BW
3728 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3729 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3730 if (processor_alias_table[i].flags & PTA_AVX512VL
3731 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3732 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3733 if (processor_alias_table[i].flags & PTA_MPX
3734 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3735 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3736 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3737 x86_prefetch_sse = true;
3739 break;
3742 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3743 error ("Intel MPX does not support x32");
3748 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3749 error ("generic CPU can be used only for %stune=%s %s",
3750 prefix, suffix, sw);
3751 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3752 error ("intel CPU can be used only for %stune=%s %s",
3753 prefix, suffix, sw);
3754 else if (i == pta_size)
3755 error ("bad value (%s) for %sarch=%s %s",
3756 opts->x_ix86_arch_string, prefix, suffix, sw);
3758 ix86_arch_mask = 1u << ix86_arch;
3759 for (i = 0; i < X86_ARCH_LAST; ++i)
3760 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3762 for (i = 0; i < pta_size; i++)
3763 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3765 ix86_schedule = processor_alias_table[i].schedule;
3766 ix86_tune = processor_alias_table[i].processor;
3767 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3769 if (!(processor_alias_table[i].flags & PTA_64BIT))
3771 if (ix86_tune_defaulted)
3773 opts->x_ix86_tune_string = "x86-64";
3774 for (i = 0; i < pta_size; i++)
3775 if (! strcmp (opts->x_ix86_tune_string,
3776 processor_alias_table[i].name))
3777 break;
3778 ix86_schedule = processor_alias_table[i].schedule;
3779 ix86_tune = processor_alias_table[i].processor;
3781 else
3782 error ("CPU you selected does not support x86-64 "
3783 "instruction set");
3786 /* Intel CPUs have always interpreted SSE prefetch instructions as
3787 NOPs; so, we can enable SSE prefetch instructions even when
3788 -mtune (rather than -march) points us to a processor that has them.
3789 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3790 higher processors. */
3791 if (TARGET_CMOV
3792 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3793 x86_prefetch_sse = true;
3794 break;
3797 if (ix86_tune_specified && i == pta_size)
3798 error ("bad value (%s) for %stune=%s %s",
3799 opts->x_ix86_tune_string, prefix, suffix, sw);
3801 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3803 #ifndef USE_IX86_FRAME_POINTER
3804 #define USE_IX86_FRAME_POINTER 0
3805 #endif
3807 #ifndef USE_X86_64_FRAME_POINTER
3808 #define USE_X86_64_FRAME_POINTER 0
3809 #endif
3811 /* Set the default values for switches whose default depends on TARGET_64BIT
3812 in case they weren't overwritten by command line options. */
3813 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3815 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3816 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3817 if (opts->x_flag_asynchronous_unwind_tables
3818 && !opts_set->x_flag_unwind_tables
3819 && TARGET_64BIT_MS_ABI)
3820 opts->x_flag_unwind_tables = 1;
3821 if (opts->x_flag_asynchronous_unwind_tables == 2)
3822 opts->x_flag_unwind_tables
3823 = opts->x_flag_asynchronous_unwind_tables = 1;
3824 if (opts->x_flag_pcc_struct_return == 2)
3825 opts->x_flag_pcc_struct_return = 0;
3827 else
3829 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3830 opts->x_flag_omit_frame_pointer
3831 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3832 if (opts->x_flag_asynchronous_unwind_tables == 2)
3833 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3834 if (opts->x_flag_pcc_struct_return == 2)
3835 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3838 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3839 if (opts->x_optimize_size)
3840 ix86_cost = &ix86_size_cost;
3841 else
3842 ix86_cost = ix86_tune_cost;
3844 /* Arrange to set up i386_stack_locals for all functions. */
3845 init_machine_status = ix86_init_machine_status;
3847 /* Validate -mregparm= value. */
3848 if (opts_set->x_ix86_regparm)
3850 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3851 warning (0, "-mregparm is ignored in 64-bit mode");
3852 if (opts->x_ix86_regparm > REGPARM_MAX)
3854 error ("-mregparm=%d is not between 0 and %d",
3855 opts->x_ix86_regparm, REGPARM_MAX);
3856 opts->x_ix86_regparm = 0;
3859 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3860 opts->x_ix86_regparm = REGPARM_MAX;
3862 /* Default align_* from the processor table. */
3863 if (opts->x_align_loops == 0)
3865 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3866 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3868 if (opts->x_align_jumps == 0)
3870 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3871 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3873 if (opts->x_align_functions == 0)
3875 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3878 /* Provide default for -mbranch-cost= value. */
3879 if (!opts_set->x_ix86_branch_cost)
3880 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3882 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3884 opts->x_target_flags
3885 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3887 /* Enable by default the SSE and MMX builtins. Do allow the user to
3888 explicitly disable any of these. In particular, disabling SSE and
3889 MMX for kernel code is extremely useful. */
3890 if (!ix86_arch_specified)
3891 opts->x_ix86_isa_flags
3892 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3893 | TARGET_SUBTARGET64_ISA_DEFAULT)
3894 & ~opts->x_ix86_isa_flags_explicit);
3896 if (TARGET_RTD_P (opts->x_target_flags))
3897 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3899 else
3901 opts->x_target_flags
3902 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3904 if (!ix86_arch_specified)
3905 opts->x_ix86_isa_flags
3906 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3908 /* i386 ABI does not specify red zone. It still makes sense to use it
3909 when the programmer takes care to keep the stack from being destroyed. */
3910 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3911 opts->x_target_flags |= MASK_NO_RED_ZONE;
3914 /* Keep nonleaf frame pointers. */
3915 if (opts->x_flag_omit_frame_pointer)
3916 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3917 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3918 opts->x_flag_omit_frame_pointer = 1;
3920 /* If we're doing fast math, we don't care about comparison order
3921 wrt NaNs. This lets us use a shorter comparison sequence. */
3922 if (opts->x_flag_finite_math_only)
3923 opts->x_target_flags &= ~MASK_IEEE_FP;
3925 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3926 since the insns won't need emulation. */
3927 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3928 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3930 /* Likewise, if the target doesn't have a 387, or we've specified
3931 software floating point, don't use 387 inline intrinsics. */
3932 if (!TARGET_80387_P (opts->x_target_flags))
3933 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3935 /* Turn on MMX builtins for -msse. */
3936 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3937 opts->x_ix86_isa_flags
3938 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3940 /* Enable SSE prefetch. */
3941 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3942 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3943 x86_prefetch_sse = true;
3945 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3946 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3947 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3948 opts->x_ix86_isa_flags
3949 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3951 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3952 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3953 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3954 opts->x_ix86_isa_flags
3955 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3957 /* Enable lzcnt instruction for -mabm. */
3958 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3959 opts->x_ix86_isa_flags
3960 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3962 /* Validate -mpreferred-stack-boundary= value or default it to
3963 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3964 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3965 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3967 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3968 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3969 int max = (TARGET_SEH ? 4 : 12);
3971 if (opts->x_ix86_preferred_stack_boundary_arg < min
3972 || opts->x_ix86_preferred_stack_boundary_arg > max)
3974 if (min == max)
3975 error ("-mpreferred-stack-boundary is not supported "
3976 "for this target");
3977 else
3978 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3979 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3981 else
3982 ix86_preferred_stack_boundary
3983 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
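/* Worked example (illustrative): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte boundary.  */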
3986 /* Set the default value for -mstackrealign. */
3987 if (opts->x_ix86_force_align_arg_pointer == -1)
3988 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3990 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3992 /* Validate -mincoming-stack-boundary= value or default it to
3993 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3994 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3995 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3997 if (opts->x_ix86_incoming_stack_boundary_arg
3998 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3999 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4000 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4001 opts->x_ix86_incoming_stack_boundary_arg,
4002 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4003 else
4005 ix86_user_incoming_stack_boundary
4006 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4007 ix86_incoming_stack_boundary
4008 = ix86_user_incoming_stack_boundary;
4012 #ifndef NO_PROFILE_COUNTERS
4013 if (flag_nop_mcount)
4014 error ("-mnop-mcount is not compatible with this target");
4015 #endif
4016 if (flag_nop_mcount && flag_pic)
4017 error ("-mnop-mcount is not implemented for -fPIC");
4019 /* Accept -msseregparm only if at least SSE support is enabled. */
4020 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4021 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4022 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4024 if (opts_set->x_ix86_fpmath)
4026 if (opts->x_ix86_fpmath & FPMATH_SSE)
4028 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4030 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4031 opts->x_ix86_fpmath = FPMATH_387;
4033 else if ((opts->x_ix86_fpmath & FPMATH_387)
4034 && !TARGET_80387_P (opts->x_target_flags))
4036 warning (0, "387 instruction set disabled, using SSE arithmetics");
4037 opts->x_ix86_fpmath = FPMATH_SSE;
4041 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4042 -mfpmath=387. The latter is nevertheless the default on many targets,
4043 since the extra 80-bit precision of temporaries is considered part of the ABI.
4044 Overwrite the default at least for -ffast-math.
4045 TODO: -mfpmath=both seems to produce equally performing code with slightly
4046 smaller binaries. It is however not clear whether register allocation is
4047 ready for this setting.
4048 Also -mfpmath=387 is overall considerably more compact (about 4-5%) than SSE
4049 codegen. We may switch to 387 with -ffast-math for size-optimized
4050 functions. */
4051 else if (fast_math_flags_set_p (&global_options)
4052 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4053 opts->x_ix86_fpmath = FPMATH_SSE;
4054 else
4055 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4057 /* If the i387 is disabled, then do not return values in it. */
4058 if (!TARGET_80387_P (opts->x_target_flags))
4059 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4061 /* Use external vectorized library in vectorizing intrinsics. */
4062 if (opts_set->x_ix86_veclibabi_type)
4063 switch (opts->x_ix86_veclibabi_type)
4065 case ix86_veclibabi_type_svml:
4066 ix86_veclib_handler = ix86_veclibabi_svml;
4067 break;
4069 case ix86_veclibabi_type_acml:
4070 ix86_veclib_handler = ix86_veclibabi_acml;
4071 break;
4073 default:
4074 gcc_unreachable ();
4077 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4078 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4079 && !opts->x_optimize_size)
4080 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4082 /* If stack probes are required, the space used for large function
4083 arguments on the stack must also be probed, so enable
4084 -maccumulate-outgoing-args so this happens in the prologue. */
4085 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4086 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4088 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4089 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4090 "for correctness", prefix, suffix);
4091 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4094 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4096 char *p;
4097 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4098 p = strchr (internal_label_prefix, 'X');
4099 internal_label_prefix_len = p - internal_label_prefix;
4100 *p = '\0';
4103 /* When no scheduling description is available, disable the scheduler pass
4104 so it won't slow down compilation and make x87 code slower. */
4105 if (!TARGET_SCHEDULE)
4106 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4108 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4109 ix86_tune_cost->simultaneous_prefetches,
4110 opts->x_param_values,
4111 opts_set->x_param_values);
4112 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4113 ix86_tune_cost->prefetch_block,
4114 opts->x_param_values,
4115 opts_set->x_param_values);
4116 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4117 ix86_tune_cost->l1_cache_size,
4118 opts->x_param_values,
4119 opts_set->x_param_values);
4120 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4121 ix86_tune_cost->l2_cache_size,
4122 opts->x_param_values,
4123 opts_set->x_param_values);
4125 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4126 if (opts->x_flag_prefetch_loop_arrays < 0
4127 && HAVE_prefetch
4128 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4129 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4130 opts->x_flag_prefetch_loop_arrays = 1;
4132 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4133 can be optimized to ap = __builtin_next_arg (0). */
4134 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4135 targetm.expand_builtin_va_start = NULL;
4137 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4139 ix86_gen_leave = gen_leave_rex64;
4140 if (Pmode == DImode)
4142 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4143 ix86_gen_tls_local_dynamic_base_64
4144 = gen_tls_local_dynamic_base_64_di;
4146 else
4148 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4149 ix86_gen_tls_local_dynamic_base_64
4150 = gen_tls_local_dynamic_base_64_si;
4153 else
4154 ix86_gen_leave = gen_leave;
4156 if (Pmode == DImode)
4158 ix86_gen_add3 = gen_adddi3;
4159 ix86_gen_sub3 = gen_subdi3;
4160 ix86_gen_sub3_carry = gen_subdi3_carry;
4161 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4162 ix86_gen_andsp = gen_anddi3;
4163 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4164 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4165 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4166 ix86_gen_monitor = gen_sse3_monitor_di;
4168 else
4170 ix86_gen_add3 = gen_addsi3;
4171 ix86_gen_sub3 = gen_subsi3;
4172 ix86_gen_sub3_carry = gen_subsi3_carry;
4173 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4174 ix86_gen_andsp = gen_andsi3;
4175 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4176 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4177 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4178 ix86_gen_monitor = gen_sse3_monitor_si;
4181 #ifdef USE_IX86_CLD
4182 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4183 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4184 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4185 #endif
4187 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4189 if (opts->x_flag_fentry > 0)
4190 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4191 "with -fpic");
4192 opts->x_flag_fentry = 0;
4194 else if (TARGET_SEH)
4196 if (opts->x_flag_fentry == 0)
4197 sorry ("-mno-fentry isn%'t compatible with SEH");
4198 opts->x_flag_fentry = 1;
4200 else if (opts->x_flag_fentry < 0)
4202 #if defined(PROFILE_BEFORE_PROLOGUE)
4203 opts->x_flag_fentry = 1;
4204 #else
4205 opts->x_flag_fentry = 0;
4206 #endif
4209 /* When not optimizing for size, enable vzeroupper optimization for
4210 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4211 AVX unaligned load/store. */
4212 if (!opts->x_optimize_size)
4214 if (flag_expensive_optimizations
4215 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4216 opts->x_target_flags |= MASK_VZEROUPPER;
4217 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4218 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4219 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4220 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4221 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4222 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4223 /* Enable 128-bit AVX instruction generation
4224 for the auto-vectorizer. */
4225 if (TARGET_AVX128_OPTIMAL
4226 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4227 opts->x_target_flags |= MASK_PREFER_AVX128;
4230 if (opts->x_ix86_recip_name)
4232 char *p = ASTRDUP (opts->x_ix86_recip_name);
4233 char *q;
4234 unsigned int mask, i;
4235 bool invert;
4237 while ((q = strtok (p, ",")) != NULL)
4239 p = NULL;
4240 if (*q == '!')
4242 invert = true;
4243 q++;
4245 else
4246 invert = false;
4248 if (!strcmp (q, "default"))
4249 mask = RECIP_MASK_ALL;
4250 else
4252 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4253 if (!strcmp (q, recip_options[i].string))
4255 mask = recip_options[i].mask;
4256 break;
4259 if (i == ARRAY_SIZE (recip_options))
4261 error ("unknown option for -mrecip=%s", q);
4262 invert = false;
4263 mask = RECIP_MASK_NONE;
4267 opts->x_recip_mask_explicit |= mask;
4268 if (invert)
4269 opts->x_recip_mask &= ~mask;
4270 else
4271 opts->x_recip_mask |= mask;
4275 if (TARGET_RECIP_P (opts->x_target_flags))
4276 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4277 else if (opts_set->x_target_flags & MASK_RECIP)
4278 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
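/* Illustrative example: -mrecip=all,!sqrt first sets every RECIP_MASK_* bit
   and then clears RECIP_MASK_SQRT, so reciprocal approximations are used
   everywhere except for scalar square roots.  */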
4280 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4281 for 64-bit Bionic. */
4282 if (TARGET_HAS_BIONIC
4283 && !(opts_set->x_target_flags
4284 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4285 opts->x_target_flags |= (TARGET_64BIT
4286 ? MASK_LONG_DOUBLE_128
4287 : MASK_LONG_DOUBLE_64);
4289 /* Only one of them can be active. */
4290 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4291 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4293 /* Save the initial options in case the user does function specific
4294 options. */
4295 if (main_args_p)
4296 target_option_default_node = target_option_current_node
4297 = build_target_option_node (opts);
4299 /* Handle stack protector */
4300 if (!opts_set->x_ix86_stack_protector_guard)
4301 opts->x_ix86_stack_protector_guard
4302 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4304 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4305 if (opts->x_ix86_tune_memcpy_strategy)
4307 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4308 ix86_parse_stringop_strategy_string (str, false);
4309 free (str);
4312 if (opts->x_ix86_tune_memset_strategy)
4314 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4315 ix86_parse_stringop_strategy_string (str, true);
4316 free (str);
4320 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4322 static void
4323 ix86_option_override (void)
4325 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4326 static struct register_pass_info insert_vzeroupper_info
4327 = { pass_insert_vzeroupper, "reload",
4328 1, PASS_POS_INSERT_AFTER
4331 ix86_option_override_internal (true, &global_options, &global_options_set);
4334 /* This needs to be done at start up. It's convenient to do it here. */
4335 register_pass (&insert_vzeroupper_info);
4338 /* Update register usage after having seen the compiler flags. */
4340 static void
4341 ix86_conditional_register_usage (void)
4343 int i, c_mask;
4344 unsigned int j;
4346 /* The PIC register, if it exists, is fixed. */
4347 j = PIC_OFFSET_TABLE_REGNUM;
4348 if (j != INVALID_REGNUM)
4349 fixed_regs[j] = call_used_regs[j] = 1;
4351 /* For 32-bit targets, squash the REX registers. */
4352 if (! TARGET_64BIT)
4354 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4355 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4356 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4357 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4358 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4359 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4362 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4363 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4364 : TARGET_64BIT ? (1 << 2)
4365 : (1 << 1));
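/* Illustrative reading of c_mask: entries greater than 1 in
   CALL_USED_REGISTERS encode a per-ABI bitmask, and the bit selected above
   (3 for the 64-bit MS ABI, 2 for 64-bit SysV, 1 for 32-bit) picks the
   column that applies to the current target; see i386.h for the table.  */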
4367 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4369 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4371 /* Set/reset conditionally defined registers from
4372 CALL_USED_REGISTERS initializer. */
4373 if (call_used_regs[i] > 1)
4374 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4376 /* Calculate registers of CLOBBERED_REGS register set
4377 as call used registers from GENERAL_REGS register set. */
4378 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4379 && call_used_regs[i])
4380 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4383 /* If MMX is disabled, squash the registers. */
4384 if (! TARGET_MMX)
4385 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4386 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4387 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4389 /* If SSE is disabled, squash the registers. */
4390 if (! TARGET_SSE)
4391 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4392 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4393 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4395 /* If the FPU is disabled, squash the registers. */
4396 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4397 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4398 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4399 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4401 /* If AVX512F is disabled, squash the registers. */
4402 if (! TARGET_AVX512F)
4404 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4405 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4407 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4408 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4411 /* If MPX is disabled, squash the registers. */
4412 if (! TARGET_MPX)
4413 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4414 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4418 /* Save the current options */
4420 static void
4421 ix86_function_specific_save (struct cl_target_option *ptr,
4422 struct gcc_options *opts)
4424 ptr->arch = ix86_arch;
4425 ptr->schedule = ix86_schedule;
4426 ptr->tune = ix86_tune;
4427 ptr->branch_cost = ix86_branch_cost;
4428 ptr->tune_defaulted = ix86_tune_defaulted;
4429 ptr->arch_specified = ix86_arch_specified;
4430 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4431 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4432 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4433 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4434 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4435 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4436 ptr->x_ix86_abi = opts->x_ix86_abi;
4437 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4438 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4439 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4440 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4441 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4442 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4443 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4444 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4445 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4446 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4447 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4448 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4449 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4450 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4451 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4452 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4453 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4454 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4455 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4456 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4458 /* The fields are char but the variables are not; make sure the
4459 values fit in the fields. */
4460 gcc_assert (ptr->arch == ix86_arch);
4461 gcc_assert (ptr->schedule == ix86_schedule);
4462 gcc_assert (ptr->tune == ix86_tune);
4463 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4466 /* Restore the current options */
4468 static void
4469 ix86_function_specific_restore (struct gcc_options *opts,
4470 struct cl_target_option *ptr)
4472 enum processor_type old_tune = ix86_tune;
4473 enum processor_type old_arch = ix86_arch;
4474 unsigned int ix86_arch_mask;
4475 int i;
4477 /* We don't change -fPIC. */
4478 opts->x_flag_pic = flag_pic;
4480 ix86_arch = (enum processor_type) ptr->arch;
4481 ix86_schedule = (enum attr_cpu) ptr->schedule;
4482 ix86_tune = (enum processor_type) ptr->tune;
4483 opts->x_ix86_branch_cost = ptr->branch_cost;
4484 ix86_tune_defaulted = ptr->tune_defaulted;
4485 ix86_arch_specified = ptr->arch_specified;
4486 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4487 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4488 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4489 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4490 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4491 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4492 opts->x_ix86_abi = ptr->x_ix86_abi;
4493 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4494 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4495 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4496 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4497 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4498 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4499 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4500 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4501 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4502 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4503 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4504 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4505 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4506 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4507 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4508 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4509 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4510 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4511 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4512 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4514 /* Recreate the arch feature tests if the arch changed */
4515 if (old_arch != ix86_arch)
4517 ix86_arch_mask = 1u << ix86_arch;
4518 for (i = 0; i < X86_ARCH_LAST; ++i)
4519 ix86_arch_features[i]
4520 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4523 /* Recreate the tune optimization tests */
4524 if (old_tune != ix86_tune)
4525 set_ix86_tune_features (ix86_tune, false);
4528 /* Print the current options */
4530 static void
4531 ix86_function_specific_print (FILE *file, int indent,
4532 struct cl_target_option *ptr)
4534 char *target_string
4535 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4536 NULL, NULL, ptr->x_ix86_fpmath, false);
4538 gcc_assert (ptr->arch < PROCESSOR_max);
4539 fprintf (file, "%*sarch = %d (%s)\n",
4540 indent, "",
4541 ptr->arch, processor_target_table[ptr->arch].name);
4543 gcc_assert (ptr->tune < PROCESSOR_max);
4544 fprintf (file, "%*stune = %d (%s)\n",
4545 indent, "",
4546 ptr->tune, processor_target_table[ptr->tune].name);
4548 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4550 if (target_string)
4552 fprintf (file, "%*s%s\n", indent, "", target_string);
4553 free (target_string);
4558 /* Inner function to process attribute((target(...))); take an argument and
4559 set the current options from that argument. If we have a list, recursively
4560 go over the list. */
4562 static bool
4563 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4564 struct gcc_options *opts,
4565 struct gcc_options *opts_set,
4566 struct gcc_options *enum_opts_set)
4568 char *next_optstr;
4569 bool ret = true;
4571 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4572 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4573 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4574 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4575 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4577 enum ix86_opt_type
4579 ix86_opt_unknown,
4580 ix86_opt_yes,
4581 ix86_opt_no,
4582 ix86_opt_str,
4583 ix86_opt_enum,
4584 ix86_opt_isa
4587 static const struct
4589 const char *string;
4590 size_t len;
4591 enum ix86_opt_type type;
4592 int opt;
4593 int mask;
4594 } attrs[] = {
4595 /* isa options */
4596 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4597 IX86_ATTR_ISA ("abm", OPT_mabm),
4598 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4599 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4600 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4601 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4602 IX86_ATTR_ISA ("aes", OPT_maes),
4603 IX86_ATTR_ISA ("sha", OPT_msha),
4604 IX86_ATTR_ISA ("avx", OPT_mavx),
4605 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4606 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4607 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4608 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4609 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4610 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4611 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4612 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4613 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4614 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4615 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4616 IX86_ATTR_ISA ("sse", OPT_msse),
4617 IX86_ATTR_ISA ("sse2", OPT_msse2),
4618 IX86_ATTR_ISA ("sse3", OPT_msse3),
4619 IX86_ATTR_ISA ("sse4", OPT_msse4),
4620 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4621 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4622 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4623 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4624 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4625 IX86_ATTR_ISA ("fma", OPT_mfma),
4626 IX86_ATTR_ISA ("xop", OPT_mxop),
4627 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4628 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4629 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4630 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4631 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4632 IX86_ATTR_ISA ("hle", OPT_mhle),
4633 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4634 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4635 IX86_ATTR_ISA ("adx", OPT_madx),
4636 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4637 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4638 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4639 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4640 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4641 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4642 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4644 /* enum options */
4645 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4647 /* string options */
4648 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4649 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4651 /* flag options */
4652 IX86_ATTR_YES ("cld",
4653 OPT_mcld,
4654 MASK_CLD),
4656 IX86_ATTR_NO ("fancy-math-387",
4657 OPT_mfancy_math_387,
4658 MASK_NO_FANCY_MATH_387),
4660 IX86_ATTR_YES ("ieee-fp",
4661 OPT_mieee_fp,
4662 MASK_IEEE_FP),
4664 IX86_ATTR_YES ("inline-all-stringops",
4665 OPT_minline_all_stringops,
4666 MASK_INLINE_ALL_STRINGOPS),
4668 IX86_ATTR_YES ("inline-stringops-dynamically",
4669 OPT_minline_stringops_dynamically,
4670 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4672 IX86_ATTR_NO ("align-stringops",
4673 OPT_mno_align_stringops,
4674 MASK_NO_ALIGN_STRINGOPS),
4676 IX86_ATTR_YES ("recip",
4677 OPT_mrecip,
4678 MASK_RECIP),
4682 /* If this is a list, recurse to get the options. */
4683 if (TREE_CODE (args) == TREE_LIST)
4685 bool ret = true;
4687 for (; args; args = TREE_CHAIN (args))
4688 if (TREE_VALUE (args)
4689 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4690 p_strings, opts, opts_set,
4691 enum_opts_set))
4692 ret = false;
4694 return ret;
4697 else if (TREE_CODE (args) != STRING_CST)
4699 error ("attribute %<target%> argument not a string");
4700 return false;
4703 /* Handle multiple arguments separated by commas. */
4704 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4706 while (next_optstr && *next_optstr != '\0')
4708 char *p = next_optstr;
4709 char *orig_p = p;
4710 char *comma = strchr (next_optstr, ',');
4711 const char *opt_string;
4712 size_t len, opt_len;
4713 int opt;
4714 bool opt_set_p;
4715 char ch;
4716 unsigned i;
4717 enum ix86_opt_type type = ix86_opt_unknown;
4718 int mask = 0;
4720 if (comma)
4722 *comma = '\0';
4723 len = comma - next_optstr;
4724 next_optstr = comma + 1;
4726 else
4728 len = strlen (p);
4729 next_optstr = NULL;
4732 /* Recognize no-xxx. */
4733 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4735 opt_set_p = false;
4736 p += 3;
4737 len -= 3;
4739 else
4740 opt_set_p = true;
4742 /* Find the option. */
4743 ch = *p;
4744 opt = N_OPTS;
4745 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4747 type = attrs[i].type;
4748 opt_len = attrs[i].len;
4749 if (ch == attrs[i].string[0]
4750 && ((type != ix86_opt_str && type != ix86_opt_enum)
4751 ? len == opt_len
4752 : len > opt_len)
4753 && memcmp (p, attrs[i].string, opt_len) == 0)
4755 opt = attrs[i].opt;
4756 mask = attrs[i].mask;
4757 opt_string = attrs[i].string;
4758 break;
4762 /* Process the option. */
4763 if (opt == N_OPTS)
4765 error ("attribute(target(\"%s\")) is unknown", orig_p);
4766 ret = false;
4769 else if (type == ix86_opt_isa)
4771 struct cl_decoded_option decoded;
4773 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4774 ix86_handle_option (opts, opts_set,
4775 &decoded, input_location);
4778 else if (type == ix86_opt_yes || type == ix86_opt_no)
4780 if (type == ix86_opt_no)
4781 opt_set_p = !opt_set_p;
4783 if (opt_set_p)
4784 opts->x_target_flags |= mask;
4785 else
4786 opts->x_target_flags &= ~mask;
4789 else if (type == ix86_opt_str)
4791 if (p_strings[opt])
4793 error ("option(\"%s\") was already specified", opt_string);
4794 ret = false;
4796 else
4797 p_strings[opt] = xstrdup (p + opt_len);
4800 else if (type == ix86_opt_enum)
4802 bool arg_ok;
4803 int value;
4805 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4806 if (arg_ok)
4807 set_option (opts, enum_opts_set, opt, value,
4808 p + opt_len, DK_UNSPECIFIED, input_location,
4809 global_dc);
4810 else
4812 error ("attribute(target(\"%s\")) is unknown", orig_p);
4813 ret = false;
4817 else
4818 gcc_unreachable ();
4821 return ret;
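/* For illustration, a hypothetical user-level declaration that this
   parser accepts (the ISA names, the "no-" prefix, and the string and
   enum options all come from the attrs[] table above):

     int foo (int) __attribute__ ((target ("avx2,no-sse4a,arch=haswell,fpmath=sse")));

   Each comma-separated item is matched against attrs[]: "no-xxx"
   clears the corresponding option, "arch=" and "tune=" take string
   arguments, and "fpmath=" is handled as an enumerated option.  */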
4824 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4826 tree
4827 ix86_valid_target_attribute_tree (tree args,
4828 struct gcc_options *opts,
4829 struct gcc_options *opts_set)
4831 const char *orig_arch_string = opts->x_ix86_arch_string;
4832 const char *orig_tune_string = opts->x_ix86_tune_string;
4833 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4834 int orig_tune_defaulted = ix86_tune_defaulted;
4835 int orig_arch_specified = ix86_arch_specified;
4836 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4837 tree t = NULL_TREE;
4838 int i;
4839 struct cl_target_option *def
4840 = TREE_TARGET_OPTION (target_option_default_node);
4841 struct gcc_options enum_opts_set;
4843 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4845 /* Process each of the options on the chain. */
4846 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4847 opts_set, &enum_opts_set))
4848 return error_mark_node;
4850 /* If the changed options are different from the default, rerun
4851 ix86_option_override_internal, and then save the options away.
4852 The string options are attribute options, and will be undone
4853 when we copy the save structure. */
4854 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4855 || opts->x_target_flags != def->x_target_flags
4856 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4857 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4858 || enum_opts_set.x_ix86_fpmath)
4860 /* If we are using the default tune= or arch=, undo the string assigned,
4861 and use the default. */
4862 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4863 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4864 else if (!orig_arch_specified)
4865 opts->x_ix86_arch_string = NULL;
4867 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4868 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4869 else if (orig_tune_defaulted)
4870 opts->x_ix86_tune_string = NULL;
4872 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4873 if (enum_opts_set.x_ix86_fpmath)
4874 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4875 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4876 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4878 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4879 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4882 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4883 ix86_option_override_internal (false, opts, opts_set);
4885 /* Add any builtin functions with the new isa if any. */
4886 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4888 /* Save the current options unless we are validating options for
4889 #pragma. */
4890 t = build_target_option_node (opts);
4892 opts->x_ix86_arch_string = orig_arch_string;
4893 opts->x_ix86_tune_string = orig_tune_string;
4894 opts_set->x_ix86_fpmath = orig_fpmath_set;
4896 /* Free up memory allocated to hold the strings */
4897 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4898 free (option_strings[i]);
4901 return t;
4904 /* Hook to validate attribute((target("string"))). */
4906 static bool
4907 ix86_valid_target_attribute_p (tree fndecl,
4908 tree ARG_UNUSED (name),
4909 tree args,
4910 int ARG_UNUSED (flags))
4912 struct gcc_options func_options;
4913 tree new_target, new_optimize;
4914 bool ret = true;
4916 /* attribute((target("default"))) does nothing, beyond
4917 affecting multi-versioning. */
4918 if (TREE_VALUE (args)
4919 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4920 && TREE_CHAIN (args) == NULL_TREE
4921 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4922 return true;
4924 tree old_optimize = build_optimization_node (&global_options);
4926 /* Get the optimization options of the current function. */
4927 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4929 if (!func_optimize)
4930 func_optimize = old_optimize;
4932 /* Init func_options. */
4933 memset (&func_options, 0, sizeof (func_options));
4934 init_options_struct (&func_options, NULL);
4935 lang_hooks.init_options_struct (&func_options);
4937 cl_optimization_restore (&func_options,
4938 TREE_OPTIMIZATION (func_optimize));
4940 /* Initialize func_options to the default before its target options can
4941 be set. */
4942 cl_target_option_restore (&func_options,
4943 TREE_TARGET_OPTION (target_option_default_node));
4945 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4946 &global_options_set);
4948 new_optimize = build_optimization_node (&func_options);
4950 if (new_target == error_mark_node)
4951 ret = false;
4953 else if (fndecl && new_target)
4955 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4957 if (old_optimize != new_optimize)
4958 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4961 return ret;
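/* For illustration, target ("default") only matters for function
   multi-versioning (a C++ feature in this release); a hypothetical
   example would be

     __attribute__ ((target ("default"))) int dispatch (void) { return 0; }
     __attribute__ ((target ("avx2"))) int dispatch (void) { return 1; }

   The "default" version carries no target options of its own, which is
   why the hook above returns early for it.  */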
4965 /* Hook to determine if one function can safely inline another. */
4967 static bool
4968 ix86_can_inline_p (tree caller, tree callee)
4970 bool ret = false;
4971 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4972 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4974 /* If callee has no option attributes, then it is ok to inline. */
4975 if (!callee_tree)
4976 ret = true;
4978 /* If caller has no option attributes, but callee does then it is not ok to
4979 inline. */
4980 else if (!caller_tree)
4981 ret = false;
4983 else
4985 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4986 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4988 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4989 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4990 function. */
4991 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4992 != callee_opts->x_ix86_isa_flags)
4993 ret = false;
4995 /* See if we have the same non-isa options. */
4996 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4997 ret = false;
4999 /* See if arch, tune, etc. are the same. */
5000 else if (caller_opts->arch != callee_opts->arch)
5001 ret = false;
5003 else if (caller_opts->tune != callee_opts->tune)
5004 ret = false;
5006 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5007 ret = false;
5009 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5010 ret = false;
5012 else
5013 ret = true;
5016 return ret;
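/* For illustration (hypothetical code): under the rules above a callee
   built for a larger ISA cannot be inlined into a caller built for a
   smaller one, while the reverse direction is allowed:

     __attribute__ ((target ("avx2"))) static inline int ca (int x) { return x; }
     __attribute__ ((target ("sse2"))) int fa (int x) { return ca (x); }   -- not inlined

     __attribute__ ((target ("sse2"))) static inline int cb (int x) { return x; }
     __attribute__ ((target ("avx2"))) int fb (int x) { return cb (x); }   -- may be inlined

   assuming arch, tune and the remaining target flags are otherwise
   identical.  */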
5020 /* Remember the last target of ix86_set_current_function. */
5021 static GTY(()) tree ix86_previous_fndecl;
5023 /* Invalidate ix86_previous_fndecl cache. */
5024 void
5025 ix86_reset_previous_fndecl (void)
5027 ix86_previous_fndecl = NULL_TREE;
5030 /* Establish appropriate back-end context for processing the function
5031 FNDECL. The argument might be NULL to indicate processing at top
5032 level, outside of any function scope. */
5033 static void
5034 ix86_set_current_function (tree fndecl)
5036 /* Only change the context if the function changes. This hook is called
5037 several times in the course of compiling a function, and we don't want to
5038 slow things down too much or call target_reinit when it isn't safe. */
5039 if (fndecl && fndecl != ix86_previous_fndecl)
5041 tree old_tree = (ix86_previous_fndecl
5042 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5043 : NULL_TREE);
5045 tree new_tree = (fndecl
5046 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5047 : NULL_TREE);
5049 ix86_previous_fndecl = fndecl;
5050 if (old_tree == new_tree)
5053 else if (new_tree)
5055 cl_target_option_restore (&global_options,
5056 TREE_TARGET_OPTION (new_tree));
5057 if (TREE_TARGET_GLOBALS (new_tree))
5058 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5059 else
5060 TREE_TARGET_GLOBALS (new_tree)
5061 = save_target_globals_default_opts ();
5064 else if (old_tree)
5066 new_tree = target_option_current_node;
5067 cl_target_option_restore (&global_options,
5068 TREE_TARGET_OPTION (new_tree));
5069 if (TREE_TARGET_GLOBALS (new_tree))
5070 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5071 else if (new_tree == target_option_default_node)
5072 restore_target_globals (&default_target_globals);
5073 else
5074 TREE_TARGET_GLOBALS (new_tree)
5075 = save_target_globals_default_opts ();
5081 /* Return true if this goes in large data/bss. */
5083 static bool
5084 ix86_in_large_data_p (tree exp)
5086 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5087 return false;
5089 /* Functions are never large data. */
5090 if (TREE_CODE (exp) == FUNCTION_DECL)
5091 return false;
5093 /* Automatic variables are never large data. */
5094 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5095 return false;
5097 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5099 const char *section = DECL_SECTION_NAME (exp);
5100 if (strcmp (section, ".ldata") == 0
5101 || strcmp (section, ".lbss") == 0)
5102 return true;
5103 return false;
5105 else
5107 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5109 /* If this is an incomplete type with size 0, then we can't put it
5110 in data because it might be too big when completed. Also,
5111 int_size_in_bytes returns -1 if size can vary or is larger than
5112 an integer, in which case it is also safer to assume that it goes in
5113 large data. */
5114 if (size <= 0 || size > ix86_section_threshold)
5115 return true;
5118 return false;
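/* For illustration: with -mcmodel=medium and the default
   -mlarge-data-threshold, a hypothetical object such as

     static char big_table[1 << 20];

   is larger than ix86_section_threshold, so the test above returns
   true and the object is placed in the large .ldata/.lbss sections
   selected by the code below.  */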
5121 /* Switch to the appropriate section for output of DECL.
5122 DECL is either a `VAR_DECL' node or a constant of some sort.
5123 RELOC indicates whether forming the initial value of DECL requires
5124 link-time relocations. */
5126 ATTRIBUTE_UNUSED static section *
5127 x86_64_elf_select_section (tree decl, int reloc,
5128 unsigned HOST_WIDE_INT align)
5130 if (ix86_in_large_data_p (decl))
5132 const char *sname = NULL;
5133 unsigned int flags = SECTION_WRITE;
5134 switch (categorize_decl_for_section (decl, reloc))
5136 case SECCAT_DATA:
5137 sname = ".ldata";
5138 break;
5139 case SECCAT_DATA_REL:
5140 sname = ".ldata.rel";
5141 break;
5142 case SECCAT_DATA_REL_LOCAL:
5143 sname = ".ldata.rel.local";
5144 break;
5145 case SECCAT_DATA_REL_RO:
5146 sname = ".ldata.rel.ro";
5147 break;
5148 case SECCAT_DATA_REL_RO_LOCAL:
5149 sname = ".ldata.rel.ro.local";
5150 break;
5151 case SECCAT_BSS:
5152 sname = ".lbss";
5153 flags |= SECTION_BSS;
5154 break;
5155 case SECCAT_RODATA:
5156 case SECCAT_RODATA_MERGE_STR:
5157 case SECCAT_RODATA_MERGE_STR_INIT:
5158 case SECCAT_RODATA_MERGE_CONST:
5159 sname = ".lrodata";
5160 flags = 0;
5161 break;
5162 case SECCAT_SRODATA:
5163 case SECCAT_SDATA:
5164 case SECCAT_SBSS:
5165 gcc_unreachable ();
5166 case SECCAT_TEXT:
5167 case SECCAT_TDATA:
5168 case SECCAT_TBSS:
5169 /* We don't split these for medium model. Place them into
5170 default sections and hope for the best. */
5171 break;
5173 if (sname)
5175 /* We might get called with string constants, but get_named_section
5176 doesn't like them as they are not DECLs. Also, we need to set
5177 flags in that case. */
5178 if (!DECL_P (decl))
5179 return get_section (sname, flags, NULL);
5180 return get_named_section (decl, sname, reloc);
5183 return default_elf_select_section (decl, reloc, align);
5186 /* Select a set of attributes for section NAME based on the properties
5187 of DECL and whether or not RELOC indicates that DECL's initializer
5188 might contain runtime relocations. */
5190 static unsigned int ATTRIBUTE_UNUSED
5191 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5193 unsigned int flags = default_section_type_flags (decl, name, reloc);
5195 if (decl == NULL_TREE
5196 && (strcmp (name, ".ldata.rel.ro") == 0
5197 || strcmp (name, ".ldata.rel.ro.local") == 0))
5198 flags |= SECTION_RELRO;
5200 if (strcmp (name, ".lbss") == 0
5201 || strncmp (name, ".lbss.", 5) == 0
5202 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5203 flags |= SECTION_BSS;
5205 return flags;
5208 /* Build up a unique section name, expressed as a
5209 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5210 RELOC indicates whether the initial value of EXP requires
5211 link-time relocations. */
5213 static void ATTRIBUTE_UNUSED
5214 x86_64_elf_unique_section (tree decl, int reloc)
5216 if (ix86_in_large_data_p (decl))
5218 const char *prefix = NULL;
5219 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5220 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5222 switch (categorize_decl_for_section (decl, reloc))
5224 case SECCAT_DATA:
5225 case SECCAT_DATA_REL:
5226 case SECCAT_DATA_REL_LOCAL:
5227 case SECCAT_DATA_REL_RO:
5228 case SECCAT_DATA_REL_RO_LOCAL:
5229 prefix = one_only ? ".ld" : ".ldata";
5230 break;
5231 case SECCAT_BSS:
5232 prefix = one_only ? ".lb" : ".lbss";
5233 break;
5234 case SECCAT_RODATA:
5235 case SECCAT_RODATA_MERGE_STR:
5236 case SECCAT_RODATA_MERGE_STR_INIT:
5237 case SECCAT_RODATA_MERGE_CONST:
5238 prefix = one_only ? ".lr" : ".lrodata";
5239 break;
5240 case SECCAT_SRODATA:
5241 case SECCAT_SDATA:
5242 case SECCAT_SBSS:
5243 gcc_unreachable ();
5244 case SECCAT_TEXT:
5245 case SECCAT_TDATA:
5246 case SECCAT_TBSS:
5247 /* We don't split these for medium model. Place them into
5248 default sections and hope for the best. */
5249 break;
5251 if (prefix)
5253 const char *name, *linkonce;
5254 char *string;
5256 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5257 name = targetm.strip_name_encoding (name);
5259 /* If we're using one_only, then there needs to be a .gnu.linkonce
5260 prefix to the section name. */
5261 linkonce = one_only ? ".gnu.linkonce" : "";
5263 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5265 set_decl_section_name (decl, string);
5266 return;
5269 default_unique_section (decl, reloc);
5272 #ifdef COMMON_ASM_OP
5273 /* This says how to output assembler code to declare an
5274 uninitialized external linkage data object.
5276 For medium model x86-64 we need to use the .largecomm directive for
5277 large objects. */
5278 void
5279 x86_elf_aligned_common (FILE *file,
5280 const char *name, unsigned HOST_WIDE_INT size,
5281 int align)
5283 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5284 && size > (unsigned int)ix86_section_threshold)
5285 fputs ("\t.largecomm\t", file);
5286 else
5287 fputs (COMMON_ASM_OP, file);
5288 assemble_name (file, name);
5289 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5290 size, align / BITS_PER_UNIT);
5292 #endif
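/* For illustration, a hypothetical tentative definition built with
   -mcmodel=medium and -fcommon, e.g.

     char shared_buf[1 << 20];

   exceeds the large-data threshold and is announced with something like

     .largecomm shared_buf,1048576,32

   while smaller common objects keep using the ordinary COMMON_ASM_OP.  */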
5294 /* Utility function for targets to use in implementing
5295 ASM_OUTPUT_ALIGNED_BSS. */
5297 void
5298 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5299 unsigned HOST_WIDE_INT size, int align)
5301 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5302 && size > (unsigned int)ix86_section_threshold)
5303 switch_to_section (get_named_section (decl, ".lbss", 0));
5304 else
5305 switch_to_section (bss_section);
5306 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5307 #ifdef ASM_DECLARE_OBJECT_NAME
5308 last_assemble_variable_decl = decl;
5309 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5310 #else
5311 /* The standard thing is just to output a label for the object. */
5312 ASM_OUTPUT_LABEL (file, name);
5313 #endif /* ASM_DECLARE_OBJECT_NAME */
5314 ASM_OUTPUT_SKIP (file, size ? size : 1);
5317 /* Decide whether we must probe the stack before any space allocation
5318 on this target. It's essentially TARGET_STACK_PROBE except when
5319 -fstack-check causes the stack to be already probed differently. */
5321 bool
5322 ix86_target_stack_probe (void)
5324 /* Do not probe the stack twice if static stack checking is enabled. */
5325 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5326 return false;
5328 return TARGET_STACK_PROBE;
5331 /* Decide whether we can make a sibling call to a function. DECL is the
5332 declaration of the function being targeted by the call and EXP is the
5333 CALL_EXPR representing the call. */
5335 static bool
5336 ix86_function_ok_for_sibcall (tree decl, tree exp)
5338 tree type, decl_or_type;
5339 rtx a, b;
5341 /* If we are generating position-independent code, we cannot sibcall
5342 optimize any indirect call, or a direct call to a global function,
5343 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5344 if (!TARGET_MACHO
5345 && !TARGET_64BIT
5346 && flag_pic
5347 && (!decl || !targetm.binds_local_p (decl)))
5348 return false;
5350 /* If we need to align the outgoing stack, then sibcalling would
5351 unalign the stack, which may break the called function. */
5352 if (ix86_minimum_incoming_stack_boundary (true)
5353 < PREFERRED_STACK_BOUNDARY)
5354 return false;
5356 if (decl)
5358 decl_or_type = decl;
5359 type = TREE_TYPE (decl);
5361 else
5363 /* We're looking at the CALL_EXPR, we need the type of the function. */
5364 type = CALL_EXPR_FN (exp); /* pointer expression */
5365 type = TREE_TYPE (type); /* pointer type */
5366 type = TREE_TYPE (type); /* function type */
5367 decl_or_type = type;
5370 /* Check that the return value locations are the same. Like
5371 if we are returning floats on the 80387 register stack, we cannot
5372 make a sibcall from a function that doesn't return a float to a
5373 function that does or, conversely, from a function that does return
5374 a float to a function that doesn't; the necessary stack adjustment
5375 would not be executed. This is also the place we notice
5376 differences in the return value ABI. Note that it is ok for one
5377 of the functions to have void return type as long as the return
5378 value of the other is passed in a register. */
5379 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5380 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5381 cfun->decl, false);
5382 if (STACK_REG_P (a) || STACK_REG_P (b))
5384 if (!rtx_equal_p (a, b))
5385 return false;
5387 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5389 else if (!rtx_equal_p (a, b))
5390 return false;
5392 if (TARGET_64BIT)
5394 /* The SYSV ABI has more call-clobbered registers;
5395 disallow sibcalls from MS to SYSV. */
5396 if (cfun->machine->call_abi == MS_ABI
5397 && ix86_function_type_abi (type) == SYSV_ABI)
5398 return false;
5400 else
5402 /* If this call is indirect, we'll need to be able to use a
5403 call-clobbered register for the address of the target function.
5404 Make sure that all such registers are not used for passing
5405 parameters. Note that DLLIMPORT functions are indirect. */
5406 if (!decl
5407 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5409 if (ix86_function_regparm (type, NULL) >= 3)
5411 /* ??? Need to count the actual number of registers to be used,
5412 not the possible number of registers. Fix later. */
5413 return false;
5418 /* Otherwise okay. That also includes certain types of indirect calls. */
5419 return true;
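/* For illustration (hypothetical 32-bit PIC code): because %ebx must
   stay live for the PLT, building

     extern int bar (int);
     int foo (int x) { return bar (x); }

   with -m32 -fPIC emits a normal call followed by a return rather than
   a tail jump, while the same code built without -fPIC, or for 64-bit,
   may use a sibcall.  */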
5422 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5423 and "sseregparm" calling convention attributes;
5424 arguments as in struct attribute_spec.handler. */
5426 static tree
5427 ix86_handle_cconv_attribute (tree *node, tree name,
5428 tree args,
5429 int,
5430 bool *no_add_attrs)
5432 if (TREE_CODE (*node) != FUNCTION_TYPE
5433 && TREE_CODE (*node) != METHOD_TYPE
5434 && TREE_CODE (*node) != FIELD_DECL
5435 && TREE_CODE (*node) != TYPE_DECL)
5437 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5438 name);
5439 *no_add_attrs = true;
5440 return NULL_TREE;
5443 /* Can combine regparm with all attributes but fastcall and thiscall. */
5444 if (is_attribute_p ("regparm", name))
5446 tree cst;
5448 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5450 error ("fastcall and regparm attributes are not compatible");
5453 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5455 error ("regparm and thiscall attributes are not compatible");
5458 cst = TREE_VALUE (args);
5459 if (TREE_CODE (cst) != INTEGER_CST)
5461 warning (OPT_Wattributes,
5462 "%qE attribute requires an integer constant argument",
5463 name);
5464 *no_add_attrs = true;
5466 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5468 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5469 name, REGPARM_MAX);
5470 *no_add_attrs = true;
5473 return NULL_TREE;
5476 if (TARGET_64BIT)
5478 /* Do not warn when emulating the MS ABI. */
5479 if ((TREE_CODE (*node) != FUNCTION_TYPE
5480 && TREE_CODE (*node) != METHOD_TYPE)
5481 || ix86_function_type_abi (*node) != MS_ABI)
5482 warning (OPT_Wattributes, "%qE attribute ignored",
5483 name);
5484 *no_add_attrs = true;
5485 return NULL_TREE;
5488 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5489 if (is_attribute_p ("fastcall", name))
5491 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5493 error ("fastcall and cdecl attributes are not compatible");
5495 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5497 error ("fastcall and stdcall attributes are not compatible");
5499 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5501 error ("fastcall and regparm attributes are not compatible");
5503 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5505 error ("fastcall and thiscall attributes are not compatible");
5509 /* Can combine stdcall with fastcall (redundant), regparm and
5510 sseregparm. */
5511 else if (is_attribute_p ("stdcall", name))
5513 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5515 error ("stdcall and cdecl attributes are not compatible");
5517 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5519 error ("stdcall and fastcall attributes are not compatible");
5521 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5523 error ("stdcall and thiscall attributes are not compatible");
5527 /* Can combine cdecl with regparm and sseregparm. */
5528 else if (is_attribute_p ("cdecl", name))
5530 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5532 error ("stdcall and cdecl attributes are not compatible");
5534 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5536 error ("fastcall and cdecl attributes are not compatible");
5538 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5540 error ("cdecl and thiscall attributes are not compatible");
5543 else if (is_attribute_p ("thiscall", name))
5545 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5546 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5547 name);
5548 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5550 error ("stdcall and thiscall attributes are not compatible");
5552 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5554 error ("fastcall and thiscall attributes are not compatible");
5556 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5558 error ("cdecl and thiscall attributes are not compatible");
5562 /* Can combine sseregparm with all attributes. */
5564 return NULL_TREE;
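/* For illustration, hypothetical 32-bit declarations using these
   attributes:

     int __attribute__ ((fastcall)) f (int a, int b);             -- a in %ecx, b in %edx
     int __attribute__ ((stdcall, regparm (2))) g (int a, int b); -- allowed combination
     int __attribute__ ((fastcall, regparm (3))) h (int a);       -- rejected

   The last declaration triggers the "fastcall and regparm attributes
   are not compatible" error emitted by this handler.  */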
5567 /* The transactional memory builtins are implicitly regparm or fastcall
5568 depending on the ABI. Override the generic do-nothing attribute that
5569 these builtins were declared with, and replace it with one of the two
5570 attributes that we expect elsewhere. */
5572 static tree
5573 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5574 int flags, bool *no_add_attrs)
5576 tree alt;
5578 /* In no case do we want to add the placeholder attribute. */
5579 *no_add_attrs = true;
5581 /* The 64-bit ABI is unchanged for transactional memory. */
5582 if (TARGET_64BIT)
5583 return NULL_TREE;
5585 /* ??? Is there a better way to validate 32-bit windows? We have
5586 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5587 if (CHECK_STACK_LIMIT > 0)
5588 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5589 else
5591 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5592 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5594 decl_attributes (node, alt, flags);
5596 return NULL_TREE;
5599 /* This function determines the calling convention from TYPE. */
5601 unsigned int
5602 ix86_get_callcvt (const_tree type)
5604 unsigned int ret = 0;
5605 bool is_stdarg;
5606 tree attrs;
5608 if (TARGET_64BIT)
5609 return IX86_CALLCVT_CDECL;
5611 attrs = TYPE_ATTRIBUTES (type);
5612 if (attrs != NULL_TREE)
5614 if (lookup_attribute ("cdecl", attrs))
5615 ret |= IX86_CALLCVT_CDECL;
5616 else if (lookup_attribute ("stdcall", attrs))
5617 ret |= IX86_CALLCVT_STDCALL;
5618 else if (lookup_attribute ("fastcall", attrs))
5619 ret |= IX86_CALLCVT_FASTCALL;
5620 else if (lookup_attribute ("thiscall", attrs))
5621 ret |= IX86_CALLCVT_THISCALL;
5623 /* Regparm isn't allowed for thiscall and fastcall. */
5624 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5626 if (lookup_attribute ("regparm", attrs))
5627 ret |= IX86_CALLCVT_REGPARM;
5628 if (lookup_attribute ("sseregparm", attrs))
5629 ret |= IX86_CALLCVT_SSEREGPARM;
5632 if (IX86_BASE_CALLCVT(ret) != 0)
5633 return ret;
5636 is_stdarg = stdarg_p (type);
5637 if (TARGET_RTD && !is_stdarg)
5638 return IX86_CALLCVT_STDCALL | ret;
5640 if (ret != 0
5641 || is_stdarg
5642 || TREE_CODE (type) != METHOD_TYPE
5643 || ix86_function_type_abi (type) != MS_ABI)
5644 return IX86_CALLCVT_CDECL | ret;
5646 return IX86_CALLCVT_THISCALL;
5649 /* Return 0 if the attributes for two types are incompatible, 1 if they
5650 are compatible, and 2 if they are nearly compatible (which causes a
5651 warning to be generated). */
5653 static int
5654 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5656 unsigned int ccvt1, ccvt2;
5658 if (TREE_CODE (type1) != FUNCTION_TYPE
5659 && TREE_CODE (type1) != METHOD_TYPE)
5660 return 1;
5662 ccvt1 = ix86_get_callcvt (type1);
5663 ccvt2 = ix86_get_callcvt (type2);
5664 if (ccvt1 != ccvt2)
5665 return 0;
5666 if (ix86_function_regparm (type1, NULL)
5667 != ix86_function_regparm (type2, NULL))
5668 return 0;
5670 return 1;
5673 /* Return the regparm value for a function with the indicated TYPE and DECL.
5674 DECL may be NULL when calling function indirectly
5675 or considering a libcall. */
5677 static int
5678 ix86_function_regparm (const_tree type, const_tree decl)
5680 tree attr;
5681 int regparm;
5682 unsigned int ccvt;
5684 if (TARGET_64BIT)
5685 return (ix86_function_type_abi (type) == SYSV_ABI
5686 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5687 ccvt = ix86_get_callcvt (type);
5688 regparm = ix86_regparm;
5690 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5692 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5693 if (attr)
5695 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5696 return regparm;
5699 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5700 return 2;
5701 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5702 return 1;
5704 /* Use register calling convention for local functions when possible. */
5705 if (decl
5706 && TREE_CODE (decl) == FUNCTION_DECL
5707 /* Caller and callee must agree on the calling convention, so
5708 checking just the global optimize flag here would mean that with
5709 __attribute__((optimize (...))) the caller could use the regparm
5710 convention and the callee not, or vice versa. Instead look at
5711 whether the callee itself is optimized. */
5712 && opt_for_fn (decl, optimize)
5713 && !(profile_flag && !flag_fentry))
5715 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5716 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5717 if (i && i->local && i->can_change_signature)
5719 int local_regparm, globals = 0, regno;
5721 /* Make sure no regparm register is taken by a
5722 fixed register variable. */
5723 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5724 if (fixed_regs[local_regparm])
5725 break;
5727 /* We don't want to use regparm(3) for nested functions as
5728 these use a static chain pointer in the third argument. */
5729 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5730 local_regparm = 2;
5732 /* In 32-bit mode save a register for the split stack. */
5733 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5734 local_regparm = 2;
5736 /* Each fixed register usage increases register pressure,
5737 so fewer registers should be used for argument passing.
5738 This functionality can be overridden by an explicit
5739 regparm value. */
5740 for (regno = AX_REG; regno <= DI_REG; regno++)
5741 if (fixed_regs[regno])
5742 globals++;
5744 local_regparm
5745 = globals < local_regparm ? local_regparm - globals : 0;
5747 if (local_regparm > regparm)
5748 regparm = local_regparm;
5752 return regparm;
5755 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5756 DFmode (2) arguments in SSE registers for a function with the
5757 indicated TYPE and DECL. DECL may be NULL when calling function
5758 indirectly or considering a libcall. Otherwise return 0. */
5760 static int
5761 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5763 gcc_assert (!TARGET_64BIT);
5765 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5766 by the sseregparm attribute. */
5767 if (TARGET_SSEREGPARM
5768 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5770 if (!TARGET_SSE)
5772 if (warn)
5774 if (decl)
5775 error ("calling %qD with attribute sseregparm without "
5776 "SSE/SSE2 enabled", decl);
5777 else
5778 error ("calling %qT with attribute sseregparm without "
5779 "SSE/SSE2 enabled", type);
5781 return 0;
5784 return 2;
5787 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5788 (and DFmode for SSE2) arguments in SSE registers. */
5789 if (decl && TARGET_SSE_MATH && optimize
5790 && !(profile_flag && !flag_fentry))
5792 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5793 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5794 if (i && i->local && i->can_change_signature)
5795 return TARGET_SSE2 ? 2 : 1;
5798 return 0;
5801 /* Return true if EAX is live at the start of the function. Used by
5802 ix86_expand_prologue to determine if we need special help before
5803 calling allocate_stack_worker. */
5805 static bool
5806 ix86_eax_live_at_start_p (void)
5808 /* Cheat. Don't bother working forward from ix86_function_regparm
5809 to the function type to whether an actual argument is located in
5810 eax. Instead just look at cfg info, which is still close enough
5811 to correct at this point. This gives false positives for broken
5812 functions that might use uninitialized data that happens to be
5813 allocated in eax, but who cares? */
5814 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5817 static bool
5818 ix86_keep_aggregate_return_pointer (tree fntype)
5820 tree attr;
5822 if (!TARGET_64BIT)
5824 attr = lookup_attribute ("callee_pop_aggregate_return",
5825 TYPE_ATTRIBUTES (fntype));
5826 if (attr)
5827 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5829 /* For 32-bit MS-ABI the default is to keep aggregate
5830 return pointer. */
5831 if (ix86_function_type_abi (fntype) == MS_ABI)
5832 return true;
5834 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5837 /* Value is the number of bytes of arguments automatically
5838 popped when returning from a subroutine call.
5839 FUNDECL is the declaration node of the function (as a tree),
5840 FUNTYPE is the data type of the function (as a tree),
5841 or for a library call it is an identifier node for the subroutine name.
5842 SIZE is the number of bytes of arguments passed on the stack.
5844 On the 80386, the RTD insn may be used to pop them if the number
5845 of args is fixed, but if the number is variable then the caller
5846 must pop them all. RTD can't be used for library calls now
5847 because the library is compiled with the Unix compiler.
5848 Use of RTD is a selectable option, since it is incompatible with
5849 standard Unix calling sequences. If the option is not selected,
5850 the caller must always pop the args.
5852 The attribute stdcall is equivalent to RTD on a per module basis. */
5854 static int
5855 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5857 unsigned int ccvt;
5859 /* None of the 64-bit ABIs pop arguments. */
5860 if (TARGET_64BIT)
5861 return 0;
5863 ccvt = ix86_get_callcvt (funtype);
5865 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5866 | IX86_CALLCVT_THISCALL)) != 0
5867 && ! stdarg_p (funtype))
5868 return size;
5870 /* Lose any fake structure return argument if it is passed on the stack. */
5871 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5872 && !ix86_keep_aggregate_return_pointer (funtype))
5874 int nregs = ix86_function_regparm (funtype, fundecl);
5875 if (nregs == 0)
5876 return GET_MODE_SIZE (Pmode);
5879 return 0;
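/* For illustration: given a hypothetical 32-bit declaration

     int __attribute__ ((stdcall)) sum2 (int a, int b);

   this function returns 8, so the callee pops its own arguments with
   "ret $8"; for a variadic or plain cdecl function it returns 0 and
   the caller pops instead.  */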
5882 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5884 static bool
5885 ix86_legitimate_combined_insn (rtx_insn *insn)
5887 /* Check operand constraints in case hard registers were propagated
5888 into insn pattern. This check prevents combine pass from
5889 generating insn patterns with invalid hard register operands.
5890 These invalid insns can eventually confuse reload to error out
5891 with a spill failure. See also PRs 46829 and 46843. */
5892 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5894 int i;
5896 extract_insn (insn);
5897 preprocess_constraints (insn);
5899 int n_operands = recog_data.n_operands;
5900 int n_alternatives = recog_data.n_alternatives;
5901 for (i = 0; i < n_operands; i++)
5903 rtx op = recog_data.operand[i];
5904 machine_mode mode = GET_MODE (op);
5905 const operand_alternative *op_alt;
5906 int offset = 0;
5907 bool win;
5908 int j;
5910 /* For pre-AVX disallow unaligned loads/stores where the
5911 instructions don't support it. */
5912 if (!TARGET_AVX
5913 && VECTOR_MODE_P (GET_MODE (op))
5914 && misaligned_operand (op, GET_MODE (op)))
5916 int min_align = get_attr_ssememalign (insn);
5917 if (min_align == 0)
5918 return false;
5921 /* A unary operator may be accepted by the predicate, but it
5922 is irrelevant for matching constraints. */
5923 if (UNARY_P (op))
5924 op = XEXP (op, 0);
5926 if (GET_CODE (op) == SUBREG)
5928 if (REG_P (SUBREG_REG (op))
5929 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5930 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5931 GET_MODE (SUBREG_REG (op)),
5932 SUBREG_BYTE (op),
5933 GET_MODE (op));
5934 op = SUBREG_REG (op);
5937 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5938 continue;
5940 op_alt = recog_op_alt;
5942 /* Operand has no constraints, anything is OK. */
5943 win = !n_alternatives;
5945 alternative_mask preferred = get_preferred_alternatives (insn);
5946 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5948 if (!TEST_BIT (preferred, j))
5949 continue;
5950 if (op_alt[i].anything_ok
5951 || (op_alt[i].matches != -1
5952 && operands_match_p
5953 (recog_data.operand[i],
5954 recog_data.operand[op_alt[i].matches]))
5955 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5957 win = true;
5958 break;
5962 if (!win)
5963 return false;
5967 return true;
5970 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5972 static unsigned HOST_WIDE_INT
5973 ix86_asan_shadow_offset (void)
5975 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5976 : HOST_WIDE_INT_C (0x7fff8000))
5977 : (HOST_WIDE_INT_1 << 29);
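/* For illustration, the sanitizer maps an application address to its
   shadow byte as

     shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so on x86-64 Linux (LP64) the shadow of ADDR lives at
   (ADDR >> 3) + 0x7fff8000.  */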
5980 /* Argument support functions. */
5982 /* Return true when register may be used to pass function parameters. */
5983 bool
5984 ix86_function_arg_regno_p (int regno)
5986 int i;
5987 const int *parm_regs;
5989 if (!TARGET_64BIT)
5991 if (TARGET_MACHO)
5992 return (regno < REGPARM_MAX
5993 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5994 else
5995 return (regno < REGPARM_MAX
5996 || (TARGET_MMX && MMX_REGNO_P (regno)
5997 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5998 || (TARGET_SSE && SSE_REGNO_P (regno)
5999 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6002 if (TARGET_SSE && SSE_REGNO_P (regno)
6003 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6004 return true;
6006 /* TODO: This function should depend on the current function's ABI, but
6007 builtins.c would then need updating. Therefore we use the
6008 default ABI. */
6010 /* RAX is used as hidden argument to va_arg functions. */
6011 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6012 return true;
6014 if (ix86_abi == MS_ABI)
6015 parm_regs = x86_64_ms_abi_int_parameter_registers;
6016 else
6017 parm_regs = x86_64_int_parameter_registers;
6018 for (i = 0; i < (ix86_abi == MS_ABI
6019 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6020 if (regno == parm_regs[i])
6021 return true;
6022 return false;
6025 /* Return true if we do not know how to pass TYPE solely in registers. */
6027 static bool
6028 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6030 if (must_pass_in_stack_var_size_or_pad (mode, type))
6031 return true;
6033 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6034 The layout_type routine is crafty and tries to trick us into passing
6035 currently unsupported vector types on the stack by using TImode. */
6036 return (!TARGET_64BIT && mode == TImode
6037 && type && TREE_CODE (type) != VECTOR_TYPE);
6040 /* Return the size, in bytes, of the area reserved for arguments passed
6041 in registers for the function represented by FNDECL, depending on the
6042 ABI format used by that function. */
6043 int
6044 ix86_reg_parm_stack_space (const_tree fndecl)
6046 enum calling_abi call_abi = SYSV_ABI;
6047 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6048 call_abi = ix86_function_abi (fndecl);
6049 else
6050 call_abi = ix86_function_type_abi (fndecl);
6051 if (TARGET_64BIT && call_abi == MS_ABI)
6052 return 32;
6053 return 0;
6056 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6057 call ABI used. */
6058 enum calling_abi
6059 ix86_function_type_abi (const_tree fntype)
6061 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6063 enum calling_abi abi = ix86_abi;
6064 if (abi == SYSV_ABI)
6066 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6067 abi = MS_ABI;
6069 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6070 abi = SYSV_ABI;
6071 return abi;
6073 return ix86_abi;
6076 /* We add this as a workaround in order to use libc_has_function
6077 hook in i386.md. */
6078 bool
6079 ix86_libc_has_function (enum function_class fn_class)
6081 return targetm.libc_has_function (fn_class);
6084 static bool
6085 ix86_function_ms_hook_prologue (const_tree fn)
6087 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6089 if (decl_function_context (fn) != NULL_TREE)
6090 error_at (DECL_SOURCE_LOCATION (fn),
6091 "ms_hook_prologue is not compatible with nested function");
6092 else
6093 return true;
6095 return false;
6098 static enum calling_abi
6099 ix86_function_abi (const_tree fndecl)
6101 if (! fndecl)
6102 return ix86_abi;
6103 return ix86_function_type_abi (TREE_TYPE (fndecl));
6106 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6107 call ABI used. */
6108 enum calling_abi
6109 ix86_cfun_abi (void)
6111 if (! cfun)
6112 return ix86_abi;
6113 return cfun->machine->call_abi;
6116 /* Write the extra assembler code needed to declare a function properly. */
6118 void
6119 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6120 tree decl)
6122 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6124 if (is_ms_hook)
6126 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6127 unsigned int filler_cc = 0xcccccccc;
6129 for (i = 0; i < filler_count; i += 4)
6130 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6133 #ifdef SUBTARGET_ASM_UNWIND_INIT
6134 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6135 #endif
6137 ASM_OUTPUT_LABEL (asm_out_file, fname);
6139 /* Output magic byte marker, if hot-patch attribute is set. */
6140 if (is_ms_hook)
6142 if (TARGET_64BIT)
6144 /* leaq [%rsp + 0], %rsp */
6145 asm_fprintf (asm_out_file, ASM_BYTE
6146 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6148 else
6150 /* movl.s %edi, %edi
6151 push %ebp
6152 movl.s %esp, %ebp */
6153 asm_fprintf (asm_out_file, ASM_BYTE
6154 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6159 /* regclass.c */
6160 extern void init_regs (void);
6162 /* Implementation of the call ABI switching target hook. The call register
6163 sets specific to FNDECL are set up. See also
6164 ix86_conditional_register_usage for more details. */
6165 void
6166 ix86_call_abi_override (const_tree fndecl)
6168 if (fndecl == NULL_TREE)
6169 cfun->machine->call_abi = ix86_abi;
6170 else
6171 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6174 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6175 expensive re-initialization of init_regs each time we switch function context
6176 since this is needed only during RTL expansion. */
6177 static void
6178 ix86_maybe_switch_abi (void)
6180 if (TARGET_64BIT &&
6181 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6182 reinit_regs ();
6185 /* Return true if a pseudo register should be created and used to hold
6186 the GOT address for PIC code. */
6187 static bool
6188 ix86_use_pseudo_pic_reg (void)
6190 if ((TARGET_64BIT
6191 && (ix86_cmodel == CM_SMALL_PIC
6192 || TARGET_PECOFF))
6193 || !flag_pic)
6194 return false;
6195 return true;
6198 /* Create and initialize PIC register if required. */
6199 static void
6200 ix86_init_pic_reg (void)
6202 edge entry_edge;
6203 rtx_insn *seq;
6205 if (!ix86_use_pseudo_pic_reg ())
6206 return;
6208 start_sequence ();
6210 if (TARGET_64BIT)
6212 if (ix86_cmodel == CM_LARGE_PIC)
6214 rtx_code_label *label;
6215 rtx tmp_reg;
6217 gcc_assert (Pmode == DImode);
6218 label = gen_label_rtx ();
6219 emit_label (label);
6220 LABEL_PRESERVE_P (label) = 1;
6221 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
6222 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6223 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6224 label));
6225 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6226 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6227 pic_offset_table_rtx, tmp_reg));
6229 else
6230 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6232 else
6234 /* If there is a future mcount call in the function, it is more profitable
6235 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6236 rtx reg = crtl->profile
6237 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6238 : pic_offset_table_rtx;
6239 rtx insn = emit_insn (gen_set_got (reg));
6240 RTX_FRAME_RELATED_P (insn) = 1;
6241 if (crtl->profile)
6242 emit_move_insn (pic_offset_table_rtx, reg);
6243 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6246 seq = get_insns ();
6247 end_sequence ();
6249 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6250 insert_insn_on_edge (seq, entry_edge);
6251 commit_one_edge_insertion (entry_edge);
6254 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6255 for a call to a function whose data type is FNTYPE.
6256 For a library call, FNTYPE is 0. */
6258 void
6259 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6260 tree fntype, /* tree ptr for function decl */
6261 rtx libname, /* SYMBOL_REF of library name or 0 */
6262 tree fndecl,
6263 int caller)
6265 struct cgraph_local_info *i;
6267 memset (cum, 0, sizeof (*cum));
6269 if (fndecl)
6271 i = cgraph_node::local_info (fndecl);
6272 cum->call_abi = ix86_function_abi (fndecl);
6274 else
6276 i = NULL;
6277 cum->call_abi = ix86_function_type_abi (fntype);
6280 cum->caller = caller;
6282 /* Set up the number of registers to use for passing arguments. */
6283 cum->nregs = ix86_regparm;
6284 if (TARGET_64BIT)
6286 cum->nregs = (cum->call_abi == SYSV_ABI
6287 ? X86_64_REGPARM_MAX
6288 : X86_64_MS_REGPARM_MAX);
6290 if (TARGET_SSE)
6292 cum->sse_nregs = SSE_REGPARM_MAX;
6293 if (TARGET_64BIT)
6295 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6296 ? X86_64_SSE_REGPARM_MAX
6297 : X86_64_MS_SSE_REGPARM_MAX);
6300 if (TARGET_MMX)
6301 cum->mmx_nregs = MMX_REGPARM_MAX;
6302 cum->warn_avx512f = true;
6303 cum->warn_avx = true;
6304 cum->warn_sse = true;
6305 cum->warn_mmx = true;
6307 /* Because the type might mismatch between caller and callee, we need to
6308 use the actual type of the function for local calls.
6309 FIXME: cgraph_analyze can be told to actually record if a function uses
6310 va_start, so for local functions maybe_vaarg can be made more aggressive,
6311 helping K&R code.
6312 FIXME: once the type system is fixed, we won't need this code anymore. */
6313 if (i && i->local && i->can_change_signature)
6314 fntype = TREE_TYPE (fndecl);
6315 cum->stdarg = stdarg_p (fntype);
6316 cum->maybe_vaarg = (fntype
6317 ? (!prototype_p (fntype) || stdarg_p (fntype))
6318 : !libname);
6320 cum->bnd_regno = FIRST_BND_REG;
6321 cum->bnds_in_bt = 0;
6322 cum->force_bnd_pass = 0;
6324 if (!TARGET_64BIT)
6326 /* If there are variable arguments, then we won't pass anything
6327 in registers in 32-bit mode. */
6328 if (stdarg_p (fntype))
6330 cum->nregs = 0;
6331 cum->sse_nregs = 0;
6332 cum->mmx_nregs = 0;
6333 cum->warn_avx512f = false;
6334 cum->warn_avx = false;
6335 cum->warn_sse = false;
6336 cum->warn_mmx = false;
6337 return;
6340 /* Use ecx and edx registers if function has fastcall attribute,
6341 else look for regparm information. */
6342 if (fntype)
6344 unsigned int ccvt = ix86_get_callcvt (fntype);
6345 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6347 cum->nregs = 1;
6348 cum->fastcall = 1; /* Same first register as in fastcall. */
6350 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6352 cum->nregs = 2;
6353 cum->fastcall = 1;
6355 else
6356 cum->nregs = ix86_function_regparm (fntype, fndecl);
6359 /* Set up the number of SSE registers used for passing SFmode
6360 and DFmode arguments. Warn for mismatching ABI. */
6361 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6365 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6366 But in the case of vector types, it is some vector mode.
6368 When we have only some of our vector isa extensions enabled, then there
6369 are some modes for which vector_mode_supported_p is false. For these
6370 modes, the generic vector support in gcc will choose some non-vector mode
6371 in order to implement the type. By computing the natural mode, we'll
6372 select the proper ABI location for the operand and not depend on whatever
6373 the middle-end decides to do with these vector types.
6375 The middle-end can't deal with vector types > 16 bytes. In this
6376 case, we return the original mode and warn about the ABI change if
6377 CUM isn't NULL.
6379 If IN_RETURN is true, warn about the ABI change if the vector mode
6380 isn't available for the function return value. */
6382 static machine_mode
6383 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6384 bool in_return)
6386 machine_mode mode = TYPE_MODE (type);
6388 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6390 HOST_WIDE_INT size = int_size_in_bytes (type);
6391 if ((size == 8 || size == 16 || size == 32 || size == 64)
6392 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6393 && TYPE_VECTOR_SUBPARTS (type) > 1)
6395 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6397 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6398 mode = MIN_MODE_VECTOR_FLOAT;
6399 else
6400 mode = MIN_MODE_VECTOR_INT;
6402 /* Get the mode which has this inner mode and number of units. */
6403 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6404 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6405 && GET_MODE_INNER (mode) == innermode)
6407 if (size == 64 && !TARGET_AVX512F)
6409 static bool warnedavx512f;
6410 static bool warnedavx512f_ret;
6412 if (cum && cum->warn_avx512f && !warnedavx512f)
6414 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6415 "without AVX512F enabled changes the ABI"))
6416 warnedavx512f = true;
6418 else if (in_return && !warnedavx512f_ret)
6420 if (warning (OPT_Wpsabi, "AVX512F vector return "
6421 "without AVX512F enabled changes the ABI"))
6422 warnedavx512f_ret = true;
6425 return TYPE_MODE (type);
6427 else if (size == 32 && !TARGET_AVX)
6429 static bool warnedavx;
6430 static bool warnedavx_ret;
6432 if (cum && cum->warn_avx && !warnedavx)
6434 if (warning (OPT_Wpsabi, "AVX vector argument "
6435 "without AVX enabled changes the ABI"))
6436 warnedavx = true;
6438 else if (in_return && !warnedavx_ret)
6440 if (warning (OPT_Wpsabi, "AVX vector return "
6441 "without AVX enabled changes the ABI"))
6442 warnedavx_ret = true;
6445 return TYPE_MODE (type);
6447 else if (((size == 8 && TARGET_64BIT) || size == 16)
6448 && !TARGET_SSE)
6450 static bool warnedsse;
6451 static bool warnedsse_ret;
6453 if (cum && cum->warn_sse && !warnedsse)
6455 if (warning (OPT_Wpsabi, "SSE vector argument "
6456 "without SSE enabled changes the ABI"))
6457 warnedsse = true;
6459 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6461 if (warning (OPT_Wpsabi, "SSE vector return "
6462 "without SSE enabled changes the ABI"))
6463 warnedsse_ret = true;
6466 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6468 static bool warnedmmx;
6469 static bool warnedmmx_ret;
6471 if (cum && cum->warn_mmx && !warnedmmx)
6473 if (warning (OPT_Wpsabi, "MMX vector argument "
6474 "without MMX enabled changes the ABI"))
6475 warnedmmx = true;
6477 else if (in_return && !warnedmmx_ret)
6479 if (warning (OPT_Wpsabi, "MMX vector return "
6480 "without MMX enabled changes the ABI"))
6481 warnedmmx_ret = true;
6484 return mode;
6487 gcc_unreachable ();
6491 return mode;
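/* A minimal sketch of the effect of type_natural_mode, assuming a generic
   32-byte vector type; the typedef below is illustrative only and is not
   part of the implementation.  */
#if 0
typedef float v8sf __attribute__ ((vector_size (32)));
/* With -mavx, TYPE_MODE of v8sf is already V8SFmode and is returned as is.
   Without -mavx, the generic vector support gives the type a non-vector
   mode; the loop above still finds V8SFmode, but since TARGET_AVX is false
   a -Wpsabi warning is emitted and TYPE_MODE (type) is returned instead,
   so the ABI location does not silently change.  */
#endif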
6494 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6495 this may not agree with the mode that the type system has chosen for the
6496 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6497 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6499 static rtx
6500 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6501 unsigned int regno)
6503 rtx tmp;
6505 if (orig_mode != BLKmode)
6506 tmp = gen_rtx_REG (orig_mode, regno);
6507 else
6509 tmp = gen_rtx_REG (mode, regno);
6510 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6511 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6514 return tmp;
6517 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6518 of this code is to classify each 8bytes of incoming argument by the register
6519 class and assign registers accordingly. */
6521 /* Return the union class of CLASS1 and CLASS2.
6522 See the x86-64 PS ABI for details. */
6524 static enum x86_64_reg_class
6525 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6527 /* Rule #1: If both classes are equal, this is the resulting class. */
6528 if (class1 == class2)
6529 return class1;
6531 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6532 the other class. */
6533 if (class1 == X86_64_NO_CLASS)
6534 return class2;
6535 if (class2 == X86_64_NO_CLASS)
6536 return class1;
6538 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6539 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6540 return X86_64_MEMORY_CLASS;
6542 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6543 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6544 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6545 return X86_64_INTEGERSI_CLASS;
6546 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6547 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6548 return X86_64_INTEGER_CLASS;
6550 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6551 MEMORY is used. */
6552 if (class1 == X86_64_X87_CLASS
6553 || class1 == X86_64_X87UP_CLASS
6554 || class1 == X86_64_COMPLEX_X87_CLASS
6555 || class2 == X86_64_X87_CLASS
6556 || class2 == X86_64_X87UP_CLASS
6557 || class2 == X86_64_COMPLEX_X87_CLASS)
6558 return X86_64_MEMORY_CLASS;
6560 /* Rule #6: Otherwise class SSE is used. */
6561 return X86_64_SSE_CLASS;
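/* A minimal sketch of how the merge rules combine, assuming an eightbyte
   that mixes integer and floating point data; the struct below is
   illustrative only.  */
#if 0
struct s { int i; float f; };
/* The int classifies as X86_64_INTEGERSI_CLASS and the float, sitting at
   a 32-bit offset, as X86_64_SSE_CLASS; rule #4 above merges the two into
   X86_64_INTEGER_CLASS, so the whole eightbyte travels in a general
   purpose register.  */
#endif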
6564 /* Classify the argument of type TYPE and mode MODE.
6565 CLASSES will be filled by the register class used to pass each word
6566 of the operand. The number of words is returned. In case the parameter
6567 should be passed in memory, 0 is returned. As a special case for zero
6568 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6570 BIT_OFFSET is used internally for handling records and specifies the offset
6571 in bits modulo 512 to avoid overflow cases.
6573 See the x86-64 PS ABI for details.
6576 static int
6577 classify_argument (machine_mode mode, const_tree type,
6578 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6580 HOST_WIDE_INT bytes =
6581 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6582 int words
6583 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6585 /* Variable sized entities are always passed/returned in memory. */
6586 if (bytes < 0)
6587 return 0;
6589 if (mode != VOIDmode
6590 && targetm.calls.must_pass_in_stack (mode, type))
6591 return 0;
6593 if (type && AGGREGATE_TYPE_P (type))
6595 int i;
6596 tree field;
6597 enum x86_64_reg_class subclasses[MAX_CLASSES];
6599 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6600 if (bytes > 64)
6601 return 0;
6603 for (i = 0; i < words; i++)
6604 classes[i] = X86_64_NO_CLASS;
6606 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6607 signal the memory class, so handle this as a special case. */
6608 if (!words)
6610 classes[0] = X86_64_NO_CLASS;
6611 return 1;
6614 /* Classify each field of record and merge classes. */
6615 switch (TREE_CODE (type))
6617 case RECORD_TYPE:
6618 /* And now merge the fields of structure. */
6619 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6621 if (TREE_CODE (field) == FIELD_DECL)
6623 int num;
6625 if (TREE_TYPE (field) == error_mark_node)
6626 continue;
6628 /* Bitfields are always classified as integer. Handle them
6629 early, since later code would consider them to be
6630 misaligned integers. */
6631 if (DECL_BIT_FIELD (field))
6633 for (i = (int_bit_position (field)
6634 + (bit_offset % 64)) / 8 / 8;
6635 i < ((int_bit_position (field) + (bit_offset % 64))
6636 + tree_to_shwi (DECL_SIZE (field))
6637 + 63) / 8 / 8; i++)
6638 classes[i] =
6639 merge_classes (X86_64_INTEGER_CLASS,
6640 classes[i]);
6642 else
6644 int pos;
6646 type = TREE_TYPE (field);
6648 /* Flexible array member is ignored. */
6649 if (TYPE_MODE (type) == BLKmode
6650 && TREE_CODE (type) == ARRAY_TYPE
6651 && TYPE_SIZE (type) == NULL_TREE
6652 && TYPE_DOMAIN (type) != NULL_TREE
6653 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6654 == NULL_TREE))
6656 static bool warned;
6658 if (!warned && warn_psabi)
6660 warned = true;
6661 inform (input_location,
6662 "the ABI of passing struct with"
6663 " a flexible array member has"
6664 " changed in GCC 4.4");
6666 continue;
6668 num = classify_argument (TYPE_MODE (type), type,
6669 subclasses,
6670 (int_bit_position (field)
6671 + bit_offset) % 512);
6672 if (!num)
6673 return 0;
6674 pos = (int_bit_position (field)
6675 + (bit_offset % 64)) / 8 / 8;
6676 for (i = 0; i < num && (i + pos) < words; i++)
6677 classes[i + pos] =
6678 merge_classes (subclasses[i], classes[i + pos]);
6682 break;
6684 case ARRAY_TYPE:
6685 /* Arrays are handled as small records. */
6687 int num;
6688 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6689 TREE_TYPE (type), subclasses, bit_offset);
6690 if (!num)
6691 return 0;
6693 /* The partial classes are now full classes. */
6694 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6695 subclasses[0] = X86_64_SSE_CLASS;
6696 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6697 && !((bit_offset % 64) == 0 && bytes == 4))
6698 subclasses[0] = X86_64_INTEGER_CLASS;
6700 for (i = 0; i < words; i++)
6701 classes[i] = subclasses[i % num];
6703 break;
6705 case UNION_TYPE:
6706 case QUAL_UNION_TYPE:
6707 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
6709 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6711 if (TREE_CODE (field) == FIELD_DECL)
6713 int num;
6715 if (TREE_TYPE (field) == error_mark_node)
6716 continue;
6718 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6719 TREE_TYPE (field), subclasses,
6720 bit_offset);
6721 if (!num)
6722 return 0;
6723 for (i = 0; i < num && i < words; i++)
6724 classes[i] = merge_classes (subclasses[i], classes[i]);
6727 break;
6729 default:
6730 gcc_unreachable ();
6733 if (words > 2)
6735 /* When the size exceeds 16 bytes, everything must be
6736 passed in memory unless the first word is
6737 X86_64_SSE_CLASS and all the remaining words are
6738 X86_64_SSEUP_CLASS. */
6739 if (classes[0] != X86_64_SSE_CLASS)
6740 return 0;
6742 for (i = 1; i < words; i++)
6743 if (classes[i] != X86_64_SSEUP_CLASS)
6744 return 0;
6747 /* Final merger cleanup. */
6748 for (i = 0; i < words; i++)
6750 /* If one class is MEMORY, everything should be passed in
6751 memory. */
6752 if (classes[i] == X86_64_MEMORY_CLASS)
6753 return 0;
6755 /* X86_64_SSEUP_CLASS should always be preceded by
6756 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6757 if (classes[i] == X86_64_SSEUP_CLASS
6758 && classes[i - 1] != X86_64_SSE_CLASS
6759 && classes[i - 1] != X86_64_SSEUP_CLASS)
6761 /* The first one should never be X86_64_SSEUP_CLASS. */
6762 gcc_assert (i != 0);
6763 classes[i] = X86_64_SSE_CLASS;
6766 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6767 everything should be passed in memory. */
6768 if (classes[i] == X86_64_X87UP_CLASS
6769 && (classes[i - 1] != X86_64_X87_CLASS))
6771 static bool warned;
6773 /* The first one should never be X86_64_X87UP_CLASS. */
6774 gcc_assert (i != 0);
6775 if (!warned && warn_psabi)
6777 warned = true;
6778 inform (input_location,
6779 "the ABI of passing union with long double"
6780 " has changed in GCC 4.4");
6782 return 0;
6785 return words;
6788 /* Compute the alignment needed. We align all types to their natural
6789 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6790 if (mode != VOIDmode && mode != BLKmode)
6792 int mode_alignment = GET_MODE_BITSIZE (mode);
6794 if (mode == XFmode)
6795 mode_alignment = 128;
6796 else if (mode == XCmode)
6797 mode_alignment = 256;
6798 if (COMPLEX_MODE_P (mode))
6799 mode_alignment /= 2;
6800 /* Misaligned fields are always returned in memory. */
6801 if (bit_offset % mode_alignment)
6802 return 0;
6805 /* For V1xx modes, just use the base mode.  */
6806 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6807 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6808 mode = GET_MODE_INNER (mode);
6810 /* Classification of atomic types. */
6811 switch (mode)
6813 case SDmode:
6814 case DDmode:
6815 classes[0] = X86_64_SSE_CLASS;
6816 return 1;
6817 case TDmode:
6818 classes[0] = X86_64_SSE_CLASS;
6819 classes[1] = X86_64_SSEUP_CLASS;
6820 return 2;
6821 case DImode:
6822 case SImode:
6823 case HImode:
6824 case QImode:
6825 case CSImode:
6826 case CHImode:
6827 case CQImode:
6829 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6831 /* Analyze last 128 bits only. */
6832 size = (size - 1) & 0x7f;
6834 if (size < 32)
6836 classes[0] = X86_64_INTEGERSI_CLASS;
6837 return 1;
6839 else if (size < 64)
6841 classes[0] = X86_64_INTEGER_CLASS;
6842 return 1;
6844 else if (size < 64+32)
6846 classes[0] = X86_64_INTEGER_CLASS;
6847 classes[1] = X86_64_INTEGERSI_CLASS;
6848 return 2;
6850 else if (size < 64+64)
6852 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6853 return 2;
6855 else
6856 gcc_unreachable ();
6858 case CDImode:
6859 case TImode:
6860 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6861 return 2;
6862 case COImode:
6863 case OImode:
6864 /* OImode shouldn't be used directly. */
6865 gcc_unreachable ();
6866 case CTImode:
6867 return 0;
6868 case SFmode:
6869 if (!(bit_offset % 64))
6870 classes[0] = X86_64_SSESF_CLASS;
6871 else
6872 classes[0] = X86_64_SSE_CLASS;
6873 return 1;
6874 case DFmode:
6875 classes[0] = X86_64_SSEDF_CLASS;
6876 return 1;
6877 case XFmode:
6878 classes[0] = X86_64_X87_CLASS;
6879 classes[1] = X86_64_X87UP_CLASS;
6880 return 2;
6881 case TFmode:
6882 classes[0] = X86_64_SSE_CLASS;
6883 classes[1] = X86_64_SSEUP_CLASS;
6884 return 2;
6885 case SCmode:
6886 classes[0] = X86_64_SSE_CLASS;
6887 if (!(bit_offset % 64))
6888 return 1;
6889 else
6891 static bool warned;
6893 if (!warned && warn_psabi)
6895 warned = true;
6896 inform (input_location,
6897 "the ABI of passing structure with complex float"
6898 " member has changed in GCC 4.4");
6900 classes[1] = X86_64_SSESF_CLASS;
6901 return 2;
6903 case DCmode:
6904 classes[0] = X86_64_SSEDF_CLASS;
6905 classes[1] = X86_64_SSEDF_CLASS;
6906 return 2;
6907 case XCmode:
6908 classes[0] = X86_64_COMPLEX_X87_CLASS;
6909 return 1;
6910 case TCmode:
6911 /* This mode is larger than 16 bytes. */
6912 return 0;
6913 case V8SFmode:
6914 case V8SImode:
6915 case V32QImode:
6916 case V16HImode:
6917 case V4DFmode:
6918 case V4DImode:
6919 classes[0] = X86_64_SSE_CLASS;
6920 classes[1] = X86_64_SSEUP_CLASS;
6921 classes[2] = X86_64_SSEUP_CLASS;
6922 classes[3] = X86_64_SSEUP_CLASS;
6923 return 4;
6924 case V8DFmode:
6925 case V16SFmode:
6926 case V8DImode:
6927 case V16SImode:
6928 case V32HImode:
6929 case V64QImode:
6930 classes[0] = X86_64_SSE_CLASS;
6931 classes[1] = X86_64_SSEUP_CLASS;
6932 classes[2] = X86_64_SSEUP_CLASS;
6933 classes[3] = X86_64_SSEUP_CLASS;
6934 classes[4] = X86_64_SSEUP_CLASS;
6935 classes[5] = X86_64_SSEUP_CLASS;
6936 classes[6] = X86_64_SSEUP_CLASS;
6937 classes[7] = X86_64_SSEUP_CLASS;
6938 return 8;
6939 case V4SFmode:
6940 case V4SImode:
6941 case V16QImode:
6942 case V8HImode:
6943 case V2DFmode:
6944 case V2DImode:
6945 classes[0] = X86_64_SSE_CLASS;
6946 classes[1] = X86_64_SSEUP_CLASS;
6947 return 2;
6948 case V1TImode:
6949 case V1DImode:
6950 case V2SFmode:
6951 case V2SImode:
6952 case V4HImode:
6953 case V8QImode:
6954 classes[0] = X86_64_SSE_CLASS;
6955 return 1;
6956 case BLKmode:
6957 case VOIDmode:
6958 return 0;
6959 default:
6960 gcc_assert (VECTOR_MODE_P (mode));
6962 if (bytes > 16)
6963 return 0;
6965 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6967 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6968 classes[0] = X86_64_INTEGERSI_CLASS;
6969 else
6970 classes[0] = X86_64_INTEGER_CLASS;
6971 classes[1] = X86_64_INTEGER_CLASS;
6972 return 1 + (bytes > 8);
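/* A minimal sketch, assuming a 16-byte aggregate; the struct below is
   illustrative only.  */
#if 0
struct p { double d; long l; };
/* For struct p, classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS:
   the double eightbyte goes to an SSE register and the long eightbyte to
   a general purpose register.  */
#endif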
6976 /* Examine the argument and set the number of registers required in each
6977 class. Return true iff the parameter should be passed in memory. */
6979 static bool
6980 examine_argument (machine_mode mode, const_tree type, int in_return,
6981 int *int_nregs, int *sse_nregs)
6983 enum x86_64_reg_class regclass[MAX_CLASSES];
6984 int n = classify_argument (mode, type, regclass, 0);
6986 *int_nregs = 0;
6987 *sse_nregs = 0;
6989 if (!n)
6990 return true;
6991 for (n--; n >= 0; n--)
6992 switch (regclass[n])
6994 case X86_64_INTEGER_CLASS:
6995 case X86_64_INTEGERSI_CLASS:
6996 (*int_nregs)++;
6997 break;
6998 case X86_64_SSE_CLASS:
6999 case X86_64_SSESF_CLASS:
7000 case X86_64_SSEDF_CLASS:
7001 (*sse_nregs)++;
7002 break;
7003 case X86_64_NO_CLASS:
7004 case X86_64_SSEUP_CLASS:
7005 break;
7006 case X86_64_X87_CLASS:
7007 case X86_64_X87UP_CLASS:
7008 case X86_64_COMPLEX_X87_CLASS:
7009 if (!in_return)
7010 return true;
7011 break;
7012 case X86_64_MEMORY_CLASS:
7013 gcc_unreachable ();
7016 return false;
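/* A minimal sketch of querying the classification, assuming the struct p
   example above; p_type stands for a hypothetical tree node for that
   struct and is not defined anywhere in this file.  */
#if 0
static void
sketch_examine (const_tree p_type)
{
  int int_nregs, sse_nregs;
  bool in_mem = examine_argument (TYPE_MODE (p_type), p_type,
				  /*in_return=*/0, &int_nregs, &sse_nregs);
  /* in_mem is false, int_nregs == 1 and sse_nregs == 1: the argument
     consumes one register from each file.  */
}
#endif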
7019 /* Construct container for the argument used by GCC interface. See
7020 FUNCTION_ARG for the detailed description. */
7022 static rtx
7023 construct_container (machine_mode mode, machine_mode orig_mode,
7024 const_tree type, int in_return, int nintregs, int nsseregs,
7025 const int *intreg, int sse_regno)
7027 /* The following variables hold the static issued_error state. */
7028 static bool issued_sse_arg_error;
7029 static bool issued_sse_ret_error;
7030 static bool issued_x87_ret_error;
7032 machine_mode tmpmode;
7033 int bytes =
7034 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7035 enum x86_64_reg_class regclass[MAX_CLASSES];
7036 int n;
7037 int i;
7038 int nexps = 0;
7039 int needed_sseregs, needed_intregs;
7040 rtx exp[MAX_CLASSES];
7041 rtx ret;
7043 n = classify_argument (mode, type, regclass, 0);
7044 if (!n)
7045 return NULL;
7046 if (examine_argument (mode, type, in_return, &needed_intregs,
7047 &needed_sseregs))
7048 return NULL;
7049 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7050 return NULL;
7052 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7053 some less clueful developer tries to use floating-point anyway. */
7054 if (needed_sseregs && !TARGET_SSE)
7056 if (in_return)
7058 if (!issued_sse_ret_error)
7060 error ("SSE register return with SSE disabled");
7061 issued_sse_ret_error = true;
7064 else if (!issued_sse_arg_error)
7066 error ("SSE register argument with SSE disabled");
7067 issued_sse_arg_error = true;
7069 return NULL;
7072 /* Likewise, error if the ABI requires us to return values in the
7073 x87 registers and the user specified -mno-80387. */
7074 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7075 for (i = 0; i < n; i++)
7076 if (regclass[i] == X86_64_X87_CLASS
7077 || regclass[i] == X86_64_X87UP_CLASS
7078 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7080 if (!issued_x87_ret_error)
7082 error ("x87 register return with x87 disabled");
7083 issued_x87_ret_error = true;
7085 return NULL;
7088 /* First construct the simple cases. Avoid SCmode, since we want to use
7089 a single register to pass this type. */
7090 if (n == 1 && mode != SCmode)
7091 switch (regclass[0])
7093 case X86_64_INTEGER_CLASS:
7094 case X86_64_INTEGERSI_CLASS:
7095 return gen_rtx_REG (mode, intreg[0]);
7096 case X86_64_SSE_CLASS:
7097 case X86_64_SSESF_CLASS:
7098 case X86_64_SSEDF_CLASS:
7099 if (mode != BLKmode)
7100 return gen_reg_or_parallel (mode, orig_mode,
7101 SSE_REGNO (sse_regno));
7102 break;
7103 case X86_64_X87_CLASS:
7104 case X86_64_COMPLEX_X87_CLASS:
7105 return gen_rtx_REG (mode, FIRST_STACK_REG);
7106 case X86_64_NO_CLASS:
7107 /* Zero sized array, struct or class. */
7108 return NULL;
7109 default:
7110 gcc_unreachable ();
7112 if (n == 2
7113 && regclass[0] == X86_64_SSE_CLASS
7114 && regclass[1] == X86_64_SSEUP_CLASS
7115 && mode != BLKmode)
7116 return gen_reg_or_parallel (mode, orig_mode,
7117 SSE_REGNO (sse_regno));
7118 if (n == 4
7119 && regclass[0] == X86_64_SSE_CLASS
7120 && regclass[1] == X86_64_SSEUP_CLASS
7121 && regclass[2] == X86_64_SSEUP_CLASS
7122 && regclass[3] == X86_64_SSEUP_CLASS
7123 && mode != BLKmode)
7124 return gen_reg_or_parallel (mode, orig_mode,
7125 SSE_REGNO (sse_regno));
7126 if (n == 8
7127 && regclass[0] == X86_64_SSE_CLASS
7128 && regclass[1] == X86_64_SSEUP_CLASS
7129 && regclass[2] == X86_64_SSEUP_CLASS
7130 && regclass[3] == X86_64_SSEUP_CLASS
7131 && regclass[4] == X86_64_SSEUP_CLASS
7132 && regclass[5] == X86_64_SSEUP_CLASS
7133 && regclass[6] == X86_64_SSEUP_CLASS
7134 && regclass[7] == X86_64_SSEUP_CLASS
7135 && mode != BLKmode)
7136 return gen_reg_or_parallel (mode, orig_mode,
7137 SSE_REGNO (sse_regno));
7138 if (n == 2
7139 && regclass[0] == X86_64_X87_CLASS
7140 && regclass[1] == X86_64_X87UP_CLASS)
7141 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7143 if (n == 2
7144 && regclass[0] == X86_64_INTEGER_CLASS
7145 && regclass[1] == X86_64_INTEGER_CLASS
7146 && (mode == CDImode || mode == TImode)
7147 && intreg[0] + 1 == intreg[1])
7148 return gen_rtx_REG (mode, intreg[0]);
7150 /* Otherwise figure out the entries of the PARALLEL. */
7151 for (i = 0; i < n; i++)
7153 int pos;
7155 switch (regclass[i])
7157 case X86_64_NO_CLASS:
7158 break;
7159 case X86_64_INTEGER_CLASS:
7160 case X86_64_INTEGERSI_CLASS:
7161 /* Merge TImodes on aligned occasions here too. */
7162 if (i * 8 + 8 > bytes)
7163 tmpmode
7164 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7165 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7166 tmpmode = SImode;
7167 else
7168 tmpmode = DImode;
7169 /* We've requested 24 bytes we
7170 don't have a mode for. Use DImode. */
7171 if (tmpmode == BLKmode)
7172 tmpmode = DImode;
7173 exp [nexps++]
7174 = gen_rtx_EXPR_LIST (VOIDmode,
7175 gen_rtx_REG (tmpmode, *intreg),
7176 GEN_INT (i*8));
7177 intreg++;
7178 break;
7179 case X86_64_SSESF_CLASS:
7180 exp [nexps++]
7181 = gen_rtx_EXPR_LIST (VOIDmode,
7182 gen_rtx_REG (SFmode,
7183 SSE_REGNO (sse_regno)),
7184 GEN_INT (i*8));
7185 sse_regno++;
7186 break;
7187 case X86_64_SSEDF_CLASS:
7188 exp [nexps++]
7189 = gen_rtx_EXPR_LIST (VOIDmode,
7190 gen_rtx_REG (DFmode,
7191 SSE_REGNO (sse_regno)),
7192 GEN_INT (i*8));
7193 sse_regno++;
7194 break;
7195 case X86_64_SSE_CLASS:
7196 pos = i;
7197 switch (n)
7199 case 1:
7200 tmpmode = DImode;
7201 break;
7202 case 2:
7203 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7205 tmpmode = TImode;
7206 i++;
7208 else
7209 tmpmode = DImode;
7210 break;
7211 case 4:
7212 gcc_assert (i == 0
7213 && regclass[1] == X86_64_SSEUP_CLASS
7214 && regclass[2] == X86_64_SSEUP_CLASS
7215 && regclass[3] == X86_64_SSEUP_CLASS);
7216 tmpmode = OImode;
7217 i += 3;
7218 break;
7219 case 8:
7220 gcc_assert (i == 0
7221 && regclass[1] == X86_64_SSEUP_CLASS
7222 && regclass[2] == X86_64_SSEUP_CLASS
7223 && regclass[3] == X86_64_SSEUP_CLASS
7224 && regclass[4] == X86_64_SSEUP_CLASS
7225 && regclass[5] == X86_64_SSEUP_CLASS
7226 && regclass[6] == X86_64_SSEUP_CLASS
7227 && regclass[7] == X86_64_SSEUP_CLASS);
7228 tmpmode = XImode;
7229 i += 7;
7230 break;
7231 default:
7232 gcc_unreachable ();
7234 exp [nexps++]
7235 = gen_rtx_EXPR_LIST (VOIDmode,
7236 gen_rtx_REG (tmpmode,
7237 SSE_REGNO (sse_regno)),
7238 GEN_INT (pos*8));
7239 sse_regno++;
7240 break;
7241 default:
7242 gcc_unreachable ();
7246 /* Empty aligned struct, union or class. */
7247 if (nexps == 0)
7248 return NULL;
7250 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7251 for (i = 0; i < nexps; i++)
7252 XVECEXP (ret, 0, i) = exp [i];
7253 return ret;
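/* A minimal sketch of the resulting PARALLEL, assuming the struct p
   example above and that %xmm0 and %rdi are the next free registers:

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:DI di) (const_int 8))])

   Bytes 0..7 (the double) live in an SSE register and bytes 8..15 (the
   long) in a general purpose register.  */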
7256 /* Update the data in CUM to advance over an argument of mode MODE
7257 and data type TYPE. (TYPE is null for libcalls where that information
7258 may not be available.)
7260 Return the number of integer registers advanced over. */
7262 static int
7263 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7264 const_tree type, HOST_WIDE_INT bytes,
7265 HOST_WIDE_INT words)
7267 int res = 0;
7269 switch (mode)
7271 default:
7272 break;
7274 case BLKmode:
7275 if (bytes < 0)
7276 break;
7277 /* FALLTHRU */
7279 case DImode:
7280 case SImode:
7281 case HImode:
7282 case QImode:
7283 cum->words += words;
7284 cum->nregs -= words;
7285 cum->regno += words;
7286 if (cum->nregs >= 0)
7287 res = words;
7288 if (cum->nregs <= 0)
7290 cum->nregs = 0;
7291 cum->regno = 0;
7293 break;
7295 case OImode:
7296 /* OImode shouldn't be used directly. */
7297 gcc_unreachable ();
7299 case DFmode:
7300 if (cum->float_in_sse < 2)
7301 break;
7302 case SFmode:
7303 if (cum->float_in_sse < 1)
7304 break;
7305 /* FALLTHRU */
7307 case V8SFmode:
7308 case V8SImode:
7309 case V64QImode:
7310 case V32HImode:
7311 case V16SImode:
7312 case V8DImode:
7313 case V16SFmode:
7314 case V8DFmode:
7315 case V32QImode:
7316 case V16HImode:
7317 case V4DFmode:
7318 case V4DImode:
7319 case TImode:
7320 case V16QImode:
7321 case V8HImode:
7322 case V4SImode:
7323 case V2DImode:
7324 case V4SFmode:
7325 case V2DFmode:
7326 if (!type || !AGGREGATE_TYPE_P (type))
7328 cum->sse_words += words;
7329 cum->sse_nregs -= 1;
7330 cum->sse_regno += 1;
7331 if (cum->sse_nregs <= 0)
7333 cum->sse_nregs = 0;
7334 cum->sse_regno = 0;
7337 break;
7339 case V8QImode:
7340 case V4HImode:
7341 case V2SImode:
7342 case V2SFmode:
7343 case V1TImode:
7344 case V1DImode:
7345 if (!type || !AGGREGATE_TYPE_P (type))
7347 cum->mmx_words += words;
7348 cum->mmx_nregs -= 1;
7349 cum->mmx_regno += 1;
7350 if (cum->mmx_nregs <= 0)
7352 cum->mmx_nregs = 0;
7353 cum->mmx_regno = 0;
7356 break;
7359 return res;
7362 static int
7363 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7364 const_tree type, HOST_WIDE_INT words, bool named)
7366 int int_nregs, sse_nregs;
7368 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7369 if (!named && (VALID_AVX512F_REG_MODE (mode)
7370 || VALID_AVX256_REG_MODE (mode)))
7371 return 0;
7373 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7374 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7376 cum->nregs -= int_nregs;
7377 cum->sse_nregs -= sse_nregs;
7378 cum->regno += int_nregs;
7379 cum->sse_regno += sse_nregs;
7380 return int_nregs;
7382 else
7384 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7385 cum->words = (cum->words + align - 1) & ~(align - 1);
7386 cum->words += words;
7387 return 0;
7391 static int
7392 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7393 HOST_WIDE_INT words)
7395 /* Otherwise, this should be passed indirect. */
7396 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7398 cum->words += words;
7399 if (cum->nregs > 0)
7401 cum->nregs -= 1;
7402 cum->regno += 1;
7403 return 1;
7405 return 0;
7408 /* Update the data in CUM to advance over an argument of mode MODE and
7409 data type TYPE. (TYPE is null for libcalls where that information
7410 may not be available.) */
7412 static void
7413 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7414 const_tree type, bool named)
7416 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7417 HOST_WIDE_INT bytes, words;
7418 int nregs;
7420 if (mode == BLKmode)
7421 bytes = int_size_in_bytes (type);
7422 else
7423 bytes = GET_MODE_SIZE (mode);
7424 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7426 if (type)
7427 mode = type_natural_mode (type, NULL, false);
7429 if ((type && POINTER_BOUNDS_TYPE_P (type))
7430 || POINTER_BOUNDS_MODE_P (mode))
7432 /* If we pass bounds in the BT then just update the remaining bounds count. */
7433 if (cum->bnds_in_bt)
7435 cum->bnds_in_bt--;
7436 return;
7439 /* Update the remaining number of bounds to force. */
7440 if (cum->force_bnd_pass)
7441 cum->force_bnd_pass--;
7443 cum->bnd_regno++;
7445 return;
7448 /* The first arg not going to Bounds Tables resets this counter. */
7449 cum->bnds_in_bt = 0;
7450 /* For unnamed args we always pass bounds to avoid a bounds mess when
7451 the passed and received types do not match. If bounds do not follow an
7452 unnamed arg, still pretend the required number of bounds were passed. */
7453 if (cum->force_bnd_pass)
7455 cum->bnd_regno += cum->force_bnd_pass;
7456 cum->force_bnd_pass = 0;
7459 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7460 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7461 else if (TARGET_64BIT)
7462 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7463 else
7464 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7466 /* For stdarg we expect bounds to be passed for each value passed
7467 in register. */
7468 if (cum->stdarg)
7469 cum->force_bnd_pass = nregs;
7470 /* For pointers passed in memory we expect bounds passed in Bounds
7471 Table. */
7472 if (!nregs)
7473 cum->bnds_in_bt = chkp_type_bounds_count (type);
7476 /* Define where to put the arguments to a function.
7477 Value is zero to push the argument on the stack,
7478 or a hard register in which to store the argument.
7480 MODE is the argument's machine mode.
7481 TYPE is the data type of the argument (as a tree).
7482 This is null for libcalls where that information may
7483 not be available.
7484 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7485 the preceding args and about the function being called.
7486 NAMED is nonzero if this argument is a named parameter
7487 (otherwise it is an extra parameter matching an ellipsis). */
7489 static rtx
7490 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7491 machine_mode orig_mode, const_tree type,
7492 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7494 /* Avoid the AL settings for the Unix64 ABI. */
7495 if (mode == VOIDmode)
7496 return constm1_rtx;
7498 switch (mode)
7500 default:
7501 break;
7503 case BLKmode:
7504 if (bytes < 0)
7505 break;
7506 /* FALLTHRU */
7507 case DImode:
7508 case SImode:
7509 case HImode:
7510 case QImode:
7511 if (words <= cum->nregs)
7513 int regno = cum->regno;
7515 /* Fastcall allocates the first two DWORD (SImode) or
7516 smaller arguments to ECX and EDX if they aren't
7517 aggregate types. */
7518 if (cum->fastcall)
7520 if (mode == BLKmode
7521 || mode == DImode
7522 || (type && AGGREGATE_TYPE_P (type)))
7523 break;
7525 /* ECX, not EAX, is the first allocated register. */
7526 if (regno == AX_REG)
7527 regno = CX_REG;
7529 return gen_rtx_REG (mode, regno);
7531 break;
7533 case DFmode:
7534 if (cum->float_in_sse < 2)
7535 break;
7536 case SFmode:
7537 if (cum->float_in_sse < 1)
7538 break;
7539 /* FALLTHRU */
7540 case TImode:
7541 /* In 32-bit mode, we pass TImode in xmm registers. */
7542 case V16QImode:
7543 case V8HImode:
7544 case V4SImode:
7545 case V2DImode:
7546 case V4SFmode:
7547 case V2DFmode:
7548 if (!type || !AGGREGATE_TYPE_P (type))
7550 if (cum->sse_nregs)
7551 return gen_reg_or_parallel (mode, orig_mode,
7552 cum->sse_regno + FIRST_SSE_REG);
7554 break;
7556 case OImode:
7557 case XImode:
7558 /* OImode and XImode shouldn't be used directly. */
7559 gcc_unreachable ();
7561 case V64QImode:
7562 case V32HImode:
7563 case V16SImode:
7564 case V8DImode:
7565 case V16SFmode:
7566 case V8DFmode:
7567 case V8SFmode:
7568 case V8SImode:
7569 case V32QImode:
7570 case V16HImode:
7571 case V4DFmode:
7572 case V4DImode:
7573 if (!type || !AGGREGATE_TYPE_P (type))
7575 if (cum->sse_nregs)
7576 return gen_reg_or_parallel (mode, orig_mode,
7577 cum->sse_regno + FIRST_SSE_REG);
7579 break;
7581 case V8QImode:
7582 case V4HImode:
7583 case V2SImode:
7584 case V2SFmode:
7585 case V1TImode:
7586 case V1DImode:
7587 if (!type || !AGGREGATE_TYPE_P (type))
7589 if (cum->mmx_nregs)
7590 return gen_reg_or_parallel (mode, orig_mode,
7591 cum->mmx_regno + FIRST_MMX_REG);
7593 break;
7596 return NULL_RTX;
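/* A minimal sketch of the 32-bit register-passing conventions handled
   above; the declarations are illustrative only.  */
#if 0
int callee (int a, int b, int c) __attribute__ ((regparm (3)));
/* a is passed in %eax, b in %edx and c in %ecx.  */
int fast_callee (int a, int b) __attribute__ ((fastcall));
/* a is passed in %ecx and b in %edx; further arguments go on the stack.  */
#endif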
7599 static rtx
7600 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7601 machine_mode orig_mode, const_tree type, bool named)
7603 /* Handle a hidden AL argument containing number of registers
7604 for varargs x86-64 functions. */
7605 if (mode == VOIDmode)
7606 return GEN_INT (cum->maybe_vaarg
7607 ? (cum->sse_nregs < 0
7608 ? X86_64_SSE_REGPARM_MAX
7609 : cum->sse_regno)
7610 : -1);
7612 switch (mode)
7614 default:
7615 break;
7617 case V8SFmode:
7618 case V8SImode:
7619 case V32QImode:
7620 case V16HImode:
7621 case V4DFmode:
7622 case V4DImode:
7623 case V16SFmode:
7624 case V16SImode:
7625 case V64QImode:
7626 case V32HImode:
7627 case V8DFmode:
7628 case V8DImode:
7629 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7630 if (!named)
7631 return NULL;
7632 break;
7635 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7636 cum->sse_nregs,
7637 &x86_64_int_parameter_registers [cum->regno],
7638 cum->sse_regno);
7641 static rtx
7642 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7643 machine_mode orig_mode, bool named,
7644 HOST_WIDE_INT bytes)
7646 unsigned int regno;
7648 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7649 We use the value -2 to specify that the current function call is MS ABI. */
7650 if (mode == VOIDmode)
7651 return GEN_INT (-2);
7653 /* If we've run out of registers, it goes on the stack. */
7654 if (cum->nregs == 0)
7655 return NULL_RTX;
7657 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7659 /* Only floating point modes are passed in anything but integer regs. */
7660 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7662 if (named)
7663 regno = cum->regno + FIRST_SSE_REG;
7664 else
7666 rtx t1, t2;
7668 /* Unnamed floating parameters are passed in both the
7669 SSE and integer registers. */
7670 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7671 t2 = gen_rtx_REG (mode, regno);
7672 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7673 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7674 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7677 /* Handle aggregate types passed in a register. */
7678 if (orig_mode == BLKmode)
7680 if (bytes > 0 && bytes <= 8)
7681 mode = (bytes > 4 ? DImode : SImode);
7682 if (mode == BLKmode)
7683 mode = DImode;
7686 return gen_reg_or_parallel (mode, orig_mode, regno);
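/* A minimal sketch, assuming a prototyped MS-ABI call; the declaration is
   illustrative only.  */
#if 0
void f (int a, double b, void *c);
/* a is passed in %ecx, b in %xmm1 (the SSE register number matches the
   argument slot rather than a separate SSE counter) and c in %r8.  */
#endif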
7689 /* Return where to put the arguments to a function.
7690 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7692 MODE is the argument's machine mode. TYPE is the data type of the
7693 argument. It is null for libcalls where that information may not be
7694 available. CUM gives information about the preceding args and about
7695 the function being called. NAMED is nonzero if this argument is a
7696 named parameter (otherwise it is an extra parameter matching an
7697 ellipsis). */
7699 static rtx
7700 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7701 const_tree type, bool named)
7703 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7704 machine_mode mode = omode;
7705 HOST_WIDE_INT bytes, words;
7706 rtx arg;
7708 /* All pointer bounds arguments are handled separately here. */
7709 if ((type && POINTER_BOUNDS_TYPE_P (type))
7710 || POINTER_BOUNDS_MODE_P (mode))
7712 /* Return NULL if bounds are forced to go in Bounds Table. */
7713 if (cum->bnds_in_bt)
7714 arg = NULL;
7715 /* Return the next available bound reg if any. */
7716 else if (cum->bnd_regno <= LAST_BND_REG)
7717 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7718 /* Return the next special slot number otherwise. */
7719 else
7720 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7722 return arg;
7725 if (mode == BLKmode)
7726 bytes = int_size_in_bytes (type);
7727 else
7728 bytes = GET_MODE_SIZE (mode);
7729 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7731 /* To simplify the code below, represent vector types with a vector mode
7732 even if MMX/SSE are not active. */
7733 if (type && TREE_CODE (type) == VECTOR_TYPE)
7734 mode = type_natural_mode (type, cum, false);
7736 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7737 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7738 else if (TARGET_64BIT)
7739 arg = function_arg_64 (cum, mode, omode, type, named);
7740 else
7741 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7743 return arg;
7746 /* A C expression that indicates when an argument must be passed by
7747 reference. If nonzero for an argument, a copy of that argument is
7748 made in memory and a pointer to the argument is passed instead of
7749 the argument itself. The pointer is passed in whatever way is
7750 appropriate for passing a pointer to that type. */
7752 static bool
7753 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7754 const_tree type, bool)
7756 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7758 /* See Windows x64 Software Convention. */
7759 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7761 int msize = (int) GET_MODE_SIZE (mode);
7762 if (type)
7764 /* Arrays are passed by reference. */
7765 if (TREE_CODE (type) == ARRAY_TYPE)
7766 return true;
7768 if (AGGREGATE_TYPE_P (type))
7770 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7771 are passed by reference. */
7772 msize = int_size_in_bytes (type);
7776 /* __m128 is passed by reference. */
7777 switch (msize) {
7778 case 1: case 2: case 4: case 8:
7779 break;
7780 default:
7781 return true;
7784 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7785 return 1;
7787 return 0;
7790 /* Return true when TYPE should be 128bit aligned for 32bit argument
7791 passing ABI. XXX: This function is obsolete and is only used for
7792 checking psABI compatibility with previous versions of GCC. */
7794 static bool
7795 ix86_compat_aligned_value_p (const_tree type)
7797 machine_mode mode = TYPE_MODE (type);
7798 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7799 || mode == TDmode
7800 || mode == TFmode
7801 || mode == TCmode)
7802 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7803 return true;
7804 if (TYPE_ALIGN (type) < 128)
7805 return false;
7807 if (AGGREGATE_TYPE_P (type))
7809 /* Walk the aggregates recursively. */
7810 switch (TREE_CODE (type))
7812 case RECORD_TYPE:
7813 case UNION_TYPE:
7814 case QUAL_UNION_TYPE:
7816 tree field;
7818 /* Walk all the structure fields. */
7819 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7821 if (TREE_CODE (field) == FIELD_DECL
7822 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7823 return true;
7825 break;
7828 case ARRAY_TYPE:
7829 /* Just for use if some languages pass arrays by value. */
7830 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7831 return true;
7832 break;
7834 default:
7835 gcc_unreachable ();
7838 return false;
7841 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7842 XXX: This function is obsolete and is only used for checking psABI
7843 compatibility with previous versions of GCC. */
7845 static unsigned int
7846 ix86_compat_function_arg_boundary (machine_mode mode,
7847 const_tree type, unsigned int align)
7849 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7850 natural boundaries. */
7851 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7853 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7854 make an exception for SSE modes since these require 128bit
7855 alignment.
7857 The handling here differs from field_alignment. ICC aligns MMX
7858 arguments to 4 byte boundaries, while structure fields are aligned
7859 to 8 byte boundaries. */
7860 if (!type)
7862 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7863 align = PARM_BOUNDARY;
7865 else
7867 if (!ix86_compat_aligned_value_p (type))
7868 align = PARM_BOUNDARY;
7871 if (align > BIGGEST_ALIGNMENT)
7872 align = BIGGEST_ALIGNMENT;
7873 return align;
7876 /* Return true when TYPE should be 128bit aligned for 32bit argument
7877 passing ABI. */
7879 static bool
7880 ix86_contains_aligned_value_p (const_tree type)
7882 machine_mode mode = TYPE_MODE (type);
7884 if (mode == XFmode || mode == XCmode)
7885 return false;
7887 if (TYPE_ALIGN (type) < 128)
7888 return false;
7890 if (AGGREGATE_TYPE_P (type))
7892 /* Walk the aggregates recursively. */
7893 switch (TREE_CODE (type))
7895 case RECORD_TYPE:
7896 case UNION_TYPE:
7897 case QUAL_UNION_TYPE:
7899 tree field;
7901 /* Walk all the structure fields. */
7902 for (field = TYPE_FIELDS (type);
7903 field;
7904 field = DECL_CHAIN (field))
7906 if (TREE_CODE (field) == FIELD_DECL
7907 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7908 return true;
7910 break;
7913 case ARRAY_TYPE:
7914 /* Just for use if some languages pass arrays by value. */
7915 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7916 return true;
7917 break;
7919 default:
7920 gcc_unreachable ();
7923 else
7924 return TYPE_ALIGN (type) >= 128;
7926 return false;
7929 /* Gives the alignment boundary, in bits, of an argument with the
7930 specified mode and type. */
7932 static unsigned int
7933 ix86_function_arg_boundary (machine_mode mode, const_tree type)
7935 unsigned int align;
7936 if (type)
7938 /* Since the main variant type is used for the call, convert the
7939 type to its main variant. */
7940 type = TYPE_MAIN_VARIANT (type);
7941 align = TYPE_ALIGN (type);
7943 else
7944 align = GET_MODE_ALIGNMENT (mode);
7945 if (align < PARM_BOUNDARY)
7946 align = PARM_BOUNDARY;
7947 else
7949 static bool warned;
7950 unsigned int saved_align = align;
7952 if (!TARGET_64BIT)
7954 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7955 if (!type)
7957 if (mode == XFmode || mode == XCmode)
7958 align = PARM_BOUNDARY;
7960 else if (!ix86_contains_aligned_value_p (type))
7961 align = PARM_BOUNDARY;
7963 if (align < 128)
7964 align = PARM_BOUNDARY;
7967 if (warn_psabi
7968 && !warned
7969 && align != ix86_compat_function_arg_boundary (mode, type,
7970 saved_align))
7972 warned = true;
7973 inform (input_location,
7974 "The ABI for passing parameters with %d-byte"
7975 " alignment has changed in GCC 4.6",
7976 align / BITS_PER_UNIT);
7980 return align;
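/* A minimal sketch, assuming 32-bit code: a plain int argument keeps the
   default PARM_BOUNDARY (32-bit) alignment, while a 16-byte aligned value
   such as __m128 gets 128-bit alignment; when the result differs from what
   ix86_compat_function_arg_boundary would have produced, a single -Wpsabi
   note about the GCC 4.6 change is emitted.  The typedef and the tree
   names below are illustrative only.  */
#if 0
typedef float four_floats __attribute__ ((vector_size (16)));
/* ix86_function_arg_boundary (V4SFmode, four_floats_type) == 128,
   ix86_function_arg_boundary (SImode, integer_type_node) == 32.  */
#endif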
7983 /* Return true if N is a possible register number of function value. */
7985 static bool
7986 ix86_function_value_regno_p (const unsigned int regno)
7988 switch (regno)
7990 case AX_REG:
7991 return true;
7992 case DX_REG:
7993 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7994 case DI_REG:
7995 case SI_REG:
7996 return TARGET_64BIT && ix86_abi != MS_ABI;
7998 case FIRST_BND_REG:
7999 return chkp_function_instrumented_p (current_function_decl);
8001 /* Complex values are returned in %st(0)/%st(1) pair. */
8002 case ST0_REG:
8003 case ST1_REG:
8004 /* TODO: The function should depend on current function ABI but
8005 builtins.c would need updating then. Therefore we use the
8006 default ABI. */
8007 if (TARGET_64BIT && ix86_abi == MS_ABI)
8008 return false;
8009 return TARGET_FLOAT_RETURNS_IN_80387;
8011 /* Complex values are returned in %xmm0/%xmm1 pair. */
8012 case XMM0_REG:
8013 case XMM1_REG:
8014 return TARGET_SSE;
8016 case MM0_REG:
8017 if (TARGET_MACHO || TARGET_64BIT)
8018 return false;
8019 return TARGET_MMX;
8022 return false;
8025 /* Define how to find the value returned by a function.
8026 VALTYPE is the data type of the value (as a tree).
8027 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8028 otherwise, FUNC is 0. */
8030 static rtx
8031 function_value_32 (machine_mode orig_mode, machine_mode mode,
8032 const_tree fntype, const_tree fn)
8034 unsigned int regno;
8036 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8037 we normally prevent this case when mmx is not available. However
8038 some ABIs may require the result to be returned like DImode. */
8039 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8040 regno = FIRST_MMX_REG;
8042 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8043 we prevent this case when sse is not available. However some ABIs
8044 may require the result to be returned like integer TImode. */
8045 else if (mode == TImode
8046 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8047 regno = FIRST_SSE_REG;
8049 /* 32-byte vector modes in %ymm0. */
8050 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8051 regno = FIRST_SSE_REG;
8053 /* 64-byte vector modes in %zmm0. */
8054 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8055 regno = FIRST_SSE_REG;
8057 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8058 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8059 regno = FIRST_FLOAT_REG;
8060 else
8061 /* Most things go in %eax. */
8062 regno = AX_REG;
8064 /* Override FP return register with %xmm0 for local functions when
8065 SSE math is enabled or for functions with sseregparm attribute. */
8066 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8068 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8069 if ((sse_level >= 1 && mode == SFmode)
8070 || (sse_level == 2 && mode == DFmode))
8071 regno = FIRST_SSE_REG;
8074 /* OImode shouldn't be used directly. */
8075 gcc_assert (mode != OImode);
8077 return gen_rtx_REG (orig_mode, regno);
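/* A minimal sketch of the 32-bit return locations chosen above, assuming
   default options: an int comes back in %eax, a double in %st(0) because
   TARGET_FLOAT_RETURNS_IN_80387 is set, and an SFmode or DFmode value is
   redirected to %xmm0 only when ix86_function_sseregparm reports a high
   enough level (e.g. the sseregparm attribute, or a local function built
   with SSE math).  The declarations are illustrative only.  */
#if 0
int    ireturn (void);	/* value in %eax */
double dreturn (void);	/* value in %st(0) by default */
#endif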
8080 static rtx
8081 function_value_64 (machine_mode orig_mode, machine_mode mode,
8082 const_tree valtype)
8084 rtx ret;
8086 /* Handle libcalls, which don't provide a type node. */
8087 if (valtype == NULL)
8089 unsigned int regno;
8091 switch (mode)
8093 case SFmode:
8094 case SCmode:
8095 case DFmode:
8096 case DCmode:
8097 case TFmode:
8098 case SDmode:
8099 case DDmode:
8100 case TDmode:
8101 regno = FIRST_SSE_REG;
8102 break;
8103 case XFmode:
8104 case XCmode:
8105 regno = FIRST_FLOAT_REG;
8106 break;
8107 case TCmode:
8108 return NULL;
8109 default:
8110 regno = AX_REG;
8113 return gen_rtx_REG (mode, regno);
8115 else if (POINTER_TYPE_P (valtype))
8117 /* Pointers are always returned in word_mode. */
8118 mode = word_mode;
8121 ret = construct_container (mode, orig_mode, valtype, 1,
8122 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8123 x86_64_int_return_registers, 0);
8125 /* For zero sized structures, construct_container returns NULL, but we
8126 need to keep rest of compiler happy by returning meaningful value. */
8127 if (!ret)
8128 ret = gen_rtx_REG (orig_mode, AX_REG);
8130 return ret;
8133 static rtx
8134 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8135 const_tree valtype)
8137 unsigned int regno = AX_REG;
8139 if (TARGET_SSE)
8141 switch (GET_MODE_SIZE (mode))
8143 case 16:
8144 if (valtype != NULL_TREE
8145 && !VECTOR_INTEGER_TYPE_P (valtype)
8147 && !INTEGRAL_TYPE_P (valtype)
8148 && !VECTOR_FLOAT_TYPE_P (valtype))
8149 break;
8150 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8151 && !COMPLEX_MODE_P (mode))
8152 regno = FIRST_SSE_REG;
8153 break;
8154 case 8:
8155 case 4:
8156 if (mode == SFmode || mode == DFmode)
8157 regno = FIRST_SSE_REG;
8158 break;
8159 default:
8160 break;
8163 return gen_rtx_REG (orig_mode, regno);
8166 static rtx
8167 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8168 machine_mode orig_mode, machine_mode mode)
8170 const_tree fn, fntype;
8172 fn = NULL_TREE;
8173 if (fntype_or_decl && DECL_P (fntype_or_decl))
8174 fn = fntype_or_decl;
8175 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8177 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8178 || POINTER_BOUNDS_MODE_P (mode))
8179 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8180 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8181 return function_value_ms_64 (orig_mode, mode, valtype);
8182 else if (TARGET_64BIT)
8183 return function_value_64 (orig_mode, mode, valtype);
8184 else
8185 return function_value_32 (orig_mode, mode, fntype, fn);
8188 static rtx
8189 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8191 machine_mode mode, orig_mode;
8193 orig_mode = TYPE_MODE (valtype);
8194 mode = type_natural_mode (valtype, NULL, true);
8195 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8198 /* Return an RTX representing a place where a function returns
8199 or receives pointer bounds, or NULL if no bounds are returned.
8201 VALTYPE is a data type of a value returned by the function.
8203 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8204 or FUNCTION_TYPE of the function.
8206 If OUTGOING is false, return a place in which the caller will
8207 see the return value. Otherwise, return a place where a
8208 function returns a value. */
8210 static rtx
8211 ix86_function_value_bounds (const_tree valtype,
8212 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8213 bool outgoing ATTRIBUTE_UNUSED)
8215 rtx res = NULL_RTX;
8217 if (BOUNDED_TYPE_P (valtype))
8218 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8219 else if (chkp_type_has_pointer (valtype))
8221 bitmap slots;
8222 rtx bounds[2];
8223 bitmap_iterator bi;
8224 unsigned i, bnd_no = 0;
8226 bitmap_obstack_initialize (NULL);
8227 slots = BITMAP_ALLOC (NULL);
8228 chkp_find_bound_slots (valtype, slots);
8230 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8232 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8233 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8234 gcc_assert (bnd_no < 2);
8235 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8238 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8240 BITMAP_FREE (slots);
8241 bitmap_obstack_release (NULL);
8243 else
8244 res = NULL_RTX;
8246 return res;
8249 /* Pointer function arguments and return values are promoted to
8250 word_mode. */
8252 static machine_mode
8253 ix86_promote_function_mode (const_tree type, machine_mode mode,
8254 int *punsignedp, const_tree fntype,
8255 int for_return)
8257 if (type != NULL_TREE && POINTER_TYPE_P (type))
8259 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8260 return word_mode;
8262 return default_promote_function_mode (type, mode, punsignedp, fntype,
8263 for_return);
8266 /* Return true if a structure, union or array with MODE containing FIELD
8267 should be accessed using BLKmode. */
8269 static bool
8270 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8272 /* Union with XFmode must be in BLKmode. */
8273 return (mode == XFmode
8274 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8275 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8278 static rtx
8279 ix86_libcall_value (machine_mode mode)
8281 return ix86_function_value_1 (NULL, NULL, mode, mode);
8284 /* Return true iff type is returned in memory. */
8286 static bool
8287 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8289 #ifdef SUBTARGET_RETURN_IN_MEMORY
8290 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8291 #else
8292 const machine_mode mode = type_natural_mode (type, NULL, true);
8293 HOST_WIDE_INT size;
8295 if (POINTER_BOUNDS_TYPE_P (type))
8296 return false;
8298 if (TARGET_64BIT)
8300 if (ix86_function_type_abi (fntype) == MS_ABI)
8302 size = int_size_in_bytes (type);
8304 /* __m128 is returned in xmm0. */
8305 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8306 || INTEGRAL_TYPE_P (type)
8307 || VECTOR_FLOAT_TYPE_P (type))
8308 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8309 && !COMPLEX_MODE_P (mode)
8310 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8311 return false;
8313 /* Otherwise, the size must be exactly in [1248]. */
8314 return size != 1 && size != 2 && size != 4 && size != 8;
8316 else
8318 int needed_intregs, needed_sseregs;
8320 return examine_argument (mode, type, 1,
8321 &needed_intregs, &needed_sseregs);
8324 else
8326 if (mode == BLKmode)
8327 return true;
8329 size = int_size_in_bytes (type);
8331 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8332 return false;
8334 if (VECTOR_MODE_P (mode) || mode == TImode)
8336 /* User-created vectors small enough to fit in EAX. */
8337 if (size < 8)
8338 return false;
8340 /* Unless the ABI prescribes otherwise,
8341 MMX/3dNow values are returned in MM0 if available. */
8343 if (size == 8)
8344 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8346 /* SSE values are returned in XMM0 if available. */
8347 if (size == 16)
8348 return !TARGET_SSE;
8350 /* AVX values are returned in YMM0 if available. */
8351 if (size == 32)
8352 return !TARGET_AVX;
8354 /* AVX512F values are returned in ZMM0 if available. */
8355 if (size == 64)
8356 return !TARGET_AVX512F;
8359 if (mode == XFmode)
8360 return false;
8362 if (size > 12)
8363 return true;
8365 /* OImode shouldn't be used directly. */
8366 gcc_assert (mode != OImode);
8368 return false;
8370 #endif
8374 /* Create the va_list data type. */
8376 /* Returns the calling convention specific va_list data type.
8377 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8379 static tree
8380 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8382 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8384 /* For i386 we use a plain pointer to the argument area. */
8385 if (!TARGET_64BIT || abi == MS_ABI)
8386 return build_pointer_type (char_type_node);
8388 record = lang_hooks.types.make_type (RECORD_TYPE);
8389 type_decl = build_decl (BUILTINS_LOCATION,
8390 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8392 f_gpr = build_decl (BUILTINS_LOCATION,
8393 FIELD_DECL, get_identifier ("gp_offset"),
8394 unsigned_type_node);
8395 f_fpr = build_decl (BUILTINS_LOCATION,
8396 FIELD_DECL, get_identifier ("fp_offset"),
8397 unsigned_type_node);
8398 f_ovf = build_decl (BUILTINS_LOCATION,
8399 FIELD_DECL, get_identifier ("overflow_arg_area"),
8400 ptr_type_node);
8401 f_sav = build_decl (BUILTINS_LOCATION,
8402 FIELD_DECL, get_identifier ("reg_save_area"),
8403 ptr_type_node);
8405 va_list_gpr_counter_field = f_gpr;
8406 va_list_fpr_counter_field = f_fpr;
8408 DECL_FIELD_CONTEXT (f_gpr) = record;
8409 DECL_FIELD_CONTEXT (f_fpr) = record;
8410 DECL_FIELD_CONTEXT (f_ovf) = record;
8411 DECL_FIELD_CONTEXT (f_sav) = record;
8413 TYPE_STUB_DECL (record) = type_decl;
8414 TYPE_NAME (record) = type_decl;
8415 TYPE_FIELDS (record) = f_gpr;
8416 DECL_CHAIN (f_gpr) = f_fpr;
8417 DECL_CHAIN (f_fpr) = f_ovf;
8418 DECL_CHAIN (f_ovf) = f_sav;
8420 layout_type (record);
8422 /* The correct type is an array type of one element. */
8423 return build_array_type (record, build_index_type (size_zero_node));
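/* A minimal sketch of how the record built above appears to user code
   under the 64-bit SysV ABI; the typedef name is illustrative only.  */
#if 0
typedef struct illustrative_va_list_tag
{
  unsigned int gp_offset;	/* Offset into reg_save_area of the next GPR arg.  */
  unsigned int fp_offset;	/* Offset into reg_save_area of the next SSE arg.  */
  void *overflow_arg_area;	/* Stack area for arguments that did not fit in registers.  */
  void *reg_save_area;		/* Register save area filled by the prologue.  */
} illustrative_va_list[1];
#endif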
8426 /* Setup the builtin va_list data type and for 64-bit the additional
8427 calling convention specific va_list data types. */
8429 static tree
8430 ix86_build_builtin_va_list (void)
8432 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8434 /* Initialize abi specific va_list builtin types. */
8435 if (TARGET_64BIT)
8437 tree t;
8438 if (ix86_abi == MS_ABI)
8440 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8441 if (TREE_CODE (t) != RECORD_TYPE)
8442 t = build_variant_type_copy (t);
8443 sysv_va_list_type_node = t;
8445 else
8447 t = ret;
8448 if (TREE_CODE (t) != RECORD_TYPE)
8449 t = build_variant_type_copy (t);
8450 sysv_va_list_type_node = t;
8452 if (ix86_abi != MS_ABI)
8454 t = ix86_build_builtin_va_list_abi (MS_ABI);
8455 if (TREE_CODE (t) != RECORD_TYPE)
8456 t = build_variant_type_copy (t);
8457 ms_va_list_type_node = t;
8459 else
8461 t = ret;
8462 if (TREE_CODE (t) != RECORD_TYPE)
8463 t = build_variant_type_copy (t);
8464 ms_va_list_type_node = t;
8468 return ret;
8471 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8473 static void
8474 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8476 rtx save_area, mem;
8477 alias_set_type set;
8478 int i, max;
8480 /* GPR size of varargs save area. */
8481 if (cfun->va_list_gpr_size)
8482 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8483 else
8484 ix86_varargs_gpr_size = 0;
8486 /* FPR size of varargs save area. We don't need it if we don't pass
8487 anything in SSE registers. */
8488 if (TARGET_SSE && cfun->va_list_fpr_size)
8489 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8490 else
8491 ix86_varargs_fpr_size = 0;
8493 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8494 return;
8496 save_area = frame_pointer_rtx;
8497 set = get_varargs_alias_set ();
8499 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8500 if (max > X86_64_REGPARM_MAX)
8501 max = X86_64_REGPARM_MAX;
8503 for (i = cum->regno; i < max; i++)
8505 mem = gen_rtx_MEM (word_mode,
8506 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8507 MEM_NOTRAP_P (mem) = 1;
8508 set_mem_alias_set (mem, set);
8509 emit_move_insn (mem,
8510 gen_rtx_REG (word_mode,
8511 x86_64_int_parameter_registers[i]));
8514 if (ix86_varargs_fpr_size)
8516 machine_mode smode;
8517 rtx_code_label *label;
8518 rtx test;
8520 /* Now emit code to save SSE registers. The AX parameter contains number
8521 of SSE parameter registers used to call this function, though all we
8522 actually check here is the zero/non-zero status. */
8524 label = gen_label_rtx ();
8525 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8526 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8527 label));
8529 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8530 we used movdqa (i.e. TImode) instead? Perhaps even better would
8531 be if we could determine the real mode of the data, via a hook
8532 into pass_stdarg. Ignore all that for now. */
8533 smode = V4SFmode;
8534 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8535 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8537 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8538 if (max > X86_64_SSE_REGPARM_MAX)
8539 max = X86_64_SSE_REGPARM_MAX;
8541 for (i = cum->sse_regno; i < max; ++i)
8543 mem = plus_constant (Pmode, save_area,
8544 i * 16 + ix86_varargs_gpr_size);
8545 mem = gen_rtx_MEM (smode, mem);
8546 MEM_NOTRAP_P (mem) = 1;
8547 set_mem_alias_set (mem, set);
8548 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8550 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8553 emit_label (label);
8557 static void
8558 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8560 alias_set_type set = get_varargs_alias_set ();
8561 int i;
8563 /* Reset to zero, as there might be a SysV va_arg used
8564 before. */
8565 ix86_varargs_gpr_size = 0;
8566 ix86_varargs_fpr_size = 0;
8568 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8570 rtx reg, mem;
8572 mem = gen_rtx_MEM (Pmode,
8573 plus_constant (Pmode, virtual_incoming_args_rtx,
8574 i * UNITS_PER_WORD));
8575 MEM_NOTRAP_P (mem) = 1;
8576 set_mem_alias_set (mem, set);
8578 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8579 emit_move_insn (mem, reg);
8583 static void
8584 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8585 tree type, int *, int no_rtl)
8587 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8588 CUMULATIVE_ARGS next_cum;
8589 tree fntype;
8591 /* This argument doesn't appear to be used anymore, which is good,
8592 because the old code here didn't suppress rtl generation. */
8593 gcc_assert (!no_rtl);
8595 if (!TARGET_64BIT)
8596 return;
8598 fntype = TREE_TYPE (current_function_decl);
8600 /* For varargs, we do not want to skip the dummy va_dcl argument.
8601 For stdargs, we do want to skip the last named argument. */
8602 next_cum = *cum;
8603 if (stdarg_p (fntype))
8604 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8605 true);
8607 if (cum->call_abi == MS_ABI)
8608 setup_incoming_varargs_ms_64 (&next_cum);
8609 else
8610 setup_incoming_varargs_64 (&next_cum);
8613 static void
8614 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8615 enum machine_mode mode,
8616 tree type,
8617 int *pretend_size ATTRIBUTE_UNUSED,
8618 int no_rtl)
8620 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8621 CUMULATIVE_ARGS next_cum;
8622 tree fntype;
8623 rtx save_area;
8624 int bnd_reg, i, max;
8626 gcc_assert (!no_rtl);
8628 /* Do nothing if we use plain pointer to argument area. */
8629 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8630 return;
8632 fntype = TREE_TYPE (current_function_decl);
8634 /* For varargs, we do not want to skip the dummy va_dcl argument.
8635 For stdargs, we do want to skip the last named argument. */
8636 next_cum = *cum;
8637 if (stdarg_p (fntype))
8638 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8639 true);
8640 save_area = frame_pointer_rtx;
8642 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8643 if (max > X86_64_REGPARM_MAX)
8644 max = X86_64_REGPARM_MAX;
8646 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8647 if (chkp_function_instrumented_p (current_function_decl))
8648 for (i = cum->regno; i < max; i++)
8650 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8651 rtx reg = gen_rtx_REG (DImode,
8652 x86_64_int_parameter_registers[i]);
8653 rtx ptr = reg;
8654 rtx bounds;
8656 if (bnd_reg <= LAST_BND_REG)
8657 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8658 else
8660 rtx ldx_addr =
8661 plus_constant (Pmode, arg_pointer_rtx,
8662 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8663 bounds = gen_reg_rtx (BNDmode);
8664 emit_insn (BNDmode == BND64mode
8665 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8666 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8669 emit_insn (BNDmode == BND64mode
8670 ? gen_bnd64_stx (addr, ptr, bounds)
8671 : gen_bnd32_stx (addr, ptr, bounds));
8673 bnd_reg++;
8678 /* Check whether TYPE is a va_list of the plain char * kind. */
8680 static bool
8681 is_va_list_char_pointer (tree type)
8683 tree canonic;
8685 /* For 32-bit it is always true. */
8686 if (!TARGET_64BIT)
8687 return true;
8688 canonic = ix86_canonical_va_list_type (type);
8689 return (canonic == ms_va_list_type_node
8690 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8693 /* Implement va_start. */
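/* For the 64-bit SysV case handled below, the va_list fields end up
   roughly as follows (a sketch of the code below, using its local
   variable names):

     gpr = <integer registers consumed by named args> * 8
     fpr = <SSE registers consumed by named args> * 16 + 8 * X86_64_REGPARM_MAX
     ovf = internal arg pointer (or the split-stack pointer)
           + <named stack words> * UNITS_PER_WORD
     sav = frame_pointer_rtx, i.e. the register save area set up by
           setup_incoming_varargs_64 (biased back when no GPRs were saved)

   32-bit and MS-ABI va_list values are plain pointers and take the
   std_expand_builtin_va_start path instead.  */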
8695 static void
8696 ix86_va_start (tree valist, rtx nextarg)
8698 HOST_WIDE_INT words, n_gpr, n_fpr;
8699 tree f_gpr, f_fpr, f_ovf, f_sav;
8700 tree gpr, fpr, ovf, sav, t;
8701 tree type;
8702 rtx ovf_rtx;
8704 if (flag_split_stack
8705 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8707 unsigned int scratch_regno;
8709 /* When we are splitting the stack, we can't refer to the stack
8710 arguments using internal_arg_pointer, because they may be on
8711 the old stack. The split stack prologue will arrange to
8712 leave a pointer to the old stack arguments in a scratch
8713 register, which we here copy to a pseudo-register. The split
8714 stack prologue can't set the pseudo-register directly because
8715 it (the prologue) runs before any registers have been saved. */
8717 scratch_regno = split_stack_prologue_scratch_regno ();
8718 if (scratch_regno != INVALID_REGNUM)
8720 rtx reg;
8721 rtx_insn *seq;
8723 reg = gen_reg_rtx (Pmode);
8724 cfun->machine->split_stack_varargs_pointer = reg;
8726 start_sequence ();
8727 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8728 seq = get_insns ();
8729 end_sequence ();
8731 push_topmost_sequence ();
8732 emit_insn_after (seq, entry_of_function ());
8733 pop_topmost_sequence ();
8737 /* Only 64-bit targets need something special. */
8738 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8740 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8741 std_expand_builtin_va_start (valist, nextarg);
8742 else
8744 rtx va_r, next;
8746 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8747 next = expand_binop (ptr_mode, add_optab,
8748 cfun->machine->split_stack_varargs_pointer,
8749 crtl->args.arg_offset_rtx,
8750 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8751 convert_move (va_r, next, 0);
8753 /* Store zero bounds for va_list. */
8754 if (chkp_function_instrumented_p (current_function_decl))
8755 chkp_expand_bounds_reset_for_mem (valist,
8756 make_tree (TREE_TYPE (valist),
8757 next));
8760 return;
8763 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8764 f_fpr = DECL_CHAIN (f_gpr);
8765 f_ovf = DECL_CHAIN (f_fpr);
8766 f_sav = DECL_CHAIN (f_ovf);
8768 valist = build_simple_mem_ref (valist);
8769 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8770 /* The following should be folded into the MEM_REF offset. */
8771 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8772 f_gpr, NULL_TREE);
8773 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8774 f_fpr, NULL_TREE);
8775 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8776 f_ovf, NULL_TREE);
8777 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8778 f_sav, NULL_TREE);
8780 /* Count number of gp and fp argument registers used. */
8781 words = crtl->args.info.words;
8782 n_gpr = crtl->args.info.regno;
8783 n_fpr = crtl->args.info.sse_regno;
8785 if (cfun->va_list_gpr_size)
8787 type = TREE_TYPE (gpr);
8788 t = build2 (MODIFY_EXPR, type,
8789 gpr, build_int_cst (type, n_gpr * 8));
8790 TREE_SIDE_EFFECTS (t) = 1;
8791 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8794 if (TARGET_SSE && cfun->va_list_fpr_size)
8796 type = TREE_TYPE (fpr);
8797 t = build2 (MODIFY_EXPR, type, fpr,
8798 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8799 TREE_SIDE_EFFECTS (t) = 1;
8800 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8803 /* Find the overflow area. */
8804 type = TREE_TYPE (ovf);
8805 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8806 ovf_rtx = crtl->args.internal_arg_pointer;
8807 else
8808 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8809 t = make_tree (type, ovf_rtx);
8810 if (words != 0)
8811 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8813 /* Store zero bounds for overflow area pointer. */
8814 if (chkp_function_instrumented_p (current_function_decl))
8815 chkp_expand_bounds_reset_for_mem (ovf, t);
8817 t = build2 (MODIFY_EXPR, type, ovf, t);
8818 TREE_SIDE_EFFECTS (t) = 1;
8819 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8821 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8823 /* Find the register save area.
8824 The function prologue saves it right above the stack frame. */
8825 type = TREE_TYPE (sav);
8826 t = make_tree (type, frame_pointer_rtx);
8827 if (!ix86_varargs_gpr_size)
8828 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8830 /* Store zero bounds for save area pointer. */
8831 if (chkp_function_instrumented_p (current_function_decl))
8832 chkp_expand_bounds_reset_for_mem (sav, t);
8834 t = build2 (MODIFY_EXPR, type, sav, t);
8835 TREE_SIDE_EFFECTS (t) = 1;
8836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8840 /* Implement va_arg. */
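/* The GIMPLE emitted below implements, in outline (a sketch using the
   local variable names, with 48 and 176 standing in for the
   X86_64_REGPARM_MAX / X86_64_SSE_REGPARM_MAX based limits):

     if (gpr > 48 - needed_intregs * 8
         || fpr > 176 - needed_sseregs * 16)
       goto stack;
     addr = sav + gpr  (and/or  sav + fpr);
     gpr += needed_intregs * 8;
     fpr += needed_sseregs * 16;
     goto done;
   stack:
     addr = ovf, aligned to the argument's boundary;
     ovf = addr + rsize * UNITS_PER_WORD;
   done:
     return *(TYPE *) addr;

   plus a copy through a temporary when the pieces in the register save
   area are not consecutive.  */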
8842 static tree
8843 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8844 gimple_seq *post_p)
8846 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8847 tree f_gpr, f_fpr, f_ovf, f_sav;
8848 tree gpr, fpr, ovf, sav, t;
8849 int size, rsize;
8850 tree lab_false, lab_over = NULL_TREE;
8851 tree addr, t2;
8852 rtx container;
8853 int indirect_p = 0;
8854 tree ptrtype;
8855 machine_mode nat_mode;
8856 unsigned int arg_boundary;
8858 /* Only 64-bit targets need something special. */
8859 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8860 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8862 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8863 f_fpr = DECL_CHAIN (f_gpr);
8864 f_ovf = DECL_CHAIN (f_fpr);
8865 f_sav = DECL_CHAIN (f_ovf);
8867 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8868 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8869 valist = build_va_arg_indirect_ref (valist);
8870 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8871 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8872 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8874 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8875 if (indirect_p)
8876 type = build_pointer_type (type);
8877 size = int_size_in_bytes (type);
8878 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8880 nat_mode = type_natural_mode (type, NULL, false);
8881 switch (nat_mode)
8883 case V8SFmode:
8884 case V8SImode:
8885 case V32QImode:
8886 case V16HImode:
8887 case V4DFmode:
8888 case V4DImode:
8889 case V16SFmode:
8890 case V16SImode:
8891 case V64QImode:
8892 case V32HImode:
8893 case V8DFmode:
8894 case V8DImode:
8895 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8896 if (!TARGET_64BIT_MS_ABI)
8898 container = NULL;
8899 break;
8902 default:
8903 container = construct_container (nat_mode, TYPE_MODE (type),
8904 type, 0, X86_64_REGPARM_MAX,
8905 X86_64_SSE_REGPARM_MAX, intreg,
8907 break;
8910 /* Pull the value out of the saved registers. */
8912 addr = create_tmp_var (ptr_type_node, "addr");
8914 if (container)
8916 int needed_intregs, needed_sseregs;
8917 bool need_temp;
8918 tree int_addr, sse_addr;
8920 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8921 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8923 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8925 need_temp = (!REG_P (container)
8926 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8927 || TYPE_ALIGN (type) > 128));
8929 /* In case we are passing a structure, verify that it occupies a consecutive
8930 block in the register save area. If not, we need to do moves. */
8931 if (!need_temp && !REG_P (container))
8933 /* Verify that all registers are strictly consecutive */
8934 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8936 int i;
8938 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8940 rtx slot = XVECEXP (container, 0, i);
8941 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8942 || INTVAL (XEXP (slot, 1)) != i * 16)
8943 need_temp = 1;
8946 else
8948 int i;
8950 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8952 rtx slot = XVECEXP (container, 0, i);
8953 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8954 || INTVAL (XEXP (slot, 1)) != i * 8)
8955 need_temp = 1;
8959 if (!need_temp)
8961 int_addr = addr;
8962 sse_addr = addr;
8964 else
8966 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8967 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8970 /* First ensure that we fit completely in registers. */
8971 if (needed_intregs)
8973 t = build_int_cst (TREE_TYPE (gpr),
8974 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8975 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8976 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8977 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8978 gimplify_and_add (t, pre_p);
8980 if (needed_sseregs)
8982 t = build_int_cst (TREE_TYPE (fpr),
8983 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8984 + X86_64_REGPARM_MAX * 8);
8985 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8986 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8987 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8988 gimplify_and_add (t, pre_p);
8991 /* Compute index to start of area used for integer regs. */
8992 if (needed_intregs)
8994 /* int_addr = gpr + sav; */
8995 t = fold_build_pointer_plus (sav, gpr);
8996 gimplify_assign (int_addr, t, pre_p);
8998 if (needed_sseregs)
9000 /* sse_addr = fpr + sav; */
9001 t = fold_build_pointer_plus (sav, fpr);
9002 gimplify_assign (sse_addr, t, pre_p);
9004 if (need_temp)
9006 int i, prev_size = 0;
9007 tree temp = create_tmp_var (type, "va_arg_tmp");
9009 /* addr = &temp; */
9010 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9011 gimplify_assign (addr, t, pre_p);
9013 for (i = 0; i < XVECLEN (container, 0); i++)
9015 rtx slot = XVECEXP (container, 0, i);
9016 rtx reg = XEXP (slot, 0);
9017 machine_mode mode = GET_MODE (reg);
9018 tree piece_type;
9019 tree addr_type;
9020 tree daddr_type;
9021 tree src_addr, src;
9022 int src_offset;
9023 tree dest_addr, dest;
9024 int cur_size = GET_MODE_SIZE (mode);
9026 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9027 prev_size = INTVAL (XEXP (slot, 1));
9028 if (prev_size + cur_size > size)
9030 cur_size = size - prev_size;
9031 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9032 if (mode == BLKmode)
9033 mode = QImode;
9035 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9036 if (mode == GET_MODE (reg))
9037 addr_type = build_pointer_type (piece_type);
9038 else
9039 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9040 true);
9041 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9042 true);
9044 if (SSE_REGNO_P (REGNO (reg)))
9046 src_addr = sse_addr;
9047 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9049 else
9051 src_addr = int_addr;
9052 src_offset = REGNO (reg) * 8;
9054 src_addr = fold_convert (addr_type, src_addr);
9055 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9057 dest_addr = fold_convert (daddr_type, addr);
9058 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9059 if (cur_size == GET_MODE_SIZE (mode))
9061 src = build_va_arg_indirect_ref (src_addr);
9062 dest = build_va_arg_indirect_ref (dest_addr);
9064 gimplify_assign (dest, src, pre_p);
9066 else
9068 tree copy
9069 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9070 3, dest_addr, src_addr,
9071 size_int (cur_size));
9072 gimplify_and_add (copy, pre_p);
9074 prev_size += cur_size;
9078 if (needed_intregs)
9080 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9081 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9082 gimplify_assign (gpr, t, pre_p);
9085 if (needed_sseregs)
9087 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9088 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9089 gimplify_assign (fpr, t, pre_p);
9092 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9094 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9097 /* ... otherwise out of the overflow area. */
9099 /* When the caller aligns a parameter on the stack, a parameter whose
9100 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will only be
9101 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
9102 with the caller. */
9103 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9104 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9105 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9107 /* Care for on-stack alignment if needed. */
9108 if (arg_boundary <= 64 || size == 0)
9109 t = ovf;
9110 else
9112 HOST_WIDE_INT align = arg_boundary / 8;
9113 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9114 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9115 build_int_cst (TREE_TYPE (t), -align));
9118 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9119 gimplify_assign (addr, t, pre_p);
9121 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9122 gimplify_assign (unshare_expr (ovf), t, pre_p);
9124 if (container)
9125 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9127 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9128 addr = fold_convert (ptrtype, addr);
9130 if (indirect_p)
9131 addr = build_va_arg_indirect_ref (addr);
9132 return build_va_arg_indirect_ref (addr);
9135 /* Return true if OPNUM's MEM should be matched
9136 in movabs* patterns. */
9138 bool
9139 ix86_check_movabs (rtx insn, int opnum)
9141 rtx set, mem;
9143 set = PATTERN (insn);
9144 if (GET_CODE (set) == PARALLEL)
9145 set = XVECEXP (set, 0, 0);
9146 gcc_assert (GET_CODE (set) == SET);
9147 mem = XEXP (set, opnum);
9148 while (GET_CODE (mem) == SUBREG)
9149 mem = SUBREG_REG (mem);
9150 gcc_assert (MEM_P (mem));
9151 return volatile_ok || !MEM_VOLATILE_P (mem);
9154 /* Initialize the table of extra 80387 mathematical constants. */
9156 static void
9157 init_ext_80387_constants (void)
9159 static const char * cst[5] =
9161 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9162 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9163 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9164 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9165 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9167 int i;
9169 for (i = 0; i < 5; i++)
9171 real_from_string (&ext_80387_constants_table[i], cst[i]);
9172 /* Ensure each constant is rounded to XFmode precision. */
9173 real_convert (&ext_80387_constants_table[i],
9174 XFmode, &ext_80387_constants_table[i]);
9177 ext_80387_constants_init = 1;
9180 /* Return non-zero if the constant is something that
9181 can be loaded with a special instruction. */
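/* The return value encodes which x87 load sequence can be used; see
   standard_80387_constant_opcode below:

     -1  not an 80387 constant    0  no special instruction
      1  0.0       fldz           2  1.0       fld1
      3  log10(2)  fldlg2         4  ln(2)     fldln2
      5  log2(e)   fldl2e         6  log2(10)  fldl2t
      7  pi        fldpi
      8  -0.0      fldz; fchs     9  -1.0      fld1; fchs  */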
9184 standard_80387_constant_p (rtx x)
9186 machine_mode mode = GET_MODE (x);
9188 REAL_VALUE_TYPE r;
9190 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9191 return -1;
9193 if (x == CONST0_RTX (mode))
9194 return 1;
9195 if (x == CONST1_RTX (mode))
9196 return 2;
9198 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9200 /* For XFmode constants, try to find a special 80387 instruction when
9201 optimizing for size or on those CPUs that benefit from them. */
9202 if (mode == XFmode
9203 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9205 int i;
9207 if (! ext_80387_constants_init)
9208 init_ext_80387_constants ();
9210 for (i = 0; i < 5; i++)
9211 if (real_identical (&r, &ext_80387_constants_table[i]))
9212 return i + 3;
9215 /* A load of the constant -0.0 or -1.0 will be split into an
9216 fldz;fchs or fld1;fchs sequence. */
9217 if (real_isnegzero (&r))
9218 return 8;
9219 if (real_identical (&r, &dconstm1))
9220 return 9;
9222 return 0;
9225 /* Return the opcode of the special instruction to be used to load
9226 the constant X. */
9228 const char *
9229 standard_80387_constant_opcode (rtx x)
9231 switch (standard_80387_constant_p (x))
9233 case 1:
9234 return "fldz";
9235 case 2:
9236 return "fld1";
9237 case 3:
9238 return "fldlg2";
9239 case 4:
9240 return "fldln2";
9241 case 5:
9242 return "fldl2e";
9243 case 6:
9244 return "fldl2t";
9245 case 7:
9246 return "fldpi";
9247 case 8:
9248 case 9:
9249 return "#";
9250 default:
9251 gcc_unreachable ();
9255 /* Return the CONST_DOUBLE representing the 80387 constant that is
9256 loaded by the specified special instruction. The argument IDX
9257 matches the return value from standard_80387_constant_p. */
9260 standard_80387_constant_rtx (int idx)
9262 int i;
9264 if (! ext_80387_constants_init)
9265 init_ext_80387_constants ();
9267 switch (idx)
9269 case 3:
9270 case 4:
9271 case 5:
9272 case 6:
9273 case 7:
9274 i = idx - 3;
9275 break;
9277 default:
9278 gcc_unreachable ();
9281 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9282 XFmode);
9285 /* Return 1 if X is all 0s and 2 if X is all 1s
9286 in a supported SSE/AVX vector mode. */
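/* These two classes are special because neither needs a constant pool
   load: all-zeros can be materialized by xoring a register with itself,
   and all-ones by comparing a register for equality with itself
   (pcmpeqd, or vpternlogd with an all-ones immediate for the 512-bit
   modes); see standard_sse_constant_opcode below.  */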
9289 standard_sse_constant_p (rtx x)
9291 machine_mode mode = GET_MODE (x);
9293 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9294 return 1;
9295 if (vector_all_ones_operand (x, mode))
9296 switch (mode)
9298 case V16QImode:
9299 case V8HImode:
9300 case V4SImode:
9301 case V2DImode:
9302 if (TARGET_SSE2)
9303 return 2;
9304 case V32QImode:
9305 case V16HImode:
9306 case V8SImode:
9307 case V4DImode:
9308 if (TARGET_AVX2)
9309 return 2;
9310 case V64QImode:
9311 case V32HImode:
9312 case V16SImode:
9313 case V8DImode:
9314 if (TARGET_AVX512F)
9315 return 2;
9316 default:
9317 break;
9320 return 0;
9323 /* Return the opcode of the special instruction to be used to load
9324 the constant X. */
9326 const char *
9327 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9329 switch (standard_sse_constant_p (x))
9331 case 1:
9332 switch (get_attr_mode (insn))
9334 case MODE_XI:
9335 return "vpxord\t%g0, %g0, %g0";
9336 case MODE_V16SF:
9337 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9338 : "vpxord\t%g0, %g0, %g0";
9339 case MODE_V8DF:
9340 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9341 : "vpxorq\t%g0, %g0, %g0";
9342 case MODE_TI:
9343 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9344 : "%vpxor\t%0, %d0";
9345 case MODE_V2DF:
9346 return "%vxorpd\t%0, %d0";
9347 case MODE_V4SF:
9348 return "%vxorps\t%0, %d0";
9350 case MODE_OI:
9351 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9352 : "vpxor\t%x0, %x0, %x0";
9353 case MODE_V4DF:
9354 return "vxorpd\t%x0, %x0, %x0";
9355 case MODE_V8SF:
9356 return "vxorps\t%x0, %x0, %x0";
9358 default:
9359 break;
9362 case 2:
9363 if (TARGET_AVX512VL
9364 || get_attr_mode (insn) == MODE_XI
9365 || get_attr_mode (insn) == MODE_V8DF
9366 || get_attr_mode (insn) == MODE_V16SF)
9367 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9368 if (TARGET_AVX)
9369 return "vpcmpeqd\t%0, %0, %0";
9370 else
9371 return "pcmpeqd\t%0, %0";
9373 default:
9374 break;
9376 gcc_unreachable ();
9379 /* Return true if OP contains a symbol reference. */
9381 bool
9382 symbolic_reference_mentioned_p (rtx op)
9384 const char *fmt;
9385 int i;
9387 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9388 return true;
9390 fmt = GET_RTX_FORMAT (GET_CODE (op));
9391 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9393 if (fmt[i] == 'E')
9395 int j;
9397 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9398 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9399 return true;
9402 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9403 return true;
9406 return false;
9409 /* Return true if it is appropriate to emit `ret' instructions in the
9410 body of a function. Do this only if the epilogue is simple, needing a
9411 couple of insns. Prior to reloading, we can't tell how many registers
9412 must be saved, so return false then. Return false if there is no frame
9413 marker to de-allocate. */
9415 bool
9416 ix86_can_use_return_insn_p (void)
9418 struct ix86_frame frame;
9420 if (! reload_completed || frame_pointer_needed)
9421 return 0;
9423 /* Don't allow more than 32k of arguments to be popped, since that's all
9424 we can do with one instruction. */
9425 if (crtl->args.pops_args && crtl->args.size >= 32768)
9426 return 0;
9428 ix86_compute_frame_layout (&frame);
9429 return (frame.stack_pointer_offset == UNITS_PER_WORD
9430 && (frame.nregs + frame.nsseregs) == 0);
9433 /* Value should be nonzero if functions must have frame pointers.
9434 Zero means the frame pointer need not be set up (and parms may
9435 be accessed via the stack pointer) in functions that seem suitable. */
9437 static bool
9438 ix86_frame_pointer_required (void)
9440 /* If we accessed previous frames, then the generated code expects
9441 to be able to access the saved ebp value in our frame. */
9442 if (cfun->machine->accesses_prev_frame)
9443 return true;
9445 /* Several x86 OSes need a frame pointer for other reasons,
9446 usually pertaining to setjmp. */
9447 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9448 return true;
9450 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9451 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9452 return true;
9454 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9455 stack allocation is 4GB. */
9456 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9457 return true;
9459 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9460 turns off the frame pointer by default. Turn it back on now if
9461 we've not got a leaf function. */
9462 if (TARGET_OMIT_LEAF_FRAME_POINTER
9463 && (!crtl->is_leaf
9464 || ix86_current_function_calls_tls_descriptor))
9465 return true;
9467 if (crtl->profile && !flag_fentry)
9468 return true;
9470 return false;
9473 /* Record that the current function accesses previous call frames. */
9475 void
9476 ix86_setup_frame_addresses (void)
9478 cfun->machine->accesses_prev_frame = 1;
9481 #ifndef USE_HIDDEN_LINKONCE
9482 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9483 # define USE_HIDDEN_LINKONCE 1
9484 # else
9485 # define USE_HIDDEN_LINKONCE 0
9486 # endif
9487 #endif
9489 static int pic_labels_used;
9491 /* Fills in the label name that should be used for a pc thunk for
9492 the given register. */
9494 static void
9495 get_pc_thunk_name (char name[32], unsigned int regno)
9497 gcc_assert (!TARGET_64BIT);
9499 if (USE_HIDDEN_LINKONCE)
9500 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9501 else
9502 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9506 /* This function generates the per-register pc thunks used for -fpic; each
9507 loads its register with the return address of the caller and then returns. */
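/* For the common %ebx case the emitted thunk is roughly

        __x86.get_pc_thunk.bx:
                movl    (%esp), %ebx
                ret

   and output_set_got below pairs it with

        call    __x86.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   so that %ebx ends up pointing at the GOT.  (Illustrative AT&T syntax
   only; the authoritative templates are the output_asm_insn strings in
   the code below.)  */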
9509 static void
9510 ix86_code_end (void)
9512 rtx xops[2];
9513 int regno;
9515 for (regno = AX_REG; regno <= SP_REG; regno++)
9517 char name[32];
9518 tree decl;
9520 if (!(pic_labels_used & (1 << regno)))
9521 continue;
9523 get_pc_thunk_name (name, regno);
9525 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9526 get_identifier (name),
9527 build_function_type_list (void_type_node, NULL_TREE));
9528 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9529 NULL_TREE, void_type_node);
9530 TREE_PUBLIC (decl) = 1;
9531 TREE_STATIC (decl) = 1;
9532 DECL_IGNORED_P (decl) = 1;
9534 #if TARGET_MACHO
9535 if (TARGET_MACHO)
9537 switch_to_section (darwin_sections[text_coal_section]);
9538 fputs ("\t.weak_definition\t", asm_out_file);
9539 assemble_name (asm_out_file, name);
9540 fputs ("\n\t.private_extern\t", asm_out_file);
9541 assemble_name (asm_out_file, name);
9542 putc ('\n', asm_out_file);
9543 ASM_OUTPUT_LABEL (asm_out_file, name);
9544 DECL_WEAK (decl) = 1;
9546 else
9547 #endif
9548 if (USE_HIDDEN_LINKONCE)
9550 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9552 targetm.asm_out.unique_section (decl, 0);
9553 switch_to_section (get_named_section (decl, NULL, 0));
9555 targetm.asm_out.globalize_label (asm_out_file, name);
9556 fputs ("\t.hidden\t", asm_out_file);
9557 assemble_name (asm_out_file, name);
9558 putc ('\n', asm_out_file);
9559 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9561 else
9563 switch_to_section (text_section);
9564 ASM_OUTPUT_LABEL (asm_out_file, name);
9567 DECL_INITIAL (decl) = make_node (BLOCK);
9568 current_function_decl = decl;
9569 init_function_start (decl);
9570 first_function_block_is_cold = false;
9571 /* Make sure unwind info is emitted for the thunk if needed. */
9572 final_start_function (emit_barrier (), asm_out_file, 1);
9574 /* Pad stack IP move with 4 instructions (two NOPs count
9575 as one instruction). */
9576 if (TARGET_PAD_SHORT_FUNCTION)
9578 int i = 8;
9580 while (i--)
9581 fputs ("\tnop\n", asm_out_file);
9584 xops[0] = gen_rtx_REG (Pmode, regno);
9585 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9586 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9587 output_asm_insn ("%!ret", NULL);
9588 final_end_function ();
9589 init_insn_lengths ();
9590 free_after_compilation (cfun);
9591 set_cfun (NULL);
9592 current_function_decl = NULL;
9595 if (flag_split_stack)
9596 file_end_indicate_split_stack ();
9599 /* Emit code for the SET_GOT patterns. */
9601 const char *
9602 output_set_got (rtx dest, rtx label)
9604 rtx xops[3];
9606 xops[0] = dest;
9608 if (TARGET_VXWORKS_RTP && flag_pic)
9610 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9611 xops[2] = gen_rtx_MEM (Pmode,
9612 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9613 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9615 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9616 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9617 an unadorned address. */
9618 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9619 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9620 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9621 return "";
9624 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9626 if (!flag_pic)
9628 if (TARGET_MACHO)
9629 /* We don't need a pic base, we're not producing pic. */
9630 gcc_unreachable ();
9632 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9633 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9634 targetm.asm_out.internal_label (asm_out_file, "L",
9635 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9637 else
9639 char name[32];
9640 get_pc_thunk_name (name, REGNO (dest));
9641 pic_labels_used |= 1 << REGNO (dest);
9643 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9644 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9645 output_asm_insn ("%!call\t%X2", xops);
9647 #if TARGET_MACHO
9648 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9649 This is what will be referenced by the Mach-O PIC subsystem. */
9650 if (machopic_should_output_picbase_label () || !label)
9651 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9653 /* When we are restoring the pic base at the site of a nonlocal label,
9654 and we decided to emit the pic base above, we will still output a
9655 local label used for calculating the correction offset (even though
9656 the offset will be 0 in that case). */
9657 if (label)
9658 targetm.asm_out.internal_label (asm_out_file, "L",
9659 CODE_LABEL_NUMBER (label));
9660 #endif
9663 if (!TARGET_MACHO)
9664 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9666 return "";
9669 /* Generate a "push" pattern for input ARG. */
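/* For 64-bit targets the RTL generated below is essentially

     (set (mem:DI (pre_dec:DI (reg:DI sp)))
          (reg:DI <arg>))

   with SImode used instead on 32-bit targets; the m->fs updates keep the
   recorded CFA and stack-pointer offsets in step with the push.  */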
9671 static rtx
9672 gen_push (rtx arg)
9674 struct machine_function *m = cfun->machine;
9676 if (m->fs.cfa_reg == stack_pointer_rtx)
9677 m->fs.cfa_offset += UNITS_PER_WORD;
9678 m->fs.sp_offset += UNITS_PER_WORD;
9680 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9681 arg = gen_rtx_REG (word_mode, REGNO (arg));
9683 return gen_rtx_SET (VOIDmode,
9684 gen_rtx_MEM (word_mode,
9685 gen_rtx_PRE_DEC (Pmode,
9686 stack_pointer_rtx)),
9687 arg);
9690 /* Generate a "pop" pattern for input ARG. */
9692 static rtx
9693 gen_pop (rtx arg)
9695 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9696 arg = gen_rtx_REG (word_mode, REGNO (arg));
9698 return gen_rtx_SET (VOIDmode,
9699 arg,
9700 gen_rtx_MEM (word_mode,
9701 gen_rtx_POST_INC (Pmode,
9702 stack_pointer_rtx)));
9705 /* Return >= 0 if there is an unused call-clobbered register available
9706 for the entire function. */
9708 static unsigned int
9709 ix86_select_alt_pic_regnum (void)
9711 if (ix86_use_pseudo_pic_reg ())
9712 return INVALID_REGNUM;
9714 if (crtl->is_leaf
9715 && !crtl->profile
9716 && !ix86_current_function_calls_tls_descriptor)
9718 int i, drap;
9719 /* Can't use the same register for both PIC and DRAP. */
9720 if (crtl->drap_reg)
9721 drap = REGNO (crtl->drap_reg);
9722 else
9723 drap = -1;
9724 for (i = 2; i >= 0; --i)
9725 if (i != drap && !df_regs_ever_live_p (i))
9726 return i;
9729 return INVALID_REGNUM;
9732 /* Return TRUE if we need to save REGNO. */
9734 static bool
9735 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9737 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9738 && pic_offset_table_rtx)
9740 if (ix86_use_pseudo_pic_reg ())
9742 /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
9743 _mcount in the prologue. */
9744 if (!TARGET_64BIT && flag_pic && crtl->profile)
9745 return true;
9747 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9748 || crtl->profile
9749 || crtl->calls_eh_return
9750 || crtl->uses_const_pool
9751 || cfun->has_nonlocal_label)
9752 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9755 if (crtl->calls_eh_return && maybe_eh_return)
9757 unsigned i;
9758 for (i = 0; ; i++)
9760 unsigned test = EH_RETURN_DATA_REGNO (i);
9761 if (test == INVALID_REGNUM)
9762 break;
9763 if (test == regno)
9764 return true;
9768 if (crtl->drap_reg
9769 && regno == REGNO (crtl->drap_reg)
9770 && !cfun->machine->no_drap_save_restore)
9771 return true;
9773 return (df_regs_ever_live_p (regno)
9774 && !call_used_regs[regno]
9775 && !fixed_regs[regno]
9776 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9779 /* Return the number of saved general purpose registers. */
9781 static int
9782 ix86_nsaved_regs (void)
9784 int nregs = 0;
9785 int regno;
9787 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9788 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9789 nregs ++;
9790 return nregs;
9793 /* Return the number of saved SSE registers. */
9795 static int
9796 ix86_nsaved_sseregs (void)
9798 int nregs = 0;
9799 int regno;
9801 if (!TARGET_64BIT_MS_ABI)
9802 return 0;
9803 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9804 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9805 nregs ++;
9806 return nregs;
9809 /* Given FROM and TO register numbers, say whether this elimination is
9810 allowed. If stack alignment is needed, we can only replace argument
9811 pointer with hard frame pointer, or replace frame pointer with stack
9812 pointer. Otherwise, frame pointer elimination is automatically
9813 handled and all other eliminations are valid. */
9815 static bool
9816 ix86_can_eliminate (const int from, const int to)
9818 if (stack_realign_fp)
9819 return ((from == ARG_POINTER_REGNUM
9820 && to == HARD_FRAME_POINTER_REGNUM)
9821 || (from == FRAME_POINTER_REGNUM
9822 && to == STACK_POINTER_REGNUM));
9823 else
9824 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9827 /* Return the offset between two registers, one to be eliminated, and the other
9828 its replacement, at the start of a routine. */
9830 HOST_WIDE_INT
9831 ix86_initial_elimination_offset (int from, int to)
9833 struct ix86_frame frame;
9834 ix86_compute_frame_layout (&frame);
9836 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9837 return frame.hard_frame_pointer_offset;
9838 else if (from == FRAME_POINTER_REGNUM
9839 && to == HARD_FRAME_POINTER_REGNUM)
9840 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9841 else
9843 gcc_assert (to == STACK_POINTER_REGNUM);
9845 if (from == ARG_POINTER_REGNUM)
9846 return frame.stack_pointer_offset;
9848 gcc_assert (from == FRAME_POINTER_REGNUM);
9849 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9853 /* In a dynamically-aligned function, we can't know the offset from
9854 stack pointer to frame pointer, so we must ensure that setjmp
9855 eliminates fp against the hard fp (%ebp) rather than trying to
9856 index from %esp up to the top of the frame across a gap that is
9857 of unknown (at compile-time) size. */
9858 static rtx
9859 ix86_builtin_setjmp_frame_value (void)
9861 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9864 /* When using -fsplit-stack, the allocation routines set a field in
9865 the TCB to the bottom of the stack plus this much space, measured
9866 in bytes. */
9868 #define SPLIT_STACK_AVAILABLE 256
9870 /* Fill the ix86_frame structure describing the frame of the function currently being compiled. */
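/* The offsets computed below all measure distance from the CFA and are
   assigned in this order (a rough sketch; which pieces exist, and where
   the hard frame pointer really lands under SEH, depend on the
   conditions handled in the code):

     return address, optional static chain, saved frame pointer
                                -> hfp_save_offset, hard_frame_pointer_offset
     saved general registers    -> reg_save_offset
     saved SSE registers        -> sse_reg_save_offset (16-byte aligned)
     va_arg register save area  -> frame_pointer_offset
     local variables
     outgoing argument area     -> stack_pointer_offset (less any red zone)  */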
9872 static void
9873 ix86_compute_frame_layout (struct ix86_frame *frame)
9875 unsigned HOST_WIDE_INT stack_alignment_needed;
9876 HOST_WIDE_INT offset;
9877 unsigned HOST_WIDE_INT preferred_alignment;
9878 HOST_WIDE_INT size = get_frame_size ();
9879 HOST_WIDE_INT to_allocate;
9881 frame->nregs = ix86_nsaved_regs ();
9882 frame->nsseregs = ix86_nsaved_sseregs ();
9884 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9885 except for function prologues and leaf functions. */
9886 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9887 && (!crtl->is_leaf || cfun->calls_alloca != 0
9888 || ix86_current_function_calls_tls_descriptor))
9890 crtl->preferred_stack_boundary = 128;
9891 crtl->stack_alignment_needed = 128;
9893 /* preferred_stack_boundary is never updated for calls
9894 expanded from a TLS descriptor. Update it here. We don't update it in
9895 the expand stage because, according to the comments before
9896 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9897 away. */
9898 else if (ix86_current_function_calls_tls_descriptor
9899 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9901 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9902 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9903 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9906 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9907 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9909 gcc_assert (!size || stack_alignment_needed);
9910 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9911 gcc_assert (preferred_alignment <= stack_alignment_needed);
9913 /* For SEH we have to limit the amount of code movement into the prologue.
9914 At present we do this via a BLOCKAGE, at which point there's very little
9915 scheduling that can be done, which means that there's very little point
9916 in doing anything except PUSHs. */
9917 if (TARGET_SEH)
9918 cfun->machine->use_fast_prologue_epilogue = false;
9920 /* During reload iterations the number of registers saved can change.
9921 Recompute the value as needed. Do not recompute when the number of registers
9922 didn't change, as reload makes multiple calls to this function and does not
9923 expect the decision to change within a single iteration. */
9924 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9925 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9927 int count = frame->nregs;
9928 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9930 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9932 /* The fast prologue uses move instead of push to save registers. This
9933 is significantly longer, but also executes faster as modern hardware
9934 can execute the moves in parallel, but can't do that for push/pop.
9936 Be careful about choosing which prologue to emit: when the function takes
9937 many instructions to execute, we may use the slow version, as well as when
9938 the function is known to be outside a hot spot (this is known with
9939 feedback only). Weight the size of the function by the number of registers
9940 to save, as it is cheap to use one or two push instructions but very
9941 slow to use many of them. */
9942 if (count)
9943 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9944 if (node->frequency < NODE_FREQUENCY_NORMAL
9945 || (flag_branch_probabilities
9946 && node->frequency < NODE_FREQUENCY_HOT))
9947 cfun->machine->use_fast_prologue_epilogue = false;
9948 else
9949 cfun->machine->use_fast_prologue_epilogue
9950 = !expensive_function_p (count);
9953 frame->save_regs_using_mov
9954 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9955 /* If static stack checking is enabled and done with probes,
9956 the registers need to be saved before allocating the frame. */
9957 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9959 /* Skip return address. */
9960 offset = UNITS_PER_WORD;
9962 /* Skip pushed static chain. */
9963 if (ix86_static_chain_on_stack)
9964 offset += UNITS_PER_WORD;
9966 /* Skip saved base pointer. */
9967 if (frame_pointer_needed)
9968 offset += UNITS_PER_WORD;
9969 frame->hfp_save_offset = offset;
9971 /* The traditional frame pointer location is at the top of the frame. */
9972 frame->hard_frame_pointer_offset = offset;
9974 /* Register save area */
9975 offset += frame->nregs * UNITS_PER_WORD;
9976 frame->reg_save_offset = offset;
9978 /* On SEH target, registers are pushed just before the frame pointer
9979 location. */
9980 if (TARGET_SEH)
9981 frame->hard_frame_pointer_offset = offset;
9983 /* Align and set SSE register save area. */
9984 if (frame->nsseregs)
9986 /* The only ABI that has saved SSE registers (Win64) also has a
9987 16-byte aligned default stack, and thus we don't need to be
9988 within the re-aligned local stack frame to save them. */
9989 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9990 offset = (offset + 16 - 1) & -16;
9991 offset += frame->nsseregs * 16;
9993 frame->sse_reg_save_offset = offset;
9995 /* The re-aligned stack starts here. Values before this point are not
9996 directly comparable with values below this point. In order to make
9997 sure that no value happens to be the same before and after, force
9998 the alignment computation below to add a non-zero value. */
9999 if (stack_realign_fp)
10000 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10002 /* Va-arg area */
10003 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10004 offset += frame->va_arg_size;
10006 /* Align start of frame for local function. */
10007 if (stack_realign_fp
10008 || offset != frame->sse_reg_save_offset
10009 || size != 0
10010 || !crtl->is_leaf
10011 || cfun->calls_alloca
10012 || ix86_current_function_calls_tls_descriptor)
10013 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10015 /* Frame pointer points here. */
10016 frame->frame_pointer_offset = offset;
10018 offset += size;
10020 /* Add the outgoing arguments area. It can be skipped if we eliminated
10021 all the function calls as dead code.
10022 Skipping is however impossible when the function calls alloca; the alloca
10023 expander assumes that the last crtl->outgoing_args_size bytes
10024 of the stack frame are unused. */
10025 if (ACCUMULATE_OUTGOING_ARGS
10026 && (!crtl->is_leaf || cfun->calls_alloca
10027 || ix86_current_function_calls_tls_descriptor))
10029 offset += crtl->outgoing_args_size;
10030 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10032 else
10033 frame->outgoing_arguments_size = 0;
10035 /* Align stack boundary. Only needed if we're calling another function
10036 or using alloca. */
10037 if (!crtl->is_leaf || cfun->calls_alloca
10038 || ix86_current_function_calls_tls_descriptor)
10039 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10041 /* We've reached end of stack frame. */
10042 frame->stack_pointer_offset = offset;
10044 /* Size prologue needs to allocate. */
10045 to_allocate = offset - frame->sse_reg_save_offset;
10047 if ((!to_allocate && frame->nregs <= 1)
10048 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10049 frame->save_regs_using_mov = false;
10051 if (ix86_using_red_zone ()
10052 && crtl->sp_is_unchanging
10053 && crtl->is_leaf
10054 && !ix86_current_function_calls_tls_descriptor)
10056 frame->red_zone_size = to_allocate;
10057 if (frame->save_regs_using_mov)
10058 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10059 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10060 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10062 else
10063 frame->red_zone_size = 0;
10064 frame->stack_pointer_offset -= frame->red_zone_size;
10066 /* The SEH frame pointer location is near the bottom of the frame.
10067 This is enforced by the fact that the difference between the
10068 stack pointer and the frame pointer is limited to 240 bytes in
10069 the unwind data structure. */
10070 if (TARGET_SEH)
10072 HOST_WIDE_INT diff;
10074 /* If we can leave the frame pointer where it is, do so. Also, returns
10075 the establisher frame for __builtin_frame_address (0). */
10076 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10077 if (diff <= SEH_MAX_FRAME_SIZE
10078 && (diff > 240 || (diff & 15) != 0)
10079 && !crtl->accesses_prior_frames)
10081 /* Ideally we'd determine what portion of the local stack frame
10082 (within the constraint of the lowest 240) is most heavily used.
10083 But without that complication, simply bias the frame pointer
10084 by 128 bytes so as to maximize the amount of the local stack
10085 frame that is addressable with 8-bit offsets. */
10086 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10091 /* This is semi-inlined memory_address_length, but simplified
10092 since we know that we're always dealing with reg+offset, and
10093 to avoid having to create and discard all that rtl. */
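/* For example (lengths of the displacement/SIB part only, as estimated
   by the code below):

     0(%rax)    -> 0      0(%rbp)    -> 1  (needs a disp8 even for 0)
     8(%rax)    -> 1      8(%rsp)    -> 2  (disp8 plus mandatory SIB byte)
     1024(%rax) -> 4      1024(%r12) -> 5

   These values feed the base-register choice in choose_baseaddr.  */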
10095 static inline int
10096 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10098 int len = 4;
10100 if (offset == 0)
10102 /* EBP and R13 cannot be encoded without an offset. */
10103 len = (regno == BP_REG || regno == R13_REG);
10105 else if (IN_RANGE (offset, -128, 127))
10106 len = 1;
10108 /* ESP and R12 must be encoded with a SIB byte. */
10109 if (regno == SP_REG || regno == R12_REG)
10110 len++;
10112 return len;
10115 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10116 The valid base registers are taken from CFUN->MACHINE->FS. */
10118 static rtx
10119 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10121 const struct machine_function *m = cfun->machine;
10122 rtx base_reg = NULL;
10123 HOST_WIDE_INT base_offset = 0;
10125 if (m->use_fast_prologue_epilogue)
10127 /* Choose the base register most likely to allow the most scheduling
10128 opportunities. Generally FP is valid throughout the function,
10129 while DRAP must be reloaded within the epilogue. But choose either
10130 over the SP due to increased encoding size. */
10132 if (m->fs.fp_valid)
10134 base_reg = hard_frame_pointer_rtx;
10135 base_offset = m->fs.fp_offset - cfa_offset;
10137 else if (m->fs.drap_valid)
10139 base_reg = crtl->drap_reg;
10140 base_offset = 0 - cfa_offset;
10142 else if (m->fs.sp_valid)
10144 base_reg = stack_pointer_rtx;
10145 base_offset = m->fs.sp_offset - cfa_offset;
10148 else
10150 HOST_WIDE_INT toffset;
10151 int len = 16, tlen;
10153 /* Choose the base register with the smallest address encoding.
10154 With a tie, choose FP > DRAP > SP. */
10155 if (m->fs.sp_valid)
10157 base_reg = stack_pointer_rtx;
10158 base_offset = m->fs.sp_offset - cfa_offset;
10159 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10161 if (m->fs.drap_valid)
10163 toffset = 0 - cfa_offset;
10164 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10165 if (tlen <= len)
10167 base_reg = crtl->drap_reg;
10168 base_offset = toffset;
10169 len = tlen;
10172 if (m->fs.fp_valid)
10174 toffset = m->fs.fp_offset - cfa_offset;
10175 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10176 if (tlen <= len)
10178 base_reg = hard_frame_pointer_rtx;
10179 base_offset = toffset;
10180 len = tlen;
10184 gcc_assert (base_reg != NULL);
10186 return plus_constant (Pmode, base_reg, base_offset);
10189 /* Emit code to save registers in the prologue. */
10191 static void
10192 ix86_emit_save_regs (void)
10194 unsigned int regno;
10195 rtx insn;
10197 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10198 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10200 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10201 RTX_FRAME_RELATED_P (insn) = 1;
10205 /* Emit a single register save at CFA - CFA_OFFSET. */
10207 static void
10208 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10209 HOST_WIDE_INT cfa_offset)
10211 struct machine_function *m = cfun->machine;
10212 rtx reg = gen_rtx_REG (mode, regno);
10213 rtx mem, addr, base, insn;
10215 addr = choose_baseaddr (cfa_offset);
10216 mem = gen_frame_mem (mode, addr);
10218 /* For SSE saves, we need to indicate the 128-bit alignment. */
10219 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10221 insn = emit_move_insn (mem, reg);
10222 RTX_FRAME_RELATED_P (insn) = 1;
10224 base = addr;
10225 if (GET_CODE (base) == PLUS)
10226 base = XEXP (base, 0);
10227 gcc_checking_assert (REG_P (base));
10229 /* When saving registers into a re-aligned local stack frame, avoid
10230 any tricky guessing by dwarf2out. */
10231 if (m->fs.realigned)
10233 gcc_checking_assert (stack_realign_drap);
10235 if (regno == REGNO (crtl->drap_reg))
10237 /* A bit of a hack. We force the DRAP register to be saved in
10238 the re-aligned stack frame, which provides us with a copy
10239 of the CFA that will last past the prologue. Install it. */
10240 gcc_checking_assert (cfun->machine->fs.fp_valid);
10241 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10242 cfun->machine->fs.fp_offset - cfa_offset);
10243 mem = gen_rtx_MEM (mode, addr);
10244 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10246 else
10248 /* The frame pointer is a stable reference within the
10249 aligned frame. Use it. */
10250 gcc_checking_assert (cfun->machine->fs.fp_valid);
10251 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10252 cfun->machine->fs.fp_offset - cfa_offset);
10253 mem = gen_rtx_MEM (mode, addr);
10254 add_reg_note (insn, REG_CFA_EXPRESSION,
10255 gen_rtx_SET (VOIDmode, mem, reg));
10259 /* The memory may not be relative to the current CFA register,
10260 which means that we may need to generate a new pattern for
10261 use by the unwind info. */
10262 else if (base != m->fs.cfa_reg)
10264 addr = plus_constant (Pmode, m->fs.cfa_reg,
10265 m->fs.cfa_offset - cfa_offset);
10266 mem = gen_rtx_MEM (mode, addr);
10267 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10271 /* Emit code to save registers using MOV insns.
10272 First register is stored at CFA - CFA_OFFSET. */
10273 static void
10274 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10276 unsigned int regno;
10278 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10279 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10281 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10282 cfa_offset -= UNITS_PER_WORD;
10286 /* Emit code to save SSE registers using MOV insns.
10287 First register is stored at CFA - CFA_OFFSET. */
10288 static void
10289 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10291 unsigned int regno;
10293 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10294 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10296 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10297 cfa_offset -= 16;
10301 static GTY(()) rtx queued_cfa_restores;
10303 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
10304 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10305 Don't add the note if the previously saved value will be left untouched
10306 within the stack red zone until return, as unwinders can find the same value
10307 in the register and on the stack. */
10309 static void
10310 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10312 if (!crtl->shrink_wrapped
10313 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10314 return;
10316 if (insn)
10318 add_reg_note (insn, REG_CFA_RESTORE, reg);
10319 RTX_FRAME_RELATED_P (insn) = 1;
10321 else
10322 queued_cfa_restores
10323 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10326 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10328 static void
10329 ix86_add_queued_cfa_restore_notes (rtx insn)
10331 rtx last;
10332 if (!queued_cfa_restores)
10333 return;
10334 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10336 XEXP (last, 1) = REG_NOTES (insn);
10337 REG_NOTES (insn) = queued_cfa_restores;
10338 queued_cfa_restores = NULL_RTX;
10339 RTX_FRAME_RELATED_P (insn) = 1;
10342 /* Expand a prologue or epilogue stack adjustment.
10343 The pattern exists to put a dependency on all ebp-based memory accesses.
10344 STYLE should be negative if instructions should be marked as frame related,
10345 zero if the %r11 register is live and cannot be freely used, and positive
10346 otherwise. */
10348 static void
10349 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10350 int style, bool set_cfa)
10352 struct machine_function *m = cfun->machine;
10353 rtx insn;
10354 bool add_frame_related_expr = false;
10356 if (Pmode == SImode)
10357 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10358 else if (x86_64_immediate_operand (offset, DImode))
10359 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10360 else
10362 rtx tmp;
10363 /* r11 is used by indirect sibcall return as well, set before the
10364 epilogue and used after the epilogue. */
10365 if (style)
10366 tmp = gen_rtx_REG (DImode, R11_REG);
10367 else
10369 gcc_assert (src != hard_frame_pointer_rtx
10370 && dest != hard_frame_pointer_rtx);
10371 tmp = hard_frame_pointer_rtx;
10373 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10374 if (style < 0)
10375 add_frame_related_expr = true;
10377 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10380 insn = emit_insn (insn);
10381 if (style >= 0)
10382 ix86_add_queued_cfa_restore_notes (insn);
10384 if (set_cfa)
10386 rtx r;
10388 gcc_assert (m->fs.cfa_reg == src);
10389 m->fs.cfa_offset += INTVAL (offset);
10390 m->fs.cfa_reg = dest;
10392 r = gen_rtx_PLUS (Pmode, src, offset);
10393 r = gen_rtx_SET (VOIDmode, dest, r);
10394 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10395 RTX_FRAME_RELATED_P (insn) = 1;
10397 else if (style < 0)
10399 RTX_FRAME_RELATED_P (insn) = 1;
10400 if (add_frame_related_expr)
10402 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10403 r = gen_rtx_SET (VOIDmode, dest, r);
10404 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10408 if (dest == stack_pointer_rtx)
10410 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10411 bool valid = m->fs.sp_valid;
10413 if (src == hard_frame_pointer_rtx)
10415 valid = m->fs.fp_valid;
10416 ooffset = m->fs.fp_offset;
10418 else if (src == crtl->drap_reg)
10420 valid = m->fs.drap_valid;
10421 ooffset = 0;
10423 else
10425 /* Else there are two possibilities: SP itself, which we set
10426 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10427 taken care of by hand along the eh_return path. */
10428 gcc_checking_assert (src == stack_pointer_rtx
10429 || offset == const0_rtx);
10432 m->fs.sp_offset = ooffset - INTVAL (offset);
10433 m->fs.sp_valid = valid;
10437 /* Find an available register to be used as the dynamic realign argument
10438 pointer register. Such a register will be written in the prologue and
10439 used at the beginning of the body, so it must not be
10440 1. a parameter passing register.
10441 2. the GOT pointer.
10442 We reuse static-chain register if it is available. Otherwise, we
10443 use DI for i386 and R13 for x86-64. We chose R13 since it has
10444 shorter encoding.
10446 Return: the regno of chosen register. */
10448 static unsigned int
10449 find_drap_reg (void)
10451 tree decl = cfun->decl;
10453 if (TARGET_64BIT)
10455 /* Use R13 for a nested function or a function that needs a static chain.
10456 Since a function with a tail call may use any caller-saved
10457 register in the epilogue, DRAP must not use a caller-saved
10458 register in such a case. */
10459 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10460 return R13_REG;
10462 return R10_REG;
10464 else
10466 /* Use DI for a nested function or a function that needs a static chain.
10467 Since a function with a tail call may use any caller-saved
10468 register in the epilogue, DRAP must not use a caller-saved
10469 register in such a case. */
10470 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10471 return DI_REG;
10473 /* Reuse static chain register if it isn't used for parameter
10474 passing. */
10475 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10477 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10478 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10479 return CX_REG;
10481 return DI_REG;
10485 /* Return minimum incoming stack alignment. */
10487 static unsigned int
10488 ix86_minimum_incoming_stack_boundary (bool sibcall)
10490 unsigned int incoming_stack_boundary;
10492 /* Prefer the one specified at command line. */
10493 if (ix86_user_incoming_stack_boundary)
10494 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10495 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10496 if -mstackrealign is used, this isn't a sibcall check, and the
10497 estimated stack alignment is 128 bits. */
10498 else if (!sibcall
10499 && !TARGET_64BIT
10500 && ix86_force_align_arg_pointer
10501 && crtl->stack_alignment_estimated == 128)
10502 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10503 else
10504 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10506 /* Incoming stack alignment can be changed on individual functions
10507 via force_align_arg_pointer attribute. We use the smallest
10508 incoming stack boundary. */
10509 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10510 && lookup_attribute (ix86_force_align_arg_pointer_string,
10511 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10512 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10514 /* The incoming stack frame has to be aligned at least at
10515 parm_stack_boundary. */
10516 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10517 incoming_stack_boundary = crtl->parm_stack_boundary;
10519 /* The stack at the entry of main is aligned by the runtime. We use the
10520 smallest incoming stack boundary. */
10521 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10522 && DECL_NAME (current_function_decl)
10523 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10524 && DECL_FILE_SCOPE_P (current_function_decl))
10525 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10527 return incoming_stack_boundary;
10530 /* Update incoming stack boundary and estimated stack alignment. */
10532 static void
10533 ix86_update_stack_boundary (void)
10535 ix86_incoming_stack_boundary
10536 = ix86_minimum_incoming_stack_boundary (false);
10538 /* x86_64 vararg needs 16byte stack alignment for register save
10539 area. */
10540 if (TARGET_64BIT
10541 && cfun->stdarg
10542 && crtl->stack_alignment_estimated < 128)
10543 crtl->stack_alignment_estimated = 128;
10546 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10547 needed or an rtx for DRAP otherwise. */
10549 static rtx
10550 ix86_get_drap_rtx (void)
10552 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10553 crtl->need_drap = true;
10555 if (stack_realign_drap)
10557 /* Assign DRAP to vDRAP and return vDRAP. */
10558 unsigned int regno = find_drap_reg ();
10559 rtx drap_vreg;
10560 rtx arg_ptr;
10561 rtx_insn *seq, *insn;
10563 arg_ptr = gen_rtx_REG (Pmode, regno);
10564 crtl->drap_reg = arg_ptr;
10566 start_sequence ();
10567 drap_vreg = copy_to_reg (arg_ptr);
10568 seq = get_insns ();
10569 end_sequence ();
10571 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10572 if (!optimize)
10574 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10575 RTX_FRAME_RELATED_P (insn) = 1;
10577 return drap_vreg;
10579 else
10580 return NULL;
10583 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10585 static rtx
10586 ix86_internal_arg_pointer (void)
10588 return virtual_incoming_args_rtx;
10591 struct scratch_reg {
10592 rtx reg;
10593 bool saved;
10596 /* Return a short-lived scratch register for use on function entry.
10597 In 32-bit mode, it is valid only after the registers are saved
10598 in the prologue. This register must be released by means of
10599 release_scratch_register_on_entry once it is dead. */
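/* A rough summary of the selection below: in 64-bit mode the scratch
   register is always R11; in 32-bit mode it is typically AX, falling back
   to DX, CX, an already-saved callee-saved register (BX/SI/DI), or, as a
   last resort, a register that is explicitly pushed and later popped.  */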
10601 static void
10602 get_scratch_register_on_entry (struct scratch_reg *sr)
10604 int regno;
10606 sr->saved = false;
10608 if (TARGET_64BIT)
10610 /* We always use R11 in 64-bit mode. */
10611 regno = R11_REG;
10613 else
10615 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10616 bool fastcall_p
10617 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10618 bool thiscall_p
10619 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10620 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10621 int regparm = ix86_function_regparm (fntype, decl);
10622 int drap_regno
10623 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10625 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10626 for the static chain register. */
10627 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10628 && drap_regno != AX_REG)
10629 regno = AX_REG;
10630 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10631 for the static chain register. */
10632 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10633 regno = AX_REG;
10634 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10635 regno = DX_REG;
10636 /* ecx is the static chain register. */
10637 else if (regparm < 3 && !fastcall_p && !thiscall_p
10638 && !static_chain_p
10639 && drap_regno != CX_REG)
10640 regno = CX_REG;
10641 else if (ix86_save_reg (BX_REG, true))
10642 regno = BX_REG;
10643 /* esi is the static chain register. */
10644 else if (!(regparm == 3 && static_chain_p)
10645 && ix86_save_reg (SI_REG, true))
10646 regno = SI_REG;
10647 else if (ix86_save_reg (DI_REG, true))
10648 regno = DI_REG;
10649 else
10651 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10652 sr->saved = true;
10656 sr->reg = gen_rtx_REG (Pmode, regno);
10657 if (sr->saved)
10659 rtx insn = emit_insn (gen_push (sr->reg));
10660 RTX_FRAME_RELATED_P (insn) = 1;
10664 /* Release a scratch register obtained from the preceding function. */
10666 static void
10667 release_scratch_register_on_entry (struct scratch_reg *sr)
10669 if (sr->saved)
10671 struct machine_function *m = cfun->machine;
10672 rtx x, insn = emit_insn (gen_pop (sr->reg));
10674 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10675 RTX_FRAME_RELATED_P (insn) = 1;
10676 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10677 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10678 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10679 m->fs.sp_offset -= UNITS_PER_WORD;
10683 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
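/* Typically one page: the default STACK_CHECK_PROBE_INTERVAL_EXP is 12,
   giving a 4096-byte probe interval, although a target can override it.  */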
10685 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10687 static void
10688 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10690 /* We skip the probe for the first interval + a small dope of 4 words and
10691 probe that many bytes past the specified size to maintain a protection
10692 area at the bottom of the stack. */
10693 const int dope = 4 * UNITS_PER_WORD;
10694 rtx size_rtx = GEN_INT (size), last;
10696 /* See if we have a constant small number of probes to generate. If so,
10697 that's the easy case. The run-time loop is made up of 11 insns in the
10698 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10699 for n # of intervals. */
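/* Illustrative example of the constant case, assuming PROBE_INTERVAL == 4096
   and a 64-bit dope of 32 bytes: for size == 6000 we emit SP -= 8224, probe,
   SP -= 1904, probe, SP += 4128, which keeps successive probes at most one
   interval apart (the first interval plus the dope is deliberately skipped)
   and leaves SP lowered by exactly 6000 bytes.  */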
10700 if (size <= 5 * PROBE_INTERVAL)
10702 HOST_WIDE_INT i, adjust;
10703 bool first_probe = true;
10705 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10706 values of N from 1 until it exceeds SIZE. If only one probe is
10707 needed, this will not generate any code. Then adjust and probe
10708 to PROBE_INTERVAL + SIZE. */
10709 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10711 if (first_probe)
10713 adjust = 2 * PROBE_INTERVAL + dope;
10714 first_probe = false;
10716 else
10717 adjust = PROBE_INTERVAL;
10719 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10720 plus_constant (Pmode, stack_pointer_rtx,
10721 -adjust)));
10722 emit_stack_probe (stack_pointer_rtx);
10725 if (first_probe)
10726 adjust = size + PROBE_INTERVAL + dope;
10727 else
10728 adjust = size + PROBE_INTERVAL - i;
10730 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10731 plus_constant (Pmode, stack_pointer_rtx,
10732 -adjust)));
10733 emit_stack_probe (stack_pointer_rtx);
10735 /* Adjust back to account for the additional first interval. */
10736 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10737 plus_constant (Pmode, stack_pointer_rtx,
10738 PROBE_INTERVAL + dope)));
10741 /* Otherwise, do the same as above, but in a loop. Note that we must be
10742 extra careful with variables wrapping around because we might be at
10743 the very top (or the very bottom) of the address space and we have
10744 to be able to handle this case properly; in particular, we use an
10745 equality test for the loop condition. */
10746 else
10748 HOST_WIDE_INT rounded_size;
10749 struct scratch_reg sr;
10751 get_scratch_register_on_entry (&sr);
10754 /* Step 1: round SIZE to the previous multiple of the interval. */
10756 rounded_size = size & -PROBE_INTERVAL;
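/* E.g., assuming PROBE_INTERVAL == 4096, a size of 10000 gives a
   rounded_size of 8192; the remaining 1808 bytes are handled by the
   Step 4 adjustment and probe below.  */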
10759 /* Step 2: compute initial and final value of the loop counter. */
10761 /* SP = SP_0 + PROBE_INTERVAL. */
10762 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10763 plus_constant (Pmode, stack_pointer_rtx,
10764 - (PROBE_INTERVAL + dope))));
10766 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10767 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10768 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10769 gen_rtx_PLUS (Pmode, sr.reg,
10770 stack_pointer_rtx)));
10773 /* Step 3: the loop
10775 while (SP != LAST_ADDR)
10777 SP = SP + PROBE_INTERVAL
10778 probe at SP
10781 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10782 values of N from 1 until it is equal to ROUNDED_SIZE. */
10784 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10787 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10788 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10790 if (size != rounded_size)
10792 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10793 plus_constant (Pmode, stack_pointer_rtx,
10794 rounded_size - size)));
10795 emit_stack_probe (stack_pointer_rtx);
10798 /* Adjust back to account for the additional first interval. */
10799 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10800 plus_constant (Pmode, stack_pointer_rtx,
10801 PROBE_INTERVAL + dope)));
10803 release_scratch_register_on_entry (&sr);
10806 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10808 /* Even if the stack pointer isn't the CFA register, we need to correctly
10809 describe the adjustments made to it, in particular differentiate the
10810 frame-related ones from the frame-unrelated ones. */
10811 if (size > 0)
10813 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10814 XVECEXP (expr, 0, 0)
10815 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10816 plus_constant (Pmode, stack_pointer_rtx, -size));
10817 XVECEXP (expr, 0, 1)
10818 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10819 plus_constant (Pmode, stack_pointer_rtx,
10820 PROBE_INTERVAL + dope + size));
10821 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10822 RTX_FRAME_RELATED_P (last) = 1;
10824 cfun->machine->fs.sp_offset += size;
10827 /* Make sure nothing is scheduled before we are done. */
10828 emit_insn (gen_blockage ());
10831 /* Adjust the stack pointer up to REG while probing it. */
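/* The emitted loop is roughly the following (an illustrative sketch in
   AT&T syntax, assuming 64-bit mode, REG == %r11 and PROBE_INTERVAL == 4096;
   not the literal output):
	.LPSRLn:	cmpq	%r11, %rsp
			je	.LPSREn
			subq	$4096, %rsp
			orq	$0, (%rsp)
			jmp	.LPSRLn
	.LPSREn:  */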
10833 const char *
10834 output_adjust_stack_and_probe (rtx reg)
10836 static int labelno = 0;
10837 char loop_lab[32], end_lab[32];
10838 rtx xops[2];
10840 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10841 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10843 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10845 /* Jump to END_LAB if SP == LAST_ADDR. */
10846 xops[0] = stack_pointer_rtx;
10847 xops[1] = reg;
10848 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10849 fputs ("\tje\t", asm_out_file);
10850 assemble_name_raw (asm_out_file, end_lab);
10851 fputc ('\n', asm_out_file);
10853 /* SP = SP + PROBE_INTERVAL. */
10854 xops[1] = GEN_INT (PROBE_INTERVAL);
10855 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10857 /* Probe at SP. */
10858 xops[1] = const0_rtx;
10859 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10861 fprintf (asm_out_file, "\tjmp\t");
10862 assemble_name_raw (asm_out_file, loop_lab);
10863 fputc ('\n', asm_out_file);
10865 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10867 return "";
10870 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10871 inclusive. These are offsets from the current stack pointer. */
10873 static void
10874 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10876 /* See if we have a constant small number of probes to generate. If so,
10877 that's the easy case. The run-time loop is made up of 7 insns in the
10878 generic case while the compile-time loop is made up of n insns for n #
10879 of intervals. */
10880 if (size <= 7 * PROBE_INTERVAL)
10882 HOST_WIDE_INT i;
10884 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10885 it exceeds SIZE. If only one probe is needed, this will not
10886 generate any code. Then probe at FIRST + SIZE. */
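/* E.g., assuming PROBE_INTERVAL == 4096, FIRST == 16384 and SIZE == 10000,
   this emits probes 20480, 24576 and 26384 bytes below the current stack
   pointer.  */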
10887 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10888 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10889 -(first + i)));
10891 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10892 -(first + size)));
10895 /* Otherwise, do the same as above, but in a loop. Note that we must be
10896 extra careful with variables wrapping around because we might be at
10897 the very top (or the very bottom) of the address space and we have
10898 to be able to handle this case properly; in particular, we use an
10899 equality test for the loop condition. */
10900 else
10902 HOST_WIDE_INT rounded_size, last;
10903 struct scratch_reg sr;
10905 get_scratch_register_on_entry (&sr);
10908 /* Step 1: round SIZE to the previous multiple of the interval. */
10910 rounded_size = size & -PROBE_INTERVAL;
10913 /* Step 2: compute initial and final value of the loop counter. */
10915 /* TEST_OFFSET = FIRST. */
10916 emit_move_insn (sr.reg, GEN_INT (-first));
10918 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10919 last = first + rounded_size;
10922 /* Step 3: the loop
10924 while (TEST_ADDR != LAST_ADDR)
10926 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10927 probe at TEST_ADDR
10930 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10931 until it is equal to ROUNDED_SIZE. */
10933 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10936 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10937 that SIZE is equal to ROUNDED_SIZE. */
10939 if (size != rounded_size)
10940 emit_stack_probe (plus_constant (Pmode,
10941 gen_rtx_PLUS (Pmode,
10942 stack_pointer_rtx,
10943 sr.reg),
10944 rounded_size - size));
10946 release_scratch_register_on_entry (&sr);
10949 /* Make sure nothing is scheduled before we are done. */
10950 emit_insn (gen_blockage ());
10953 /* Probe a range of stack addresses from REG to END, inclusive. These are
10954 offsets from the current stack pointer. */
10956 const char *
10957 output_probe_stack_range (rtx reg, rtx end)
10959 static int labelno = 0;
10960 char loop_lab[32], end_lab[32];
10961 rtx xops[3];
10963 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10964 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10966 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10968 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10969 xops[0] = reg;
10970 xops[1] = end;
10971 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10972 fputs ("\tje\t", asm_out_file);
10973 assemble_name_raw (asm_out_file, end_lab);
10974 fputc ('\n', asm_out_file);
10976 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10977 xops[1] = GEN_INT (PROBE_INTERVAL);
10978 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10980 /* Probe at TEST_ADDR. */
10981 xops[0] = stack_pointer_rtx;
10982 xops[1] = reg;
10983 xops[2] = const0_rtx;
10984 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10986 fprintf (asm_out_file, "\tjmp\t");
10987 assemble_name_raw (asm_out_file, loop_lab);
10988 fputc ('\n', asm_out_file);
10990 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10992 return "";
10995 /* Finalize the stack_realign_needed flag, which guides the prologue and
10996 epilogue so that they are generated in the correct form. */
10997 static void
10998 ix86_finalize_stack_realign_flags (void)
11000 /* Check if stack realignment is really needed after reload, and
11001 store the result in cfun. */
11002 unsigned int incoming_stack_boundary
11003 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11004 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11005 unsigned int stack_realign = (incoming_stack_boundary
11006 < (crtl->is_leaf
11007 ? crtl->max_used_stack_slot_alignment
11008 : crtl->stack_alignment_needed));
11010 if (crtl->stack_realign_finalized)
11012 /* After stack_realign_needed is finalized, we can no longer
11013 change it. */
11014 gcc_assert (crtl->stack_realign_needed == stack_realign);
11015 return;
11018 /* If the only reason for frame_pointer_needed is that we conservatively
11019 assumed stack realignment might be needed, but in the end nothing that
11020 needed the stack alignment had been spilled, clear frame_pointer_needed
11021 and say we don't need stack realignment. */
11022 if (stack_realign
11023 && frame_pointer_needed
11024 && crtl->is_leaf
11025 && flag_omit_frame_pointer
11026 && crtl->sp_is_unchanging
11027 && !ix86_current_function_calls_tls_descriptor
11028 && !crtl->accesses_prior_frames
11029 && !cfun->calls_alloca
11030 && !crtl->calls_eh_return
11031 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11032 && !ix86_frame_pointer_required ()
11033 && get_frame_size () == 0
11034 && ix86_nsaved_sseregs () == 0
11035 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11037 HARD_REG_SET set_up_by_prologue, prologue_used;
11038 basic_block bb;
11040 CLEAR_HARD_REG_SET (prologue_used);
11041 CLEAR_HARD_REG_SET (set_up_by_prologue);
11042 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11043 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11044 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11045 HARD_FRAME_POINTER_REGNUM);
11046 FOR_EACH_BB_FN (bb, cfun)
11048 rtx_insn *insn;
11049 FOR_BB_INSNS (bb, insn)
11050 if (NONDEBUG_INSN_P (insn)
11051 && requires_stack_frame_p (insn, prologue_used,
11052 set_up_by_prologue))
11054 crtl->stack_realign_needed = stack_realign;
11055 crtl->stack_realign_finalized = true;
11056 return;
11060 /* If drap has been set, but it actually isn't live at the start
11061 of the function, there is no reason to set it up. */
11062 if (crtl->drap_reg)
11064 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11065 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11067 crtl->drap_reg = NULL_RTX;
11068 crtl->need_drap = false;
11071 else
11072 cfun->machine->no_drap_save_restore = true;
11074 frame_pointer_needed = false;
11075 stack_realign = false;
11076 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11077 crtl->stack_alignment_needed = incoming_stack_boundary;
11078 crtl->stack_alignment_estimated = incoming_stack_boundary;
11079 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11080 crtl->preferred_stack_boundary = incoming_stack_boundary;
11081 df_finish_pass (true);
11082 df_scan_alloc (NULL);
11083 df_scan_blocks ();
11084 df_compute_regs_ever_live (true);
11085 df_analyze ();
11088 crtl->stack_realign_needed = stack_realign;
11089 crtl->stack_realign_finalized = true;
11092 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11094 static void
11095 ix86_elim_entry_set_got (rtx reg)
11097 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11098 rtx_insn *c_insn = BB_HEAD (bb);
11099 if (!NONDEBUG_INSN_P (c_insn))
11100 c_insn = next_nonnote_nondebug_insn (c_insn);
11101 if (c_insn && NONJUMP_INSN_P (c_insn))
11103 rtx pat = PATTERN (c_insn);
11104 if (GET_CODE (pat) == PARALLEL)
11106 rtx vec = XVECEXP (pat, 0, 0);
11107 if (GET_CODE (vec) == SET
11108 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11109 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11110 delete_insn (c_insn);
11115 /* Expand the prologue into a bunch of separate insns. */
11117 void
11118 ix86_expand_prologue (void)
11120 struct machine_function *m = cfun->machine;
11121 rtx insn, t;
11122 struct ix86_frame frame;
11123 HOST_WIDE_INT allocate;
11124 bool int_registers_saved;
11125 bool sse_registers_saved;
11127 ix86_finalize_stack_realign_flags ();
11129 /* DRAP should not coexist with stack_realign_fp */
11130 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11132 memset (&m->fs, 0, sizeof (m->fs));
11134 /* Initialize CFA state for before the prologue. */
11135 m->fs.cfa_reg = stack_pointer_rtx;
11136 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11138 /* Track SP offset to the CFA. We continue tracking this after we've
11139 swapped the CFA register away from SP. In the case of re-alignment
11140 this is fudged; we're interested in offsets within the local frame. */
11141 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11142 m->fs.sp_valid = true;
11144 ix86_compute_frame_layout (&frame);
11146 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11148 /* We should have already generated an error for any use of
11149 ms_hook on a nested function. */
11150 gcc_checking_assert (!ix86_static_chain_on_stack);
11152 /* Check if profiling is active and we shall use profiling before
11153 prologue variant. If so sorry. */
11154 if (crtl->profile && flag_fentry != 0)
11155 sorry ("ms_hook_prologue attribute isn%'t compatible "
11156 "with -mfentry for 32-bit");
11158 /* In ix86_asm_output_function_label we emitted:
11159 8b ff movl.s %edi,%edi
11160 55 push %ebp
11161 8b ec movl.s %esp,%ebp
11163 This matches the hookable function prologue in Win32 API
11164 functions in Microsoft Windows XP Service Pack 2 and newer.
11165 Wine uses this to enable Windows apps to hook the Win32 API
11166 functions provided by Wine.
11168 What that means is that we've already set up the frame pointer. */
11170 if (frame_pointer_needed
11171 && !(crtl->drap_reg && crtl->stack_realign_needed))
11173 rtx push, mov;
11175 /* We've decided to use the frame pointer already set up.
11176 Describe this to the unwinder by pretending that both
11177 push and mov insns happen right here.
11179 Putting the unwind info here at the end of the ms_hook
11180 is done so that we can make absolutely certain we get
11181 the required byte sequence at the start of the function,
11182 rather than relying on an assembler that can produce
11183 the exact encoding required.
11185 However it does mean (in the unpatched case) that we have
11186 a 1 insn window where the asynchronous unwind info is
11187 incorrect. However, if we placed the unwind info at
11188 its correct location we would have incorrect unwind info
11189 in the patched case. Which is probably all moot since
11190 I don't expect Wine generates dwarf2 unwind info for the
11191 system libraries that use this feature. */
11193 insn = emit_insn (gen_blockage ());
11195 push = gen_push (hard_frame_pointer_rtx);
11196 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11197 stack_pointer_rtx);
11198 RTX_FRAME_RELATED_P (push) = 1;
11199 RTX_FRAME_RELATED_P (mov) = 1;
11201 RTX_FRAME_RELATED_P (insn) = 1;
11202 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11203 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11205 /* Note that gen_push incremented m->fs.cfa_offset, even
11206 though we didn't emit the push insn here. */
11207 m->fs.cfa_reg = hard_frame_pointer_rtx;
11208 m->fs.fp_offset = m->fs.cfa_offset;
11209 m->fs.fp_valid = true;
11211 else
11213 /* The frame pointer is not needed so pop %ebp again.
11214 This leaves us with a pristine state. */
11215 emit_insn (gen_pop (hard_frame_pointer_rtx));
11219 /* The first insn of a function that accepts its static chain on the
11220 stack is to push the register that would be filled in by a direct
11221 call. This insn will be skipped by the trampoline. */
11222 else if (ix86_static_chain_on_stack)
11224 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11225 emit_insn (gen_blockage ());
11227 /* We don't want to interpret this push insn as a register save,
11228 only as a stack adjustment. The real copy of the register as
11229 a save will be done later, if needed. */
11230 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11231 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11232 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11233 RTX_FRAME_RELATED_P (insn) = 1;
11236 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11237 DRAP is needed and stack realignment is really needed after reload. */
11238 if (stack_realign_drap)
11240 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11242 /* Only need to push parameter pointer reg if it is caller saved. */
11243 if (!call_used_regs[REGNO (crtl->drap_reg)])
11245 /* Push arg pointer reg */
11246 insn = emit_insn (gen_push (crtl->drap_reg));
11247 RTX_FRAME_RELATED_P (insn) = 1;
11250 /* Grab the argument pointer. */
11251 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11252 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11253 RTX_FRAME_RELATED_P (insn) = 1;
11254 m->fs.cfa_reg = crtl->drap_reg;
11255 m->fs.cfa_offset = 0;
11257 /* Align the stack. */
11258 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11259 stack_pointer_rtx,
11260 GEN_INT (-align_bytes)));
11261 RTX_FRAME_RELATED_P (insn) = 1;
11263 /* Replicate the return address on the stack so that return
11264 address can be reached via (argp - 1) slot. This is needed
11265 to implement macro RETURN_ADDR_RTX and intrinsic function
11266 expand_builtin_return_addr etc. */
11267 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11268 t = gen_frame_mem (word_mode, t);
11269 insn = emit_insn (gen_push (t));
11270 RTX_FRAME_RELATED_P (insn) = 1;
11272 /* For the purposes of frame and register save area addressing,
11273 we've started over with a new frame. */
11274 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11275 m->fs.realigned = true;
11278 int_registers_saved = (frame.nregs == 0);
11279 sse_registers_saved = (frame.nsseregs == 0);
11281 if (frame_pointer_needed && !m->fs.fp_valid)
11283 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11284 slower on all targets. Also sdb doesn't like it. */
11285 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11286 RTX_FRAME_RELATED_P (insn) = 1;
11288 /* Push registers now, before setting the frame pointer
11289 on SEH target. */
11290 if (!int_registers_saved
11291 && TARGET_SEH
11292 && !frame.save_regs_using_mov)
11294 ix86_emit_save_regs ();
11295 int_registers_saved = true;
11296 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11299 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11301 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11302 RTX_FRAME_RELATED_P (insn) = 1;
11304 if (m->fs.cfa_reg == stack_pointer_rtx)
11305 m->fs.cfa_reg = hard_frame_pointer_rtx;
11306 m->fs.fp_offset = m->fs.sp_offset;
11307 m->fs.fp_valid = true;
11311 if (!int_registers_saved)
11313 /* If saving registers via PUSH, do so now. */
11314 if (!frame.save_regs_using_mov)
11316 ix86_emit_save_regs ();
11317 int_registers_saved = true;
11318 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11321 /* When using red zone we may start register saving before allocating
11322 the stack frame saving one cycle of the prologue. However, avoid
11323 doing this if we have to probe the stack; at least on x86_64 the
11324 stack probe can turn into a call that clobbers a red zone location. */
11325 else if (ix86_using_red_zone ()
11326 && (! TARGET_STACK_PROBE
11327 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11329 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11330 int_registers_saved = true;
11334 if (stack_realign_fp)
11336 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11337 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11339 /* The computation of the size of the re-aligned stack frame means
11340 that we must allocate the size of the register save area before
11341 performing the actual alignment. Otherwise we cannot guarantee
11342 that there's enough storage above the realignment point. */
11343 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11344 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11345 GEN_INT (m->fs.sp_offset
11346 - frame.sse_reg_save_offset),
11347 -1, false);
11349 /* Align the stack. */
11350 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11351 stack_pointer_rtx,
11352 GEN_INT (-align_bytes)));
11354 /* For the purposes of register save area addressing, the stack
11355 pointer is no longer valid. As for the value of sp_offset,
11356 see ix86_compute_frame_layout, which we need to match in order
11357 to pass verification of stack_pointer_offset at the end. */
11358 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11359 m->fs.sp_valid = false;
11362 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11364 if (flag_stack_usage_info)
11366 /* We start to count from ARG_POINTER. */
11367 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11369 /* If it was realigned, take into account the fake frame. */
11370 if (stack_realign_drap)
11372 if (ix86_static_chain_on_stack)
11373 stack_size += UNITS_PER_WORD;
11375 if (!call_used_regs[REGNO (crtl->drap_reg)])
11376 stack_size += UNITS_PER_WORD;
11378 /* This over-estimates by 1 minimal-stack-alignment-unit but
11379 mitigates that by counting in the new return address slot. */
11380 current_function_dynamic_stack_size
11381 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11384 current_function_static_stack_size = stack_size;
11387 /* On SEH target with very large frame size, allocate an area to save
11388 SSE registers (as the very large allocation won't be described). */
11389 if (TARGET_SEH
11390 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11391 && !sse_registers_saved)
11393 HOST_WIDE_INT sse_size =
11394 frame.sse_reg_save_offset - frame.reg_save_offset;
11396 gcc_assert (int_registers_saved);
11398 /* No need to do stack checking as the area will be immediately
11399 written. */
11400 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11401 GEN_INT (-sse_size), -1,
11402 m->fs.cfa_reg == stack_pointer_rtx);
11403 allocate -= sse_size;
11404 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11405 sse_registers_saved = true;
11408 /* The stack has already been decremented by the instruction calling us
11409 so probe if the size is non-negative to preserve the protection area. */
11410 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11412 /* We expect the registers to be saved when probes are used. */
11413 gcc_assert (int_registers_saved);
11415 if (STACK_CHECK_MOVING_SP)
11417 if (!(crtl->is_leaf && !cfun->calls_alloca
11418 && allocate <= PROBE_INTERVAL))
11420 ix86_adjust_stack_and_probe (allocate);
11421 allocate = 0;
11424 else
11426 HOST_WIDE_INT size = allocate;
11428 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11429 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11431 if (TARGET_STACK_PROBE)
11433 if (crtl->is_leaf && !cfun->calls_alloca)
11435 if (size > PROBE_INTERVAL)
11436 ix86_emit_probe_stack_range (0, size);
11438 else
11439 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11441 else
11443 if (crtl->is_leaf && !cfun->calls_alloca)
11445 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11446 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11447 size - STACK_CHECK_PROTECT);
11449 else
11450 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11455 if (allocate == 0)
11457 else if (!ix86_target_stack_probe ()
11458 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11460 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11461 GEN_INT (-allocate), -1,
11462 m->fs.cfa_reg == stack_pointer_rtx);
11464 else
11466 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11467 rtx r10 = NULL;
11468 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11469 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11470 bool eax_live = ix86_eax_live_at_start_p ();
11471 bool r10_live = false;
11473 if (TARGET_64BIT)
11474 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11476 if (eax_live)
11478 insn = emit_insn (gen_push (eax));
11479 allocate -= UNITS_PER_WORD;
11480 /* Note that SEH directives need to continue tracking the stack
11481 pointer even after the frame pointer has been set up. */
11482 if (sp_is_cfa_reg || TARGET_SEH)
11484 if (sp_is_cfa_reg)
11485 m->fs.cfa_offset += UNITS_PER_WORD;
11486 RTX_FRAME_RELATED_P (insn) = 1;
11490 if (r10_live)
11492 r10 = gen_rtx_REG (Pmode, R10_REG);
11493 insn = emit_insn (gen_push (r10));
11494 allocate -= UNITS_PER_WORD;
11495 if (sp_is_cfa_reg || TARGET_SEH)
11497 if (sp_is_cfa_reg)
11498 m->fs.cfa_offset += UNITS_PER_WORD;
11499 RTX_FRAME_RELATED_P (insn) = 1;
11503 emit_move_insn (eax, GEN_INT (allocate));
11504 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11506 /* Use the fact that AX still contains ALLOCATE. */
11507 adjust_stack_insn = (Pmode == DImode
11508 ? gen_pro_epilogue_adjust_stack_di_sub
11509 : gen_pro_epilogue_adjust_stack_si_sub);
11511 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11512 stack_pointer_rtx, eax));
11514 if (sp_is_cfa_reg || TARGET_SEH)
11516 if (sp_is_cfa_reg)
11517 m->fs.cfa_offset += allocate;
11518 RTX_FRAME_RELATED_P (insn) = 1;
11519 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11520 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11521 plus_constant (Pmode, stack_pointer_rtx,
11522 -allocate)));
11524 m->fs.sp_offset += allocate;
11526 /* Use stack_pointer_rtx for relative addressing so that code
11527 works for realigned stack, too. */
11528 if (r10_live && eax_live)
11530 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11531 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11532 gen_frame_mem (word_mode, t));
11533 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11534 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11535 gen_frame_mem (word_mode, t));
11537 else if (eax_live || r10_live)
11539 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11540 emit_move_insn (gen_rtx_REG (word_mode,
11541 (eax_live ? AX_REG : R10_REG)),
11542 gen_frame_mem (word_mode, t));
11545 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11547 /* If we haven't already set up the frame pointer, do so now. */
11548 if (frame_pointer_needed && !m->fs.fp_valid)
11550 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11551 GEN_INT (frame.stack_pointer_offset
11552 - frame.hard_frame_pointer_offset));
11553 insn = emit_insn (insn);
11554 RTX_FRAME_RELATED_P (insn) = 1;
11555 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11557 if (m->fs.cfa_reg == stack_pointer_rtx)
11558 m->fs.cfa_reg = hard_frame_pointer_rtx;
11559 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11560 m->fs.fp_valid = true;
11563 if (!int_registers_saved)
11564 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11565 if (!sse_registers_saved)
11566 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11568 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11569 in the prologue. */
11570 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11572 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11573 insn = emit_insn (gen_set_got (pic));
11574 RTX_FRAME_RELATED_P (insn) = 1;
11575 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11576 emit_insn (gen_prologue_use (pic));
11577 /* Delete an already emitted SET_GOT if it exists and is allocated to
11578 REAL_PIC_OFFSET_TABLE_REGNUM. */
11579 ix86_elim_entry_set_got (pic);
11582 if (crtl->drap_reg && !crtl->stack_realign_needed)
11584 /* vDRAP is set up, but after reload it turns out stack realignment
11585 isn't necessary; here we emit prologue code to set up DRAP
11586 without the stack realignment adjustment. */
11587 t = choose_baseaddr (0);
11588 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11591 /* Prevent instructions from being scheduled into register save push
11592 sequence when access to the redzone area is done through frame pointer.
11593 The offset between the frame pointer and the stack pointer is calculated
11594 relative to the value of the stack pointer at the end of the function
11595 prologue, and moving instructions that access redzone area via frame
11596 pointer inside push sequence violates this assumption. */
11597 if (frame_pointer_needed && frame.red_zone_size)
11598 emit_insn (gen_memory_blockage ());
11600 /* Emit cld instruction if stringops are used in the function. */
11601 if (TARGET_CLD && ix86_current_function_needs_cld)
11602 emit_insn (gen_cld ());
11604 /* SEH requires that the prologue end within 256 bytes of the start of
11605 the function. Prevent instruction schedules that would extend that.
11606 Further, prevent alloca modifications to the stack pointer from being
11607 combined with prologue modifications. */
11608 if (TARGET_SEH)
11609 emit_insn (gen_prologue_use (stack_pointer_rtx));
11612 /* Emit code to restore REG using a POP insn. */
11614 static void
11615 ix86_emit_restore_reg_using_pop (rtx reg)
11617 struct machine_function *m = cfun->machine;
11618 rtx insn = emit_insn (gen_pop (reg));
11620 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11621 m->fs.sp_offset -= UNITS_PER_WORD;
11623 if (m->fs.cfa_reg == crtl->drap_reg
11624 && REGNO (reg) == REGNO (crtl->drap_reg))
11626 /* Previously we'd represented the CFA as an expression
11627 like *(%ebp - 8). We've just popped that value from
11628 the stack, which means we need to reset the CFA to
11629 the drap register. This will remain until we restore
11630 the stack pointer. */
11631 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11632 RTX_FRAME_RELATED_P (insn) = 1;
11634 /* This means that the DRAP register is valid for addressing too. */
11635 m->fs.drap_valid = true;
11636 return;
11639 if (m->fs.cfa_reg == stack_pointer_rtx)
11641 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11642 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11643 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11644 RTX_FRAME_RELATED_P (insn) = 1;
11646 m->fs.cfa_offset -= UNITS_PER_WORD;
11649 /* When the frame pointer is the CFA, and we pop it, we are
11650 swapping back to the stack pointer as the CFA. This happens
11651 for stack frames that don't allocate other data, so we assume
11652 the stack pointer is now pointing at the return address, i.e.
11653 the function entry state, which makes the offset be 1 word. */
11654 if (reg == hard_frame_pointer_rtx)
11656 m->fs.fp_valid = false;
11657 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11659 m->fs.cfa_reg = stack_pointer_rtx;
11660 m->fs.cfa_offset -= UNITS_PER_WORD;
11662 add_reg_note (insn, REG_CFA_DEF_CFA,
11663 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11664 GEN_INT (m->fs.cfa_offset)));
11665 RTX_FRAME_RELATED_P (insn) = 1;
11670 /* Emit code to restore saved registers using POP insns. */
11672 static void
11673 ix86_emit_restore_regs_using_pop (void)
11675 unsigned int regno;
11677 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11678 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11679 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11682 /* Emit code and notes for the LEAVE instruction. */
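/* (LEAVE copies the frame pointer into the stack pointer and then pops the
   saved frame pointer, which is why the stack pointer becomes valid again
   below with sp_offset set to fp_offset - UNITS_PER_WORD.)  */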
11684 static void
11685 ix86_emit_leave (void)
11687 struct machine_function *m = cfun->machine;
11688 rtx insn = emit_insn (ix86_gen_leave ());
11690 ix86_add_queued_cfa_restore_notes (insn);
11692 gcc_assert (m->fs.fp_valid);
11693 m->fs.sp_valid = true;
11694 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11695 m->fs.fp_valid = false;
11697 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11699 m->fs.cfa_reg = stack_pointer_rtx;
11700 m->fs.cfa_offset = m->fs.sp_offset;
11702 add_reg_note (insn, REG_CFA_DEF_CFA,
11703 plus_constant (Pmode, stack_pointer_rtx,
11704 m->fs.sp_offset));
11705 RTX_FRAME_RELATED_P (insn) = 1;
11707 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11708 m->fs.fp_offset);
11711 /* Emit code to restore saved registers using MOV insns.
11712 First register is restored from CFA - CFA_OFFSET. */
11713 static void
11714 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11715 bool maybe_eh_return)
11717 struct machine_function *m = cfun->machine;
11718 unsigned int regno;
11720 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11721 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11723 rtx reg = gen_rtx_REG (word_mode, regno);
11724 rtx insn, mem;
11726 mem = choose_baseaddr (cfa_offset);
11727 mem = gen_frame_mem (word_mode, mem);
11728 insn = emit_move_insn (reg, mem);
11730 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11732 /* Previously we'd represented the CFA as an expression
11733 like *(%ebp - 8). We've just loaded that value from
11734 the stack, which means we need to reset the CFA to
11735 the drap register. This will remain until we restore
11736 the stack pointer. */
11737 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11738 RTX_FRAME_RELATED_P (insn) = 1;
11740 /* This means that the DRAP register is valid for addressing. */
11741 m->fs.drap_valid = true;
11743 else
11744 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11746 cfa_offset -= UNITS_PER_WORD;
11750 /* Emit code to restore saved SSE registers using MOV insns.
11751 The first register is restored from CFA - CFA_OFFSET. */
11752 static void
11753 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11754 bool maybe_eh_return)
11756 unsigned int regno;
11758 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11759 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11761 rtx reg = gen_rtx_REG (V4SFmode, regno);
11762 rtx mem;
11764 mem = choose_baseaddr (cfa_offset);
11765 mem = gen_rtx_MEM (V4SFmode, mem);
11766 set_mem_align (mem, 128);
11767 emit_move_insn (reg, mem);
11769 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11771 cfa_offset -= 16;
11775 /* Restore function stack, frame, and registers. */
11777 void
11778 ix86_expand_epilogue (int style)
11780 struct machine_function *m = cfun->machine;
11781 struct machine_frame_state frame_state_save = m->fs;
11782 struct ix86_frame frame;
11783 bool restore_regs_via_mov;
11784 bool using_drap;
11786 ix86_finalize_stack_realign_flags ();
11787 ix86_compute_frame_layout (&frame);
11789 m->fs.sp_valid = (!frame_pointer_needed
11790 || (crtl->sp_is_unchanging
11791 && !stack_realign_fp));
11792 gcc_assert (!m->fs.sp_valid
11793 || m->fs.sp_offset == frame.stack_pointer_offset);
11795 /* The FP must be valid if the frame pointer is present. */
11796 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11797 gcc_assert (!m->fs.fp_valid
11798 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11800 /* We must have *some* valid pointer to the stack frame. */
11801 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11803 /* The DRAP is never valid at this point. */
11804 gcc_assert (!m->fs.drap_valid);
11806 /* See the comment about red zone and frame
11807 pointer usage in ix86_expand_prologue. */
11808 if (frame_pointer_needed && frame.red_zone_size)
11809 emit_insn (gen_memory_blockage ());
11811 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11812 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11814 /* Determine the CFA offset of the end of the red-zone. */
11815 m->fs.red_zone_offset = 0;
11816 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11818 /* The red-zone begins below the return address. */
11819 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11821 /* When the register save area is in the aligned portion of
11822 the stack, determine the maximum runtime displacement that
11823 matches up with the aligned frame. */
11824 if (stack_realign_drap)
11825 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11826 + UNITS_PER_WORD);
11829 /* Special care must be taken for the normal return case of a function
11830 using eh_return: the eax and edx registers are marked as saved, but
11831 not restored along this path. Adjust the save location to match. */
11832 if (crtl->calls_eh_return && style != 2)
11833 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11835 /* EH_RETURN requires the use of moves to function properly. */
11836 if (crtl->calls_eh_return)
11837 restore_regs_via_mov = true;
11838 /* SEH requires the use of pops to identify the epilogue. */
11839 else if (TARGET_SEH)
11840 restore_regs_via_mov = false;
11841 /* If we're only restoring one register and sp is not valid, then
11842 use a move instruction to restore the register, since it's
11843 less work than reloading sp and popping the register. */
11844 else if (!m->fs.sp_valid && frame.nregs <= 1)
11845 restore_regs_via_mov = true;
11846 else if (TARGET_EPILOGUE_USING_MOVE
11847 && cfun->machine->use_fast_prologue_epilogue
11848 && (frame.nregs > 1
11849 || m->fs.sp_offset != frame.reg_save_offset))
11850 restore_regs_via_mov = true;
11851 else if (frame_pointer_needed
11852 && !frame.nregs
11853 && m->fs.sp_offset != frame.reg_save_offset)
11854 restore_regs_via_mov = true;
11855 else if (frame_pointer_needed
11856 && TARGET_USE_LEAVE
11857 && cfun->machine->use_fast_prologue_epilogue
11858 && frame.nregs == 1)
11859 restore_regs_via_mov = true;
11860 else
11861 restore_regs_via_mov = false;
11863 if (restore_regs_via_mov || frame.nsseregs)
11865 /* Ensure that the entire register save area is addressable via
11866 the stack pointer, if we will restore via sp. */
11867 if (TARGET_64BIT
11868 && m->fs.sp_offset > 0x7fffffff
11869 && !(m->fs.fp_valid || m->fs.drap_valid)
11870 && (frame.nsseregs + frame.nregs) != 0)
11872 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11873 GEN_INT (m->fs.sp_offset
11874 - frame.sse_reg_save_offset),
11875 style,
11876 m->fs.cfa_reg == stack_pointer_rtx);
11880 /* If there are any SSE registers to restore, then we have to do it
11881 via moves, since there's obviously no pop for SSE regs. */
11882 if (frame.nsseregs)
11883 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11884 style == 2);
11886 if (restore_regs_via_mov)
11888 rtx t;
11890 if (frame.nregs)
11891 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11893 /* eh_return epilogues need %ecx added to the stack pointer. */
11894 if (style == 2)
11896 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11898 /* Stack align doesn't work with eh_return. */
11899 gcc_assert (!stack_realign_drap);
11900 /* Neither do regparm nested functions. */
11901 gcc_assert (!ix86_static_chain_on_stack);
11903 if (frame_pointer_needed)
11905 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11906 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11907 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11909 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11910 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11912 /* Note that we use SA as a temporary CFA, as the return
11913 address is at the proper place relative to it. We
11914 pretend this happens at the FP restore insn because
11915 prior to this insn the FP would be stored at the wrong
11916 offset relative to SA, and after this insn we have no
11917 other reasonable register to use for the CFA. We don't
11918 bother resetting the CFA to the SP for the duration of
11919 the return insn. */
11920 add_reg_note (insn, REG_CFA_DEF_CFA,
11921 plus_constant (Pmode, sa, UNITS_PER_WORD));
11922 ix86_add_queued_cfa_restore_notes (insn);
11923 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11924 RTX_FRAME_RELATED_P (insn) = 1;
11926 m->fs.cfa_reg = sa;
11927 m->fs.cfa_offset = UNITS_PER_WORD;
11928 m->fs.fp_valid = false;
11930 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11931 const0_rtx, style, false);
11933 else
11935 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11936 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11937 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11938 ix86_add_queued_cfa_restore_notes (insn);
11940 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11941 if (m->fs.cfa_offset != UNITS_PER_WORD)
11943 m->fs.cfa_offset = UNITS_PER_WORD;
11944 add_reg_note (insn, REG_CFA_DEF_CFA,
11945 plus_constant (Pmode, stack_pointer_rtx,
11946 UNITS_PER_WORD));
11947 RTX_FRAME_RELATED_P (insn) = 1;
11950 m->fs.sp_offset = UNITS_PER_WORD;
11951 m->fs.sp_valid = true;
11954 else
11956 /* SEH requires that the function end with (1) a stack adjustment
11957 if necessary, (2) a sequence of pops, and (3) a return or
11958 jump instruction. Prevent insns from the function body from
11959 being scheduled into this sequence. */
11960 if (TARGET_SEH)
11962 /* Prevent a catch region from being adjacent to the standard
11963 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11964 several other flags that would be interesting to test are
11965 set up yet. */
11966 if (flag_non_call_exceptions)
11967 emit_insn (gen_nops (const1_rtx));
11968 else
11969 emit_insn (gen_blockage ());
11972 /* The first step is to deallocate the stack frame so that we can
11973 pop the registers. Also do it on SEH targets for very large
11974 frames, as the emitted instructions aren't allowed by the ABI in
11975 epilogues. */
11976 if (!m->fs.sp_valid
11977 || (TARGET_SEH
11978 && (m->fs.sp_offset - frame.reg_save_offset
11979 >= SEH_MAX_FRAME_SIZE)))
11981 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11982 GEN_INT (m->fs.fp_offset
11983 - frame.reg_save_offset),
11984 style, false);
11986 else if (m->fs.sp_offset != frame.reg_save_offset)
11988 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11989 GEN_INT (m->fs.sp_offset
11990 - frame.reg_save_offset),
11991 style,
11992 m->fs.cfa_reg == stack_pointer_rtx);
11995 ix86_emit_restore_regs_using_pop ();
11998 /* If we used a frame pointer and haven't already got rid of it,
11999 then do so now. */
12000 if (m->fs.fp_valid)
12002 /* If the stack pointer is valid and pointing at the frame
12003 pointer store address, then we only need a pop. */
12004 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12005 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12006 /* Leave results in shorter dependency chains on CPUs that are
12007 able to grok it fast. */
12008 else if (TARGET_USE_LEAVE
12009 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12010 || !cfun->machine->use_fast_prologue_epilogue)
12011 ix86_emit_leave ();
12012 else
12014 pro_epilogue_adjust_stack (stack_pointer_rtx,
12015 hard_frame_pointer_rtx,
12016 const0_rtx, style, !using_drap);
12017 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12021 if (using_drap)
12023 int param_ptr_offset = UNITS_PER_WORD;
12024 rtx insn;
12026 gcc_assert (stack_realign_drap);
12028 if (ix86_static_chain_on_stack)
12029 param_ptr_offset += UNITS_PER_WORD;
12030 if (!call_used_regs[REGNO (crtl->drap_reg)])
12031 param_ptr_offset += UNITS_PER_WORD;
12033 insn = emit_insn (gen_rtx_SET
12034 (VOIDmode, stack_pointer_rtx,
12035 gen_rtx_PLUS (Pmode,
12036 crtl->drap_reg,
12037 GEN_INT (-param_ptr_offset))));
12038 m->fs.cfa_reg = stack_pointer_rtx;
12039 m->fs.cfa_offset = param_ptr_offset;
12040 m->fs.sp_offset = param_ptr_offset;
12041 m->fs.realigned = false;
12043 add_reg_note (insn, REG_CFA_DEF_CFA,
12044 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12045 GEN_INT (param_ptr_offset)));
12046 RTX_FRAME_RELATED_P (insn) = 1;
12048 if (!call_used_regs[REGNO (crtl->drap_reg)])
12049 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12052 /* At this point the stack pointer must be valid, and we must have
12053 restored all of the registers. We may not have deallocated the
12054 entire stack frame. We've delayed this until now because it may
12055 be possible to merge the local stack deallocation with the
12056 deallocation forced by ix86_static_chain_on_stack. */
12057 gcc_assert (m->fs.sp_valid);
12058 gcc_assert (!m->fs.fp_valid);
12059 gcc_assert (!m->fs.realigned);
12060 if (m->fs.sp_offset != UNITS_PER_WORD)
12062 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12063 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12064 style, true);
12066 else
12067 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12069 /* Sibcall epilogues don't want a return instruction. */
12070 if (style == 0)
12072 m->fs = frame_state_save;
12073 return;
12076 if (crtl->args.pops_args && crtl->args.size)
12078 rtx popc = GEN_INT (crtl->args.pops_args);
12080 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12081 address, do explicit add, and jump indirectly to the caller. */
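/* ("ret imm16" only takes a 16-bit immediate, hence the 64K limit; the
   fallback below pops the return address into %ecx, adjusts %esp by the
   full amount, and jumps back to the caller through %ecx.)  */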
12083 if (crtl->args.pops_args >= 65536)
12085 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12086 rtx insn;
12088 /* There is no "pascal" calling convention in any 64bit ABI. */
12089 gcc_assert (!TARGET_64BIT);
12091 insn = emit_insn (gen_pop (ecx));
12092 m->fs.cfa_offset -= UNITS_PER_WORD;
12093 m->fs.sp_offset -= UNITS_PER_WORD;
12095 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12096 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12097 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12098 add_reg_note (insn, REG_CFA_REGISTER,
12099 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12100 RTX_FRAME_RELATED_P (insn) = 1;
12102 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12103 popc, -1, true);
12104 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12106 else
12107 emit_jump_insn (gen_simple_return_pop_internal (popc));
12109 else
12110 emit_jump_insn (gen_simple_return_internal ());
12112 /* Restore the state back to the state from the prologue,
12113 so that it's correct for the next epilogue. */
12114 m->fs = frame_state_save;
12117 /* Reset from the function's potential modifications. */
12119 static void
12120 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12122 if (pic_offset_table_rtx
12123 && !ix86_use_pseudo_pic_reg ())
12124 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12125 #if TARGET_MACHO
12126 /* Mach-O doesn't support labels at the end of objects, so if
12127 it looks like we might want one, insert a NOP. */
12129 rtx_insn *insn = get_last_insn ();
12130 rtx_insn *deleted_debug_label = NULL;
12131 while (insn
12132 && NOTE_P (insn)
12133 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12135 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert a nop;
12136 only set their CODE_LABEL_NUMBER to -1, since otherwise
12137 there would be code generation differences
12138 between -g and -g0. */
12139 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12140 deleted_debug_label = insn;
12141 insn = PREV_INSN (insn);
12143 if (insn
12144 && (LABEL_P (insn)
12145 || (NOTE_P (insn)
12146 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12147 fputs ("\tnop\n", file);
12148 else if (deleted_debug_label)
12149 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12150 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12151 CODE_LABEL_NUMBER (insn) = -1;
12153 #endif
12157 /* Return a scratch register to use in the split stack prologue. The
12158 split stack prologue is used for -fsplit-stack. It is the first
12159 instructions in the function, even before the regular prologue.
12160 The scratch register can be any caller-saved register which is not
12161 used for parameters or for the static chain. */
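/* E.g., in 64-bit mode this is always R11; in 32-bit mode a plain cdecl
   function without a static chain gets CX, while fastcall and thiscall
   functions fall back to AX or DX.  */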
12163 static unsigned int
12164 split_stack_prologue_scratch_regno (void)
12166 if (TARGET_64BIT)
12167 return R11_REG;
12168 else
12170 bool is_fastcall, is_thiscall;
12171 int regparm;
12173 is_fastcall = (lookup_attribute ("fastcall",
12174 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12175 != NULL);
12176 is_thiscall = (lookup_attribute ("thiscall",
12177 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12178 != NULL);
12179 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12181 if (is_fastcall)
12183 if (DECL_STATIC_CHAIN (cfun->decl))
12185 sorry ("-fsplit-stack does not support fastcall with "
12186 "nested function");
12187 return INVALID_REGNUM;
12189 return AX_REG;
12191 else if (is_thiscall)
12193 if (!DECL_STATIC_CHAIN (cfun->decl))
12194 return DX_REG;
12195 return AX_REG;
12197 else if (regparm < 3)
12199 if (!DECL_STATIC_CHAIN (cfun->decl))
12200 return CX_REG;
12201 else
12203 if (regparm >= 2)
12205 sorry ("-fsplit-stack does not support 2 register "
12206 "parameters for a nested function");
12207 return INVALID_REGNUM;
12209 return DX_REG;
12212 else
12214 /* FIXME: We could make this work by pushing a register
12215 around the addition and comparison. */
12216 sorry ("-fsplit-stack does not support 3 register parameters");
12217 return INVALID_REGNUM;
12222 /* A SYMBOL_REF for the function which allocates new stack space for
12223 -fsplit-stack. */
12225 static GTY(()) rtx split_stack_fn;
12227 /* A SYMBOL_REF for the more stack function when using the large
12228 model. */
12230 static GTY(()) rtx split_stack_fn_large;
12232 /* Handle -fsplit-stack. These are the first instructions in the
12233 function, even before the regular prologue. */
12235 void
12236 ix86_expand_split_stack_prologue (void)
12238 struct ix86_frame frame;
12239 HOST_WIDE_INT allocate;
12240 unsigned HOST_WIDE_INT args_size;
12241 rtx_code_label *label;
12242 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12243 rtx scratch_reg = NULL_RTX;
12244 rtx_code_label *varargs_label = NULL;
12245 rtx fn;
12247 gcc_assert (flag_split_stack && reload_completed);
12249 ix86_finalize_stack_realign_flags ();
12250 ix86_compute_frame_layout (&frame);
12251 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12253 /* This is the label we will branch to if we have enough stack
12254 space. We expect the basic block reordering pass to reverse this
12255 branch if optimizing, so that we branch in the unlikely case. */
12256 label = gen_label_rtx ();
12258 /* We need to compare the stack pointer minus the frame size with
12259 the stack boundary in the TCB. The stack boundary always gives
12260 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12261 can compare directly. Otherwise we need to do an addition. */
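/* As a rough illustration on x86-64 with a small frame, the code emitted
   from here onwards looks like the following (the TCB field offset is
   target- and libc-specific, so it is shown as a placeholder):

	cmp	%fs:<stack_guard_offset>, %rsp
	jae	.Lhave_enough_stack
	mov	$<frame size>, %r10
	mov	$<args size>, %r11
	call	__morestack
	ret
   .Lhave_enough_stack:

   For larger frames, %rsp minus the frame size is first computed into a
   scratch register and that register is compared instead.  */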
12263 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12264 UNSPEC_STACK_CHECK);
12265 limit = gen_rtx_CONST (Pmode, limit);
12266 limit = gen_rtx_MEM (Pmode, limit);
12267 if (allocate < SPLIT_STACK_AVAILABLE)
12268 current = stack_pointer_rtx;
12269 else
12271 unsigned int scratch_regno;
12272 rtx offset;
12274 /* We need a scratch register to hold the stack pointer minus
12275 the required frame size. Since this is the very start of the
12276 function, the scratch register can be any caller-saved
12277 register which is not used for parameters. */
12278 offset = GEN_INT (- allocate);
12279 scratch_regno = split_stack_prologue_scratch_regno ();
12280 if (scratch_regno == INVALID_REGNUM)
12281 return;
12282 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12283 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12285 /* We don't use ix86_gen_add3 in this case because it will
12286 want to split to lea, but when not optimizing the insn
12287 will not be split after this point. */
12288 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12289 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12290 offset)));
12292 else
12294 emit_move_insn (scratch_reg, offset);
12295 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12296 stack_pointer_rtx));
12298 current = scratch_reg;
12301 ix86_expand_branch (GEU, current, limit, label);
12302 jump_insn = get_last_insn ();
12303 JUMP_LABEL (jump_insn) = label;
12305 /* Mark the jump as very likely to be taken. */
12306 add_int_reg_note (jump_insn, REG_BR_PROB,
12307 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12309 if (split_stack_fn == NULL_RTX)
12311 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12312 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12314 fn = split_stack_fn;
12316 /* Get more stack space. We pass in the desired stack space and the
12317 size of the arguments to copy to the new stack. In 32-bit mode
12318 we push the parameters; __morestack will return on a new stack
12319 anyhow. In 64-bit mode we pass the parameters in r10 and
12320 r11. */
12321 allocate_rtx = GEN_INT (allocate);
12322 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12323 call_fusage = NULL_RTX;
12324 if (TARGET_64BIT)
12326 rtx reg10, reg11;
12328 reg10 = gen_rtx_REG (Pmode, R10_REG);
12329 reg11 = gen_rtx_REG (Pmode, R11_REG);
12331 /* If this function uses a static chain, it will be in %r10.
12332 Preserve it across the call to __morestack. */
12333 if (DECL_STATIC_CHAIN (cfun->decl))
12335 rtx rax;
12337 rax = gen_rtx_REG (word_mode, AX_REG);
12338 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12339 use_reg (&call_fusage, rax);
12342 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12343 && !TARGET_PECOFF)
12345 HOST_WIDE_INT argval;
12347 gcc_assert (Pmode == DImode);
12348 /* When using the large model we need to load the address
12349 into a register, and we've run out of registers. So we
12350 switch to a different calling convention, and we call a
12351 different function: __morestack_large_model. We pass the
12352 argument size in the upper 32 bits of r10 and pass the
12353 frame size in the lower 32 bits. */
12354 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12355 gcc_assert ((args_size & 0xffffffff) == args_size);
12357 if (split_stack_fn_large == NULL_RTX)
12359 split_stack_fn_large =
12360 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12361 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12363 if (ix86_cmodel == CM_LARGE_PIC)
12365 rtx_code_label *label;
12366 rtx x;
12368 label = gen_label_rtx ();
12369 emit_label (label);
12370 LABEL_PRESERVE_P (label) = 1;
12371 emit_insn (gen_set_rip_rex64 (reg10, label));
12372 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12373 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12374 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12375 UNSPEC_GOT);
12376 x = gen_rtx_CONST (Pmode, x);
12377 emit_move_insn (reg11, x);
12378 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12379 x = gen_const_mem (Pmode, x);
12380 emit_move_insn (reg11, x);
12382 else
12383 emit_move_insn (reg11, split_stack_fn_large);
12385 fn = reg11;
12387 argval = ((args_size << 16) << 16) + allocate;
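/* For illustration: with args_size == 0x10 and allocate == 0x200,
   argval == 0x0000001000000200, i.e. the argument size lands in the
   upper 32 bits and the frame size in the lower 32 bits, matching the
   asserts above.  */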
12388 emit_move_insn (reg10, GEN_INT (argval));
12390 else
12392 emit_move_insn (reg10, allocate_rtx);
12393 emit_move_insn (reg11, GEN_INT (args_size));
12394 use_reg (&call_fusage, reg11);
12397 use_reg (&call_fusage, reg10);
12399 else
12401 emit_insn (gen_push (GEN_INT (args_size)));
12402 emit_insn (gen_push (allocate_rtx));
12404 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12405 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12406 NULL_RTX, false);
12407 add_function_usage_to (call_insn, call_fusage);
12409 /* In order to make call/return prediction work right, we now need
12410 to execute a return instruction. See
12411 libgcc/config/i386/morestack.S for the details on how this works.
12413 For flow purposes gcc must not see this as a return
12414 instruction--we need control flow to continue at the subsequent
12415 label. Therefore, we use an unspec. */
12416 gcc_assert (crtl->args.pops_args < 65536);
12417 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12419 /* If we are in 64-bit mode and this function uses a static chain,
12420 we saved %r10 in %rax before calling __morestack. */
12421 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12422 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12423 gen_rtx_REG (word_mode, AX_REG));
12425 /* If this function calls va_start, we need to store a pointer to
12426 the arguments on the old stack, because they may not have been
12427 all copied to the new stack. At this point the old stack can be
12428 found at the frame pointer value used by __morestack, because
12429 __morestack has set that up before calling back to us. Here we
12430 store that pointer in a scratch register, and in
12431 ix86_expand_prologue we store the scratch register in a stack
12432 slot. */
12433 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12435 unsigned int scratch_regno;
12436 rtx frame_reg;
12437 int words;
12439 scratch_regno = split_stack_prologue_scratch_regno ();
12440 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12441 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12443 /* 64-bit:
12444 fp -> old fp value
12445 return address within this function
12446 return address of caller of this function
12447 stack arguments
12448 So we add three words to get to the stack arguments.
12450 32-bit:
12451 fp -> old fp value
12452 return address within this function
12453 first argument to __morestack
12454 second argument to __morestack
12455 return address of caller of this function
12456 stack arguments
12457 So we add five words to get to the stack arguments.
12459 words = TARGET_64BIT ? 3 : 5;
12460 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12461 gen_rtx_PLUS (Pmode, frame_reg,
12462 GEN_INT (words * UNITS_PER_WORD))));
12464 varargs_label = gen_label_rtx ();
12465 emit_jump_insn (gen_jump (varargs_label));
12466 JUMP_LABEL (get_last_insn ()) = varargs_label;
12468 emit_barrier ();
12471 emit_label (label);
12472 LABEL_NUSES (label) = 1;
12474 /* If this function calls va_start, we now have to set the scratch
12475 register for the case where we do not call __morestack. In this
12476 case we need to set it based on the stack pointer. */
12477 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12479 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12480 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12481 GEN_INT (UNITS_PER_WORD))));
12483 emit_label (varargs_label);
12484 LABEL_NUSES (varargs_label) = 1;
12488 /* We may have to tell the dataflow pass that the split stack prologue
12489 is initializing a scratch register. */
12491 static void
12492 ix86_live_on_entry (bitmap regs)
12494 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12496 gcc_assert (flag_split_stack);
12497 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12501 /* Extract the parts of an RTL expression that is a valid memory address
12502 for an instruction. Return 0 if the structure of the address is
12503 grossly off. Return -1 if the address contains ASHIFT, so it is not
12504 strictly valid, but still used for computing the length of the lea instruction. */
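/* For example, an address such as

	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 8))

   decomposes into out->base = A, out->index = B, out->scale = 4 and
   out->disp = 8; a bare (ashift (reg B) (const_int 2)), as generated
   for lea, yields out->index = B with out->scale = 4 and makes the
   function return -1.  */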
12506 static int
12507 ix86_decompose_address (rtx addr, struct ix86_address *out)
12509 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12510 rtx base_reg, index_reg;
12511 HOST_WIDE_INT scale = 1;
12512 rtx scale_rtx = NULL_RTX;
12513 rtx tmp;
12514 int retval = 1;
12515 enum ix86_address_seg seg = SEG_DEFAULT;
12517 /* Allow zero-extended SImode addresses;
12518 they will be emitted with the addr32 prefix. */
12519 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12521 if (GET_CODE (addr) == ZERO_EXTEND
12522 && GET_MODE (XEXP (addr, 0)) == SImode)
12524 addr = XEXP (addr, 0);
12525 if (CONST_INT_P (addr))
12526 return 0;
12528 else if (GET_CODE (addr) == AND
12529 && const_32bit_mask (XEXP (addr, 1), DImode))
12531 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12532 if (addr == NULL_RTX)
12533 return 0;
12535 if (CONST_INT_P (addr))
12536 return 0;
12540 /* Allow SImode subregs of DImode addresses;
12541 they will be emitted with the addr32 prefix. */
12542 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12544 if (GET_CODE (addr) == SUBREG
12545 && GET_MODE (SUBREG_REG (addr)) == DImode)
12547 addr = SUBREG_REG (addr);
12548 if (CONST_INT_P (addr))
12549 return 0;
12553 if (REG_P (addr))
12554 base = addr;
12555 else if (GET_CODE (addr) == SUBREG)
12557 if (REG_P (SUBREG_REG (addr)))
12558 base = addr;
12559 else
12560 return 0;
12562 else if (GET_CODE (addr) == PLUS)
12564 rtx addends[4], op;
12565 int n = 0, i;
12567 op = addr;
12570 if (n >= 4)
12571 return 0;
12572 addends[n++] = XEXP (op, 1);
12573 op = XEXP (op, 0);
12575 while (GET_CODE (op) == PLUS);
12576 if (n >= 4)
12577 return 0;
12578 addends[n] = op;
12580 for (i = n; i >= 0; --i)
12582 op = addends[i];
12583 switch (GET_CODE (op))
12585 case MULT:
12586 if (index)
12587 return 0;
12588 index = XEXP (op, 0);
12589 scale_rtx = XEXP (op, 1);
12590 break;
12592 case ASHIFT:
12593 if (index)
12594 return 0;
12595 index = XEXP (op, 0);
12596 tmp = XEXP (op, 1);
12597 if (!CONST_INT_P (tmp))
12598 return 0;
12599 scale = INTVAL (tmp);
12600 if ((unsigned HOST_WIDE_INT) scale > 3)
12601 return 0;
12602 scale = 1 << scale;
12603 break;
12605 case ZERO_EXTEND:
12606 op = XEXP (op, 0);
12607 if (GET_CODE (op) != UNSPEC)
12608 return 0;
12609 /* FALLTHRU */
12611 case UNSPEC:
12612 if (XINT (op, 1) == UNSPEC_TP
12613 && TARGET_TLS_DIRECT_SEG_REFS
12614 && seg == SEG_DEFAULT)
12615 seg = DEFAULT_TLS_SEG_REG;
12616 else
12617 return 0;
12618 break;
12620 case SUBREG:
12621 if (!REG_P (SUBREG_REG (op)))
12622 return 0;
12623 /* FALLTHRU */
12625 case REG:
12626 if (!base)
12627 base = op;
12628 else if (!index)
12629 index = op;
12630 else
12631 return 0;
12632 break;
12634 case CONST:
12635 case CONST_INT:
12636 case SYMBOL_REF:
12637 case LABEL_REF:
12638 if (disp)
12639 return 0;
12640 disp = op;
12641 break;
12643 default:
12644 return 0;
12648 else if (GET_CODE (addr) == MULT)
12650 index = XEXP (addr, 0); /* index*scale */
12651 scale_rtx = XEXP (addr, 1);
12653 else if (GET_CODE (addr) == ASHIFT)
12655 /* We're called for lea too, which implements ashift on occasion. */
12656 index = XEXP (addr, 0);
12657 tmp = XEXP (addr, 1);
12658 if (!CONST_INT_P (tmp))
12659 return 0;
12660 scale = INTVAL (tmp);
12661 if ((unsigned HOST_WIDE_INT) scale > 3)
12662 return 0;
12663 scale = 1 << scale;
12664 retval = -1;
12666 else
12667 disp = addr; /* displacement */
12669 if (index)
12671 if (REG_P (index))
12673 else if (GET_CODE (index) == SUBREG
12674 && REG_P (SUBREG_REG (index)))
12676 else
12677 return 0;
12680 /* Extract the integral value of scale. */
12681 if (scale_rtx)
12683 if (!CONST_INT_P (scale_rtx))
12684 return 0;
12685 scale = INTVAL (scale_rtx);
12688 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12689 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12691 /* Avoid useless 0 displacement. */
12692 if (disp == const0_rtx && (base || index))
12693 disp = NULL_RTX;
12695 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12696 if (base_reg && index_reg && scale == 1
12697 && (index_reg == arg_pointer_rtx
12698 || index_reg == frame_pointer_rtx
12699 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12701 std::swap (base, index);
12702 std::swap (base_reg, index_reg);
12705 /* Special case: %ebp cannot be encoded as a base without a displacement.
12706 Similarly %r13. */
12707 if (!disp
12708 && base_reg
12709 && (base_reg == hard_frame_pointer_rtx
12710 || base_reg == frame_pointer_rtx
12711 || base_reg == arg_pointer_rtx
12712 || (REG_P (base_reg)
12713 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12714 || REGNO (base_reg) == R13_REG))))
12715 disp = const0_rtx;
12717 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12718 Avoid this by transforming to [%esi+0].
12719 Reload calls address legitimization without cfun defined, so we need
12720 to test cfun for being non-NULL. */
12721 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12722 && base_reg && !index_reg && !disp
12723 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12724 disp = const0_rtx;
12726 /* Special case: encode reg+reg instead of reg*2. */
12727 if (!base && index && scale == 2)
12728 base = index, base_reg = index_reg, scale = 1;
12730 /* Special case: scaling cannot be encoded without base or displacement. */
12731 if (!base && !disp && index && scale != 1)
12732 disp = const0_rtx;
12734 out->base = base;
12735 out->index = index;
12736 out->disp = disp;
12737 out->scale = scale;
12738 out->seg = seg;
12740 return retval;
12743 /* Return cost of the memory address x.
12744 For i386, it is better to use a complex address than let gcc copy
12745 the address into a reg and make a new pseudo. But not if the address
12746 requires two regs - that would mean more pseudos with longer
12747 lifetimes. */
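/* For example, (plus (mult (reg pseudo2) (const_int 4)) (reg pseudo1))
   with two distinct pseudo registers ends up with cost 3 below, while
   (plus (reg %ebx) (const_int 16)) with a hard register base keeps the
   base cost of 1.  */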
12748 static int
12749 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12751 struct ix86_address parts;
12752 int cost = 1;
12753 int ok = ix86_decompose_address (x, &parts);
12755 gcc_assert (ok);
12757 if (parts.base && GET_CODE (parts.base) == SUBREG)
12758 parts.base = SUBREG_REG (parts.base);
12759 if (parts.index && GET_CODE (parts.index) == SUBREG)
12760 parts.index = SUBREG_REG (parts.index);
12762 /* Attempt to minimize number of registers in the address. */
12763 if ((parts.base
12764 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12765 || (parts.index
12766 && (!REG_P (parts.index)
12767 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12768 cost++;
12770 /* When the address base or index is "pic_offset_table_rtx", we don't increase
12771 the address cost. When a memory operand using "pic_offset_table_rtx" is not
12772 invariant itself, it most likely means that the base or index is not invariant
12773 either. Therefore only "pic_offset_table_rtx" could be hoisted out, which is
12774 not profitable for x86. */
12775 if (parts.base
12776 && (!pic_offset_table_rtx
12777 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12778 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12779 && parts.index
12780 && (!pic_offset_table_rtx
12781 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12782 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12783 && parts.base != parts.index)
12784 cost++;
12786 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12787 since its predecode logic can't detect the length of such instructions
12788 and they degenerate to vector decoding. Increase the cost of such
12789 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12790 to split such addresses or even refuse them altogether.
12792 The following addressing modes are affected:
12793 [base+scale*index]
12794 [scale*index+disp]
12795 [base+index]
12797 The first and last cases may be avoidable by explicitly coding the zero
12798 into the memory address, but I don't have an AMD-K6 machine handy to check
12799 this theory. */
12801 if (TARGET_K6
12802 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12803 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12804 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12805 cost += 10;
12807 return cost;
12810 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12811 this is used to form addresses to local data when -fPIC is in
12812 use. */
12814 static bool
12815 darwin_local_data_pic (rtx disp)
12817 return (GET_CODE (disp) == UNSPEC
12818 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12821 /* Determine if a given RTX is a valid constant. We already know this
12822 satisfies CONSTANT_P. */
12824 static bool
12825 ix86_legitimate_constant_p (machine_mode, rtx x)
12827 /* Pointer bounds constants are not valid. */
12828 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
12829 return false;
12831 switch (GET_CODE (x))
12833 case CONST:
12834 x = XEXP (x, 0);
12836 if (GET_CODE (x) == PLUS)
12838 if (!CONST_INT_P (XEXP (x, 1)))
12839 return false;
12840 x = XEXP (x, 0);
12843 if (TARGET_MACHO && darwin_local_data_pic (x))
12844 return true;
12846 /* Only some unspecs are valid as "constants". */
12847 if (GET_CODE (x) == UNSPEC)
12848 switch (XINT (x, 1))
12850 case UNSPEC_GOT:
12851 case UNSPEC_GOTOFF:
12852 case UNSPEC_PLTOFF:
12853 return TARGET_64BIT;
12854 case UNSPEC_TPOFF:
12855 case UNSPEC_NTPOFF:
12856 x = XVECEXP (x, 0, 0);
12857 return (GET_CODE (x) == SYMBOL_REF
12858 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12859 case UNSPEC_DTPOFF:
12860 x = XVECEXP (x, 0, 0);
12861 return (GET_CODE (x) == SYMBOL_REF
12862 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12863 default:
12864 return false;
12867 /* We must have drilled down to a symbol. */
12868 if (GET_CODE (x) == LABEL_REF)
12869 return true;
12870 if (GET_CODE (x) != SYMBOL_REF)
12871 return false;
12872 /* FALLTHRU */
12874 case SYMBOL_REF:
12875 /* TLS symbols are never valid. */
12876 if (SYMBOL_REF_TLS_MODEL (x))
12877 return false;
12879 /* DLLIMPORT symbols are never valid. */
12880 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12881 && SYMBOL_REF_DLLIMPORT_P (x))
12882 return false;
12884 #if TARGET_MACHO
12885 /* mdynamic-no-pic */
12886 if (MACHO_DYNAMIC_NO_PIC_P)
12887 return machopic_symbol_defined_p (x);
12888 #endif
12889 break;
12891 case CONST_DOUBLE:
12892 if (GET_MODE (x) == TImode
12893 && x != CONST0_RTX (TImode)
12894 && !TARGET_64BIT)
12895 return false;
12896 break;
12898 case CONST_VECTOR:
12899 if (!standard_sse_constant_p (x))
12900 return false;
12902 default:
12903 break;
12906 /* Otherwise we handle everything else in the move patterns. */
12907 return true;
12910 /* Determine if it's legal to put X into the constant pool. This
12911 is not possible for the address of thread-local symbols, which
12912 is checked above. */
12914 static bool
12915 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
12917 /* We can always put integral constants and vectors in memory. */
12918 switch (GET_CODE (x))
12920 case CONST_INT:
12921 case CONST_DOUBLE:
12922 case CONST_VECTOR:
12923 return false;
12925 default:
12926 break;
12928 return !ix86_legitimate_constant_p (mode, x);
12931 /* Return true if the symbol is marked as dllimport, or as a stub-variable,
12932 otherwise return false. */
12934 static bool
12935 is_imported_p (rtx x)
12937 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12938 || GET_CODE (x) != SYMBOL_REF)
12939 return false;
12941 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12945 /* Nonzero if the constant value X is a legitimate general operand
12946 when generating PIC code. It is given that flag_pic is on and
12947 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12949 bool
12950 legitimate_pic_operand_p (rtx x)
12952 rtx inner;
12954 switch (GET_CODE (x))
12956 case CONST:
12957 inner = XEXP (x, 0);
12958 if (GET_CODE (inner) == PLUS
12959 && CONST_INT_P (XEXP (inner, 1)))
12960 inner = XEXP (inner, 0);
12962 /* Only some unspecs are valid as "constants". */
12963 if (GET_CODE (inner) == UNSPEC)
12964 switch (XINT (inner, 1))
12966 case UNSPEC_GOT:
12967 case UNSPEC_GOTOFF:
12968 case UNSPEC_PLTOFF:
12969 return TARGET_64BIT;
12970 case UNSPEC_TPOFF:
12971 x = XVECEXP (inner, 0, 0);
12972 return (GET_CODE (x) == SYMBOL_REF
12973 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12974 case UNSPEC_MACHOPIC_OFFSET:
12975 return legitimate_pic_address_disp_p (x);
12976 default:
12977 return false;
12979 /* FALLTHRU */
12981 case SYMBOL_REF:
12982 case LABEL_REF:
12983 return legitimate_pic_address_disp_p (x);
12985 default:
12986 return true;
12990 /* Determine if a given CONST RTX is a valid memory displacement
12991 in PIC mode. */
12993 bool
12994 legitimate_pic_address_disp_p (rtx disp)
12996 bool saw_plus;
12998 /* In 64bit mode we can allow direct addresses of symbols and labels
12999 when they are not dynamic symbols. */
13000 if (TARGET_64BIT)
13002 rtx op0 = disp, op1;
13004 switch (GET_CODE (disp))
13006 case LABEL_REF:
13007 return true;
13009 case CONST:
13010 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13011 break;
13012 op0 = XEXP (XEXP (disp, 0), 0);
13013 op1 = XEXP (XEXP (disp, 0), 1);
13014 if (!CONST_INT_P (op1)
13015 || INTVAL (op1) >= 16*1024*1024
13016 || INTVAL (op1) < -16*1024*1024)
13017 break;
13018 if (GET_CODE (op0) == LABEL_REF)
13019 return true;
13020 if (GET_CODE (op0) == CONST
13021 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13022 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13023 return true;
13024 if (GET_CODE (op0) == UNSPEC
13025 && XINT (op0, 1) == UNSPEC_PCREL)
13026 return true;
13027 if (GET_CODE (op0) != SYMBOL_REF)
13028 break;
13029 /* FALLTHRU */
13031 case SYMBOL_REF:
13032 /* TLS references should always be enclosed in UNSPEC.
13033 A dllimported symbol always needs to be resolved. */
13034 if (SYMBOL_REF_TLS_MODEL (op0)
13035 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13036 return false;
13038 if (TARGET_PECOFF)
13040 if (is_imported_p (op0))
13041 return true;
13043 if (SYMBOL_REF_FAR_ADDR_P (op0)
13044 || !SYMBOL_REF_LOCAL_P (op0))
13045 break;
13047 /* Function symbols need to be resolved only for
13048 the large model.
13049 For the small model we don't need to resolve anything
13050 here. */
13051 if ((ix86_cmodel != CM_LARGE_PIC
13052 && SYMBOL_REF_FUNCTION_P (op0))
13053 || ix86_cmodel == CM_SMALL_PIC)
13054 return true;
13055 /* Non-external symbols don't need to be resolved for
13056 the large and medium models. */
13057 if ((ix86_cmodel == CM_LARGE_PIC
13058 || ix86_cmodel == CM_MEDIUM_PIC)
13059 && !SYMBOL_REF_EXTERNAL_P (op0))
13060 return true;
13062 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13063 && SYMBOL_REF_LOCAL_P (op0)
13064 && ix86_cmodel != CM_LARGE_PIC)
13065 return true;
13066 break;
13068 default:
13069 break;
13072 if (GET_CODE (disp) != CONST)
13073 return false;
13074 disp = XEXP (disp, 0);
13076 if (TARGET_64BIT)
13078 /* It is unsafe to allow PLUS expressions here. This limits the allowed
13079 distance of GOT table entries. We should not need these anyway. */
13080 if (GET_CODE (disp) != UNSPEC
13081 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13082 && XINT (disp, 1) != UNSPEC_GOTOFF
13083 && XINT (disp, 1) != UNSPEC_PCREL
13084 && XINT (disp, 1) != UNSPEC_PLTOFF))
13085 return false;
13087 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13088 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13089 return false;
13090 return true;
13093 saw_plus = false;
13094 if (GET_CODE (disp) == PLUS)
13096 if (!CONST_INT_P (XEXP (disp, 1)))
13097 return false;
13098 disp = XEXP (disp, 0);
13099 saw_plus = true;
13102 if (TARGET_MACHO && darwin_local_data_pic (disp))
13103 return true;
13105 if (GET_CODE (disp) != UNSPEC)
13106 return false;
13108 switch (XINT (disp, 1))
13110 case UNSPEC_GOT:
13111 if (saw_plus)
13112 return false;
13113 /* We need to check for both symbols and labels because VxWorks loads
13114 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13115 details. */
13116 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13117 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13118 case UNSPEC_GOTOFF:
13119 /* Refuse GOTOFF in 64-bit mode since it is always 64-bit when used.
13120 While the ABI also specifies a 32-bit relocation, we don't produce it in
13121 the small PIC model at all. */
13122 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13123 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13124 && !TARGET_64BIT)
13125 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13126 return false;
13127 case UNSPEC_GOTTPOFF:
13128 case UNSPEC_GOTNTPOFF:
13129 case UNSPEC_INDNTPOFF:
13130 if (saw_plus)
13131 return false;
13132 disp = XVECEXP (disp, 0, 0);
13133 return (GET_CODE (disp) == SYMBOL_REF
13134 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13135 case UNSPEC_NTPOFF:
13136 disp = XVECEXP (disp, 0, 0);
13137 return (GET_CODE (disp) == SYMBOL_REF
13138 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13139 case UNSPEC_DTPOFF:
13140 disp = XVECEXP (disp, 0, 0);
13141 return (GET_CODE (disp) == SYMBOL_REF
13142 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13145 return false;
13148 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13149 replace the input X, or the original X if no replacement is called for.
13150 The output parameter *WIN is 1 if the calling macro should goto WIN,
13151 0 if it should not. */
13153 bool
13154 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13155 int)
13157 /* Reload can generate:
13159 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13160 (reg:DI 97))
13161 (reg:DI 2 cx))
13163 This RTX is rejected by ix86_legitimate_address_p due to
13164 non-strictness of base register 97. Following this rejection,
13165 reload pushes all three components into separate registers,
13166 creating an invalid memory address RTX.
13168 The following code reloads only the invalid part of the
13169 memory address RTX. */
13171 if (GET_CODE (x) == PLUS
13172 && REG_P (XEXP (x, 1))
13173 && GET_CODE (XEXP (x, 0)) == PLUS
13174 && REG_P (XEXP (XEXP (x, 0), 1)))
13176 rtx base, index;
13177 bool something_reloaded = false;
13179 base = XEXP (XEXP (x, 0), 1);
13180 if (!REG_OK_FOR_BASE_STRICT_P (base))
13182 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13183 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13184 opnum, (enum reload_type) type);
13185 something_reloaded = true;
13188 index = XEXP (x, 1);
13189 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13191 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13192 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13193 opnum, (enum reload_type) type);
13194 something_reloaded = true;
13197 gcc_assert (something_reloaded);
13198 return true;
13201 return false;
13204 /* Determine if OP is a suitable RTX for an address register.
13205 Return the naked register if a register or a register subreg is
13206 found, otherwise return NULL_RTX. */
13208 static rtx
13209 ix86_validate_address_register (rtx op)
13211 machine_mode mode = GET_MODE (op);
13213 /* Only SImode or DImode registers can form the address. */
13214 if (mode != SImode && mode != DImode)
13215 return NULL_RTX;
13217 if (REG_P (op))
13218 return op;
13219 else if (GET_CODE (op) == SUBREG)
13221 rtx reg = SUBREG_REG (op);
13223 if (!REG_P (reg))
13224 return NULL_RTX;
13226 mode = GET_MODE (reg);
13228 /* Don't allow SUBREGs that span more than a word. It can
13229 lead to spill failures when the register is one word out
13230 of a two word structure. */
13231 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13232 return NULL_RTX;
13234 /* Allow only SUBREGs of non-eliminable hard registers. */
13235 if (register_no_elim_operand (reg, mode))
13236 return reg;
13239 /* Op is not a register. */
13240 return NULL_RTX;
13243 /* Recognizes RTL expressions that are valid memory addresses for an
13244 instruction. The MODE argument is the machine mode for the MEM
13245 expression that wants to use this address.
13247 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13248 convert common non-canonical forms to canonical form so that they will
13249 be recognized. */
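/* For instance, the canonical form
	(plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 16)),
   i.e. base + index*4 + 16, is accepted, whereas a scale factor of 3,
   mismatched base and index modes, or (in 64-bit mode) a displacement
   that is not a valid sign-extended 32-bit immediate are rejected by
   the checks below.  */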
13251 static bool
13252 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13254 struct ix86_address parts;
13255 rtx base, index, disp;
13256 HOST_WIDE_INT scale;
13257 enum ix86_address_seg seg;
13259 if (ix86_decompose_address (addr, &parts) <= 0)
13260 /* Decomposition failed. */
13261 return false;
13263 base = parts.base;
13264 index = parts.index;
13265 disp = parts.disp;
13266 scale = parts.scale;
13267 seg = parts.seg;
13269 /* Validate base register. */
13270 if (base)
13272 rtx reg = ix86_validate_address_register (base);
13274 if (reg == NULL_RTX)
13275 return false;
13277 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13278 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13279 /* Base is not valid. */
13280 return false;
13283 /* Validate index register. */
13284 if (index)
13286 rtx reg = ix86_validate_address_register (index);
13288 if (reg == NULL_RTX)
13289 return false;
13291 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13292 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13293 /* Index is not valid. */
13294 return false;
13297 /* Index and base should have the same mode. */
13298 if (base && index
13299 && GET_MODE (base) != GET_MODE (index))
13300 return false;
13302 /* Address override works only on the (%reg) part of %fs:(%reg). */
13303 if (seg != SEG_DEFAULT
13304 && ((base && GET_MODE (base) != word_mode)
13305 || (index && GET_MODE (index) != word_mode)))
13306 return false;
13308 /* Validate scale factor. */
13309 if (scale != 1)
13311 if (!index)
13312 /* Scale without index. */
13313 return false;
13315 if (scale != 2 && scale != 4 && scale != 8)
13316 /* Scale is not a valid multiplier. */
13317 return false;
13320 /* Validate displacement. */
13321 if (disp)
13323 if (GET_CODE (disp) == CONST
13324 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13325 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13326 switch (XINT (XEXP (disp, 0), 1))
13328 /* Refuse GOTOFF and GOT in 64-bit mode since they are always 64-bit when
13329 used. While the ABI also specifies 32-bit relocations, we don't produce
13330 them at all and use IP-relative addressing instead. */
13331 case UNSPEC_GOT:
13332 case UNSPEC_GOTOFF:
13333 gcc_assert (flag_pic);
13334 if (!TARGET_64BIT)
13335 goto is_legitimate_pic;
13337 /* 64bit address unspec. */
13338 return false;
13340 case UNSPEC_GOTPCREL:
13341 case UNSPEC_PCREL:
13342 gcc_assert (flag_pic);
13343 goto is_legitimate_pic;
13345 case UNSPEC_GOTTPOFF:
13346 case UNSPEC_GOTNTPOFF:
13347 case UNSPEC_INDNTPOFF:
13348 case UNSPEC_NTPOFF:
13349 case UNSPEC_DTPOFF:
13350 break;
13352 case UNSPEC_STACK_CHECK:
13353 gcc_assert (flag_split_stack);
13354 break;
13356 default:
13357 /* Invalid address unspec. */
13358 return false;
13361 else if (SYMBOLIC_CONST (disp)
13362 && (flag_pic
13363 || (TARGET_MACHO
13364 #if TARGET_MACHO
13365 && MACHOPIC_INDIRECT
13366 && !machopic_operand_p (disp)
13367 #endif
13371 is_legitimate_pic:
13372 if (TARGET_64BIT && (index || base))
13374 /* foo@dtpoff(%rX) is ok. */
13375 if (GET_CODE (disp) != CONST
13376 || GET_CODE (XEXP (disp, 0)) != PLUS
13377 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13378 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13379 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13380 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13381 /* Non-constant pic memory reference. */
13382 return false;
13384 else if ((!TARGET_MACHO || flag_pic)
13385 && ! legitimate_pic_address_disp_p (disp))
13386 /* Displacement is an invalid pic construct. */
13387 return false;
13388 #if TARGET_MACHO
13389 else if (MACHO_DYNAMIC_NO_PIC_P
13390 && !ix86_legitimate_constant_p (Pmode, disp))
13391 /* displacement must be referenced via a non_lazy_pointer */
13392 return false;
13393 #endif
13395 /* This code used to verify that a symbolic pic displacement
13396 includes the pic_offset_table_rtx register.
13398 While this is a good idea, unfortunately these constructs may
13399 be created by the "adds using lea" optimization for incorrect
13400 code like:
13402 int a;
13403 int foo(int i)
13405 return *(&a+i);
13408 This code is nonsensical, but results in addressing the
13409 GOT table with a pic_offset_table_rtx base. We can't
13410 just refuse it easily, since it gets matched by the
13411 "addsi3" pattern, which later gets split to lea when the
13412 output register differs from the input. While this
13413 could be handled by a separate addsi pattern for this case
13414 that never results in lea, disabling this test seems to be
13415 the easier and correct fix for the crash. */
13417 else if (GET_CODE (disp) != LABEL_REF
13418 && !CONST_INT_P (disp)
13419 && (GET_CODE (disp) != CONST
13420 || !ix86_legitimate_constant_p (Pmode, disp))
13421 && (GET_CODE (disp) != SYMBOL_REF
13422 || !ix86_legitimate_constant_p (Pmode, disp)))
13423 /* Displacement is not constant. */
13424 return false;
13425 else if (TARGET_64BIT
13426 && !x86_64_immediate_operand (disp, VOIDmode))
13427 /* Displacement is out of range. */
13428 return false;
13429 /* In x32 mode, constant addresses are sign-extended to 64 bits, so
13430 we have to prevent addresses from 0x80000000 to 0xffffffff. */
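/* E.g. the constant address 0x80000000 would be sign-extended to
   0xffffffff80000000, so it has to be rejected here.  */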
13431 else if (TARGET_X32 && !(index || base)
13432 && CONST_INT_P (disp)
13433 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13434 return false;
13437 /* Everything looks valid. */
13438 return true;
13441 /* Determine if a given RTX is a valid constant address. */
13443 bool
13444 constant_address_p (rtx x)
13446 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13449 /* Return a unique alias set for the GOT. */
13451 static alias_set_type
13452 ix86_GOT_alias_set (void)
13454 static alias_set_type set = -1;
13455 if (set == -1)
13456 set = new_alias_set ();
13457 return set;
13460 /* Set regs_ever_live for PIC base address register
13461 to true if required. */
13462 static void
13463 set_pic_reg_ever_live ()
13465 if (reload_in_progress)
13466 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13469 /* Return a legitimate reference for ORIG (an address) using the
13470 register REG. If REG is 0, a new pseudo is generated.
13472 There are two types of references that must be handled:
13474 1. Global data references must load the address from the GOT, via
13475 the PIC reg. An insn is emitted to do this load, and the reg is
13476 returned.
13478 2. Static data references, constant pool addresses, and code labels
13479 compute the address as an offset from the GOT, whose base is in
13480 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13481 differentiate them from global data objects. The returned
13482 address is the PIC reg + an unspec constant.
13484 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13485 reg also appears in the address. */
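/* As an illustration, on 32-bit ELF targets case 1 typically ends up as
	movl	sym@GOT(%ebx), %reg
   (the address is loaded from the GOT), while case 2 becomes
	leal	sym@GOTOFF(%ebx), %reg
   (PIC register plus a constant offset), with %ebx standing in for the
   PIC register.  */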
13487 static rtx
13488 legitimize_pic_address (rtx orig, rtx reg)
13490 rtx addr = orig;
13491 rtx new_rtx = orig;
13493 #if TARGET_MACHO
13494 if (TARGET_MACHO && !TARGET_64BIT)
13496 if (reg == 0)
13497 reg = gen_reg_rtx (Pmode);
13498 /* Use the generic Mach-O PIC machinery. */
13499 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13501 #endif
13503 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13505 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13506 if (tmp)
13507 return tmp;
13510 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13511 new_rtx = addr;
13512 else if (TARGET_64BIT && !TARGET_PECOFF
13513 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13515 rtx tmpreg;
13516 /* This symbol may be referenced via a displacement from the PIC
13517 base address (@GOTOFF). */
13519 set_pic_reg_ever_live ();
13520 if (GET_CODE (addr) == CONST)
13521 addr = XEXP (addr, 0);
13522 if (GET_CODE (addr) == PLUS)
13524 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13525 UNSPEC_GOTOFF);
13526 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13528 else
13529 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13530 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13531 if (!reg)
13532 tmpreg = gen_reg_rtx (Pmode);
13533 else
13534 tmpreg = reg;
13535 emit_move_insn (tmpreg, new_rtx);
13537 if (reg != 0)
13539 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13540 tmpreg, 1, OPTAB_DIRECT);
13541 new_rtx = reg;
13543 else
13544 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13546 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13548 /* This symbol may be referenced via a displacement from the PIC
13549 base address (@GOTOFF). */
13551 set_pic_reg_ever_live ();
13552 if (GET_CODE (addr) == CONST)
13553 addr = XEXP (addr, 0);
13554 if (GET_CODE (addr) == PLUS)
13556 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13557 UNSPEC_GOTOFF);
13558 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13560 else
13561 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13562 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13563 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13565 if (reg != 0)
13567 emit_move_insn (reg, new_rtx);
13568 new_rtx = reg;
13571 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13572 /* We can't use @GOTOFF for text labels on VxWorks;
13573 see gotoff_operand. */
13574 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13576 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13577 if (tmp)
13578 return tmp;
13580 /* For x64 PE-COFF there is no GOT table, so we use the address
13581 directly. */
13582 if (TARGET_64BIT && TARGET_PECOFF)
13584 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13585 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13587 if (reg == 0)
13588 reg = gen_reg_rtx (Pmode);
13589 emit_move_insn (reg, new_rtx);
13590 new_rtx = reg;
13592 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13594 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13595 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13596 new_rtx = gen_const_mem (Pmode, new_rtx);
13597 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13599 if (reg == 0)
13600 reg = gen_reg_rtx (Pmode);
13601 /* Use gen_movsi directly, otherwise the address is loaded
13602 into a register for CSE. We don't want to CSE these addresses;
13603 instead we CSE addresses from the GOT table, so skip this. */
13604 emit_insn (gen_movsi (reg, new_rtx));
13605 new_rtx = reg;
13607 else
13609 /* This symbol must be referenced via a load from the
13610 Global Offset Table (@GOT). */
13612 set_pic_reg_ever_live ();
13613 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13614 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13615 if (TARGET_64BIT)
13616 new_rtx = force_reg (Pmode, new_rtx);
13617 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13618 new_rtx = gen_const_mem (Pmode, new_rtx);
13619 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13621 if (reg == 0)
13622 reg = gen_reg_rtx (Pmode);
13623 emit_move_insn (reg, new_rtx);
13624 new_rtx = reg;
13627 else
13629 if (CONST_INT_P (addr)
13630 && !x86_64_immediate_operand (addr, VOIDmode))
13632 if (reg)
13634 emit_move_insn (reg, addr);
13635 new_rtx = reg;
13637 else
13638 new_rtx = force_reg (Pmode, addr);
13640 else if (GET_CODE (addr) == CONST)
13642 addr = XEXP (addr, 0);
13644 /* We must match stuff we generate before. Assume the only
13645 unspecs that can get here are ours. Not that we could do
13646 anything with them anyway.... */
13647 if (GET_CODE (addr) == UNSPEC
13648 || (GET_CODE (addr) == PLUS
13649 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13650 return orig;
13651 gcc_assert (GET_CODE (addr) == PLUS);
13653 if (GET_CODE (addr) == PLUS)
13655 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13657 /* Check first to see if this is a constant offset from a @GOTOFF
13658 symbol reference. */
13659 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13660 && CONST_INT_P (op1))
13662 if (!TARGET_64BIT)
13664 set_pic_reg_ever_live ();
13665 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13666 UNSPEC_GOTOFF);
13667 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13668 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13669 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13671 if (reg != 0)
13673 emit_move_insn (reg, new_rtx);
13674 new_rtx = reg;
13677 else
13679 if (INTVAL (op1) < -16*1024*1024
13680 || INTVAL (op1) >= 16*1024*1024)
13682 if (!x86_64_immediate_operand (op1, Pmode))
13683 op1 = force_reg (Pmode, op1);
13684 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13688 else
13690 rtx base = legitimize_pic_address (op0, reg);
13691 machine_mode mode = GET_MODE (base);
13692 new_rtx
13693 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13695 if (CONST_INT_P (new_rtx))
13697 if (INTVAL (new_rtx) < -16*1024*1024
13698 || INTVAL (new_rtx) >= 16*1024*1024)
13700 if (!x86_64_immediate_operand (new_rtx, mode))
13701 new_rtx = force_reg (mode, new_rtx);
13702 new_rtx
13703 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13705 else
13706 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13708 else
13710 if (GET_CODE (new_rtx) == PLUS
13711 && CONSTANT_P (XEXP (new_rtx, 1)))
13713 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13714 new_rtx = XEXP (new_rtx, 1);
13716 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13721 return new_rtx;
13724 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13726 static rtx
13727 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13729 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13731 if (GET_MODE (tp) != tp_mode)
13733 gcc_assert (GET_MODE (tp) == SImode);
13734 gcc_assert (tp_mode == DImode);
13736 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13739 if (to_reg)
13740 tp = copy_to_mode_reg (tp_mode, tp);
13742 return tp;
13745 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13747 static GTY(()) rtx ix86_tls_symbol;
13749 static rtx
13750 ix86_tls_get_addr (void)
13752 if (!ix86_tls_symbol)
13754 const char *sym
13755 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13756 ? "___tls_get_addr" : "__tls_get_addr");
13758 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13761 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13763 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13764 UNSPEC_PLTOFF);
13765 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13766 gen_rtx_CONST (Pmode, unspec));
13769 return ix86_tls_symbol;
13772 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13774 static GTY(()) rtx ix86_tls_module_base_symbol;
13776 static rtx
13777 ix86_tls_module_base (void)
13779 if (!ix86_tls_module_base_symbol)
13781 ix86_tls_module_base_symbol
13782 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13784 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13785 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13788 return ix86_tls_module_base_symbol;
13791 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13792 false if we expect this to be used for a memory address and true if
13793 we expect to load the address into a register. */
13795 static rtx
13796 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13798 rtx dest, base, off;
13799 rtx pic = NULL_RTX, tp = NULL_RTX;
13800 machine_mode tp_mode = Pmode;
13801 int type;
13803 /* Fall back to the global dynamic model if the toolchain cannot support local
13804 dynamic. */
13805 if (TARGET_SUN_TLS && !TARGET_64BIT
13806 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13807 && model == TLS_MODEL_LOCAL_DYNAMIC)
13808 model = TLS_MODEL_GLOBAL_DYNAMIC;
13810 switch (model)
13812 case TLS_MODEL_GLOBAL_DYNAMIC:
13813 dest = gen_reg_rtx (Pmode);
13815 if (!TARGET_64BIT)
13817 if (flag_pic && !TARGET_PECOFF)
13818 pic = pic_offset_table_rtx;
13819 else
13821 pic = gen_reg_rtx (Pmode);
13822 emit_insn (gen_set_got (pic));
13826 if (TARGET_GNU2_TLS)
13828 if (TARGET_64BIT)
13829 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13830 else
13831 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13833 tp = get_thread_pointer (Pmode, true);
13834 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13836 if (GET_MODE (x) != Pmode)
13837 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13839 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13841 else
13843 rtx caddr = ix86_tls_get_addr ();
13845 if (TARGET_64BIT)
13847 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13848 rtx_insn *insns;
13850 start_sequence ();
13851 emit_call_insn
13852 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13853 insns = get_insns ();
13854 end_sequence ();
13856 if (GET_MODE (x) != Pmode)
13857 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13859 RTL_CONST_CALL_P (insns) = 1;
13860 emit_libcall_block (insns, dest, rax, x);
13862 else
13863 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13865 break;
13867 case TLS_MODEL_LOCAL_DYNAMIC:
13868 base = gen_reg_rtx (Pmode);
13870 if (!TARGET_64BIT)
13872 if (flag_pic)
13873 pic = pic_offset_table_rtx;
13874 else
13876 pic = gen_reg_rtx (Pmode);
13877 emit_insn (gen_set_got (pic));
13881 if (TARGET_GNU2_TLS)
13883 rtx tmp = ix86_tls_module_base ();
13885 if (TARGET_64BIT)
13886 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13887 else
13888 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13890 tp = get_thread_pointer (Pmode, true);
13891 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13892 gen_rtx_MINUS (Pmode, tmp, tp));
13894 else
13896 rtx caddr = ix86_tls_get_addr ();
13898 if (TARGET_64BIT)
13900 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13901 rtx_insn *insns;
13902 rtx eqv;
13904 start_sequence ();
13905 emit_call_insn
13906 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13907 insns = get_insns ();
13908 end_sequence ();
13910 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13911 share the LD_BASE result with other LD model accesses. */
13912 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13913 UNSPEC_TLS_LD_BASE);
13915 RTL_CONST_CALL_P (insns) = 1;
13916 emit_libcall_block (insns, base, rax, eqv);
13918 else
13919 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13922 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13923 off = gen_rtx_CONST (Pmode, off);
13925 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13927 if (TARGET_GNU2_TLS)
13929 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13931 if (GET_MODE (x) != Pmode)
13932 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13934 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13936 break;
13938 case TLS_MODEL_INITIAL_EXEC:
13939 if (TARGET_64BIT)
13941 if (TARGET_SUN_TLS && !TARGET_X32)
13943 /* The Sun linker took the AMD64 TLS spec literally
13944 and can only handle %rax as the destination of the
13945 initial executable code sequence. */
13947 dest = gen_reg_rtx (DImode);
13948 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13949 return dest;
13952 /* Generate DImode references to avoid %fs:(%reg32)
13953 problems and the linker IE->LE relaxation bug. */
13954 tp_mode = DImode;
13955 pic = NULL;
13956 type = UNSPEC_GOTNTPOFF;
13958 else if (flag_pic)
13960 set_pic_reg_ever_live ();
13961 pic = pic_offset_table_rtx;
13962 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13964 else if (!TARGET_ANY_GNU_TLS)
13966 pic = gen_reg_rtx (Pmode);
13967 emit_insn (gen_set_got (pic));
13968 type = UNSPEC_GOTTPOFF;
13970 else
13972 pic = NULL;
13973 type = UNSPEC_INDNTPOFF;
13976 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13977 off = gen_rtx_CONST (tp_mode, off);
13978 if (pic)
13979 off = gen_rtx_PLUS (tp_mode, pic, off);
13980 off = gen_const_mem (tp_mode, off);
13981 set_mem_alias_set (off, ix86_GOT_alias_set ());
13983 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13985 base = get_thread_pointer (tp_mode,
13986 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13987 off = force_reg (tp_mode, off);
13988 return gen_rtx_PLUS (tp_mode, base, off);
13990 else
13992 base = get_thread_pointer (Pmode, true);
13993 dest = gen_reg_rtx (Pmode);
13994 emit_insn (ix86_gen_sub3 (dest, base, off));
13996 break;
13998 case TLS_MODEL_LOCAL_EXEC:
13999 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14000 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14001 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14002 off = gen_rtx_CONST (Pmode, off);
14004 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14006 base = get_thread_pointer (Pmode,
14007 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14008 return gen_rtx_PLUS (Pmode, base, off);
14010 else
14012 base = get_thread_pointer (Pmode, true);
14013 dest = gen_reg_rtx (Pmode);
14014 emit_insn (ix86_gen_sub3 (dest, base, off));
14016 break;
14018 default:
14019 gcc_unreachable ();
14022 return dest;
14025 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14026 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14027 unique refptr-DECL symbol corresponding to symbol DECL. */
14029 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
14030 htab_t dllimport_map;
14032 static tree
14033 get_dllimport_decl (tree decl, bool beimport)
14035 struct tree_map *h, in;
14036 void **loc;
14037 const char *name;
14038 const char *prefix;
14039 size_t namelen, prefixlen;
14040 char *imp_name;
14041 tree to;
14042 rtx rtl;
14044 if (!dllimport_map)
14045 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
14047 in.hash = htab_hash_pointer (decl);
14048 in.base.from = decl;
14049 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
14050 h = (struct tree_map *) *loc;
14051 if (h)
14052 return h->to;
14054 *loc = h = ggc_alloc<tree_map> ();
14055 h->hash = in.hash;
14056 h->base.from = decl;
14057 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14058 VAR_DECL, NULL, ptr_type_node);
14059 DECL_ARTIFICIAL (to) = 1;
14060 DECL_IGNORED_P (to) = 1;
14061 DECL_EXTERNAL (to) = 1;
14062 TREE_READONLY (to) = 1;
14064 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14065 name = targetm.strip_name_encoding (name);
14066 if (beimport)
14067 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14068 ? "*__imp_" : "*__imp__";
14069 else
14070 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14071 namelen = strlen (name);
14072 prefixlen = strlen (prefix);
14073 imp_name = (char *) alloca (namelen + prefixlen + 1);
14074 memcpy (imp_name, prefix, prefixlen);
14075 memcpy (imp_name + prefixlen, name, namelen + 1);
14077 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14078 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14079 SET_SYMBOL_REF_DECL (rtl, to);
14080 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14081 if (!beimport)
14083 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14084 #ifdef SUB_TARGET_RECORD_STUB
14085 SUB_TARGET_RECORD_STUB (name);
14086 #endif
14089 rtl = gen_const_mem (Pmode, rtl);
14090 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14092 SET_DECL_RTL (to, rtl);
14093 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14095 return to;
14098 /* Expand SYMBOL into its corresponding far-address symbol.
14099 WANT_REG is true if we require the result to be a register. */
14101 static rtx
14102 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14104 tree imp_decl;
14105 rtx x;
14107 gcc_assert (SYMBOL_REF_DECL (symbol));
14108 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14110 x = DECL_RTL (imp_decl);
14111 if (want_reg)
14112 x = force_reg (Pmode, x);
14113 return x;
14116 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14117 true if we require the result to be a register. */
14119 static rtx
14120 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14122 tree imp_decl;
14123 rtx x;
14125 gcc_assert (SYMBOL_REF_DECL (symbol));
14126 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14128 x = DECL_RTL (imp_decl);
14129 if (want_reg)
14130 x = force_reg (Pmode, x);
14131 return x;
14134 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14135 is true if we require the result to be a register. */
14137 static rtx
14138 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14140 if (!TARGET_PECOFF)
14141 return NULL_RTX;
14143 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14145 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14146 return legitimize_dllimport_symbol (addr, inreg);
14147 if (GET_CODE (addr) == CONST
14148 && GET_CODE (XEXP (addr, 0)) == PLUS
14149 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14150 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14152 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14153 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14157 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14158 return NULL_RTX;
14159 if (GET_CODE (addr) == SYMBOL_REF
14160 && !is_imported_p (addr)
14161 && SYMBOL_REF_EXTERNAL_P (addr)
14162 && SYMBOL_REF_DECL (addr))
14163 return legitimize_pe_coff_extern_decl (addr, inreg);
14165 if (GET_CODE (addr) == CONST
14166 && GET_CODE (XEXP (addr, 0)) == PLUS
14167 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14168 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14169 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14170 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14172 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14173 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14175 return NULL_RTX;
14178 /* Try machine-dependent ways of modifying an illegitimate address
14179 to be legitimate. If we find one, return the new, valid address.
14180 This macro is used in only one place: `memory_address' in explow.c.
14182 OLDX is the address as it was before break_out_memory_refs was called.
14183 In some cases it is useful to look at this to decide what needs to be done.
14185 It is always safe for this macro to do nothing. It exists to recognize
14186 opportunities to optimize the output.
14188 For the 80386, we handle X+REG by loading X into a register R and
14189 using R+REG. R will go in a general reg and indexing will be used.
14190 However, if REG is a broken-out memory address or multiplication,
14191 nothing needs to be done because REG can certainly go in a general reg.
14193 When -fpic is used, special handling is needed for symbolic references.
14194 See comments by legitimize_pic_address in i386.c for details. */
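/* For example, the shift form
	(plus (ashift (reg) (const_int 2)) (reg))
   is rewritten below into the equivalent multiply form
	(plus (mult (reg) (const_int 4)) (reg))
   so that it matches the canonical base + index*scale shape expected by
   ix86_legitimate_address_p.  */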
14196 static rtx
14197 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14199 int changed = 0;
14200 unsigned log;
14202 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14203 if (log)
14204 return legitimize_tls_address (x, (enum tls_model) log, false);
14205 if (GET_CODE (x) == CONST
14206 && GET_CODE (XEXP (x, 0)) == PLUS
14207 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14208 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14210 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14211 (enum tls_model) log, false);
14212 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14215 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14217 rtx tmp = legitimize_pe_coff_symbol (x, true);
14218 if (tmp)
14219 return tmp;
14222 if (flag_pic && SYMBOLIC_CONST (x))
14223 return legitimize_pic_address (x, 0);
14225 #if TARGET_MACHO
14226 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14227 return machopic_indirect_data_reference (x, 0);
14228 #endif
14230 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14231 if (GET_CODE (x) == ASHIFT
14232 && CONST_INT_P (XEXP (x, 1))
14233 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14235 changed = 1;
14236 log = INTVAL (XEXP (x, 1));
14237 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14238 GEN_INT (1 << log));
14241 if (GET_CODE (x) == PLUS)
14243 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14245 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14246 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14247 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14249 changed = 1;
14250 log = INTVAL (XEXP (XEXP (x, 0), 1));
14251 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14252 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14253 GEN_INT (1 << log));
14256 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14257 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14258 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14260 changed = 1;
14261 log = INTVAL (XEXP (XEXP (x, 1), 1));
14262 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14263 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14264 GEN_INT (1 << log));
14267 /* Put multiply first if it isn't already. */
14268 if (GET_CODE (XEXP (x, 1)) == MULT)
14270 rtx tmp = XEXP (x, 0);
14271 XEXP (x, 0) = XEXP (x, 1);
14272 XEXP (x, 1) = tmp;
14273 changed = 1;
14276 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14277 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14278 created by virtual register instantiation, register elimination, and
14279 similar optimizations. */
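/* As a concrete (hypothetical) instance of the rewrite described above:
   (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 16)))
   becomes
   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 16)),
   which matches the index*scale + base + displacement addressing form.  */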
14280 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14282 changed = 1;
14283 x = gen_rtx_PLUS (Pmode,
14284 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14285 XEXP (XEXP (x, 1), 0)),
14286 XEXP (XEXP (x, 1), 1));
14289 /* Canonicalize
14290 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14291 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14292 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14293 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14294 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14295 && CONSTANT_P (XEXP (x, 1)))
14297 rtx constant;
14298 rtx other = NULL_RTX;
14300 if (CONST_INT_P (XEXP (x, 1)))
14302 constant = XEXP (x, 1);
14303 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14305 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14307 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14308 other = XEXP (x, 1);
14310 else
14311 constant = 0;
14313 if (constant)
14315 changed = 1;
14316 x = gen_rtx_PLUS (Pmode,
14317 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14318 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14319 plus_constant (Pmode, other,
14320 INTVAL (constant)));
14324 if (changed && ix86_legitimate_address_p (mode, x, false))
14325 return x;
14327 if (GET_CODE (XEXP (x, 0)) == MULT)
14329 changed = 1;
14330 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14333 if (GET_CODE (XEXP (x, 1)) == MULT)
14335 changed = 1;
14336 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14339 if (changed
14340 && REG_P (XEXP (x, 1))
14341 && REG_P (XEXP (x, 0)))
14342 return x;
14344 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14346 changed = 1;
14347 x = legitimize_pic_address (x, 0);
14350 if (changed && ix86_legitimate_address_p (mode, x, false))
14351 return x;
14353 if (REG_P (XEXP (x, 0)))
14355 rtx temp = gen_reg_rtx (Pmode);
14356 rtx val = force_operand (XEXP (x, 1), temp);
14357 if (val != temp)
14359 val = convert_to_mode (Pmode, val, 1);
14360 emit_move_insn (temp, val);
14363 XEXP (x, 1) = temp;
14364 return x;
14367 else if (REG_P (XEXP (x, 1)))
14369 rtx temp = gen_reg_rtx (Pmode);
14370 rtx val = force_operand (XEXP (x, 0), temp);
14371 if (val != temp)
14373 val = convert_to_mode (Pmode, val, 1);
14374 emit_move_insn (temp, val);
14377 XEXP (x, 0) = temp;
14378 return x;
14382 return x;
14385 /* Print an integer constant expression in assembler syntax. Addition
14386 and subtraction are the only arithmetic that may appear in these
14387 expressions. FILE is the stdio stream to write to, X is the rtx, and
14388 CODE is the operand print code from the output string. */
14390 static void
14391 output_pic_addr_const (FILE *file, rtx x, int code)
14393 char buf[256];
14395 switch (GET_CODE (x))
14397 case PC:
14398 gcc_assert (flag_pic);
14399 putc ('.', file);
14400 break;
14402 case SYMBOL_REF:
14403 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14404 output_addr_const (file, x);
14405 else
14407 const char *name = XSTR (x, 0);
14409 /* Mark the decl as referenced so that cgraph will
14410 output the function. */
14411 if (SYMBOL_REF_DECL (x))
14412 mark_decl_referenced (SYMBOL_REF_DECL (x));
14414 #if TARGET_MACHO
14415 if (MACHOPIC_INDIRECT
14416 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14417 name = machopic_indirection_name (x, /*stub_p=*/true);
14418 #endif
14419 assemble_name (file, name);
14421 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14422 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14423 fputs ("@PLT", file);
14424 break;
14426 case LABEL_REF:
14427 x = XEXP (x, 0);
14428 /* FALLTHRU */
14429 case CODE_LABEL:
14430 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14431 assemble_name (asm_out_file, buf);
14432 break;
14434 case CONST_INT:
14435 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14436 break;
14438 case CONST:
14439 /* This used to output parentheses around the expression,
14440 but that does not work on the 386 (either ATT or BSD assembler). */
14441 output_pic_addr_const (file, XEXP (x, 0), code);
14442 break;
14444 case CONST_DOUBLE:
14445 if (GET_MODE (x) == VOIDmode)
14447 /* We can use %d if the number is <32 bits and positive. */
14448 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14449 fprintf (file, "0x%lx%08lx",
14450 (unsigned long) CONST_DOUBLE_HIGH (x),
14451 (unsigned long) CONST_DOUBLE_LOW (x));
14452 else
14453 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14455 else
14456 /* We can't handle floating point constants;
14457 TARGET_PRINT_OPERAND must handle them. */
14458 output_operand_lossage ("floating constant misused");
14459 break;
14461 case PLUS:
14462 /* Some assemblers need integer constants to appear first. */
14463 if (CONST_INT_P (XEXP (x, 0)))
14465 output_pic_addr_const (file, XEXP (x, 0), code);
14466 putc ('+', file);
14467 output_pic_addr_const (file, XEXP (x, 1), code);
14469 else
14471 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14472 output_pic_addr_const (file, XEXP (x, 1), code);
14473 putc ('+', file);
14474 output_pic_addr_const (file, XEXP (x, 0), code);
14476 break;
14478 case MINUS:
14479 if (!TARGET_MACHO)
14480 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14481 output_pic_addr_const (file, XEXP (x, 0), code);
14482 putc ('-', file);
14483 output_pic_addr_const (file, XEXP (x, 1), code);
14484 if (!TARGET_MACHO)
14485 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14486 break;
14488 case UNSPEC:
14489 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14491 bool f = i386_asm_output_addr_const_extra (file, x);
14492 gcc_assert (f);
14493 break;
14496 gcc_assert (XVECLEN (x, 0) == 1);
14497 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14498 switch (XINT (x, 1))
14500 case UNSPEC_GOT:
14501 fputs ("@GOT", file);
14502 break;
14503 case UNSPEC_GOTOFF:
14504 fputs ("@GOTOFF", file);
14505 break;
14506 case UNSPEC_PLTOFF:
14507 fputs ("@PLTOFF", file);
14508 break;
14509 case UNSPEC_PCREL:
14510 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14511 "(%rip)" : "[rip]", file);
14512 break;
14513 case UNSPEC_GOTPCREL:
14514 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14515 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14516 break;
14517 case UNSPEC_GOTTPOFF:
14518 /* FIXME: This might be @TPOFF in Sun ld too. */
14519 fputs ("@gottpoff", file);
14520 break;
14521 case UNSPEC_TPOFF:
14522 fputs ("@tpoff", file);
14523 break;
14524 case UNSPEC_NTPOFF:
14525 if (TARGET_64BIT)
14526 fputs ("@tpoff", file);
14527 else
14528 fputs ("@ntpoff", file);
14529 break;
14530 case UNSPEC_DTPOFF:
14531 fputs ("@dtpoff", file);
14532 break;
14533 case UNSPEC_GOTNTPOFF:
14534 if (TARGET_64BIT)
14535 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14536 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14537 else
14538 fputs ("@gotntpoff", file);
14539 break;
14540 case UNSPEC_INDNTPOFF:
14541 fputs ("@indntpoff", file);
14542 break;
14543 #if TARGET_MACHO
14544 case UNSPEC_MACHOPIC_OFFSET:
14545 putc ('-', file);
14546 machopic_output_function_base_name (file);
14547 break;
14548 #endif
14549 default:
14550 output_operand_lossage ("invalid UNSPEC as operand");
14551 break;
14553 break;
14555 default:
14556 output_operand_lossage ("invalid expression as operand");
14560 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14561 We need to emit DTP-relative relocations. */
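/* A sketch of the output, assuming a symbol "foo" and an ASM_LONG of
   ".long" (both assumptions for illustration): SIZE 4 emits
   ".long foo@dtpoff", while SIZE 8 emits ".long foo@dtpoff, 0", with the
   upper half written as a literal zero.  */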
14563 static void ATTRIBUTE_UNUSED
14564 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14566 fputs (ASM_LONG, file);
14567 output_addr_const (file, x);
14568 fputs ("@dtpoff", file);
14569 switch (size)
14571 case 4:
14572 break;
14573 case 8:
14574 fputs (", 0", file);
14575 break;
14576 default:
14577 gcc_unreachable ();
14581 /* Return true if X is a representation of the PIC register. This copes
14582 with calls from ix86_find_base_term, where the register might have
14583 been replaced by a cselib value. */
14585 static bool
14586 ix86_pic_register_p (rtx x)
14588 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14589 return (pic_offset_table_rtx
14590 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14591 else if (!REG_P (x))
14592 return false;
14593 else if (pic_offset_table_rtx)
14595 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14596 return true;
14597 if (HARD_REGISTER_P (x)
14598 && !HARD_REGISTER_P (pic_offset_table_rtx)
14599 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14600 return true;
14601 return false;
14603 else
14604 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14607 /* Helper function for ix86_delegitimize_address.
14608 Attempt to delegitimize TLS local-exec accesses. */
14610 static rtx
14611 ix86_delegitimize_tls_address (rtx orig_x)
14613 rtx x = orig_x, unspec;
14614 struct ix86_address addr;
14616 if (!TARGET_TLS_DIRECT_SEG_REFS)
14617 return orig_x;
14618 if (MEM_P (x))
14619 x = XEXP (x, 0);
14620 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14621 return orig_x;
14622 if (ix86_decompose_address (x, &addr) == 0
14623 || addr.seg != DEFAULT_TLS_SEG_REG
14624 || addr.disp == NULL_RTX
14625 || GET_CODE (addr.disp) != CONST)
14626 return orig_x;
14627 unspec = XEXP (addr.disp, 0);
14628 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14629 unspec = XEXP (unspec, 0);
14630 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14631 return orig_x;
14632 x = XVECEXP (unspec, 0, 0);
14633 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14634 if (unspec != XEXP (addr.disp, 0))
14635 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14636 if (addr.index)
14638 rtx idx = addr.index;
14639 if (addr.scale != 1)
14640 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14641 x = gen_rtx_PLUS (Pmode, idx, x);
14643 if (addr.base)
14644 x = gen_rtx_PLUS (Pmode, addr.base, x);
14645 if (MEM_P (orig_x))
14646 x = replace_equiv_address_nv (orig_x, x);
14647 return x;
14650 /* In the name of slightly smaller debug output, and to cater to
14651 general assembler lossage, recognize PIC+GOTOFF and turn it back
14652 into a direct symbol reference.
14654 On Darwin, this is necessary to avoid a crash, because Darwin
14655 has a different PIC label for each routine but the DWARF debugging
14656 information is not associated with any particular routine, so it's
14657 necessary to remove references to the PIC label from RTL stored by
14658 the DWARF output code. */
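/* An illustrative (hypothetical) input: with the PIC register in %ebx,
   (plus (reg:SI bx) (const:SI (unspec:SI [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is delegitimized back to (symbol_ref "foo") when the original rtx is
   not a MEM, so the debug info can refer to "foo" directly.  */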
14660 static rtx
14661 ix86_delegitimize_address (rtx x)
14663 rtx orig_x = delegitimize_mem_from_attrs (x);
14664 /* addend is NULL or some rtx if x is something+GOTOFF where
14665 something doesn't include the PIC register. */
14666 rtx addend = NULL_RTX;
14667 /* reg_addend is NULL or a multiple of some register. */
14668 rtx reg_addend = NULL_RTX;
14669 /* const_addend is NULL or a const_int. */
14670 rtx const_addend = NULL_RTX;
14671 /* This is the result, or NULL. */
14672 rtx result = NULL_RTX;
14674 x = orig_x;
14676 if (MEM_P (x))
14677 x = XEXP (x, 0);
14679 if (TARGET_64BIT)
14681 if (GET_CODE (x) == CONST
14682 && GET_CODE (XEXP (x, 0)) == PLUS
14683 && GET_MODE (XEXP (x, 0)) == Pmode
14684 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14685 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14686 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14688 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14689 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14690 if (MEM_P (orig_x))
14691 x = replace_equiv_address_nv (orig_x, x);
14692 return x;
14695 if (GET_CODE (x) == CONST
14696 && GET_CODE (XEXP (x, 0)) == UNSPEC
14697 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14698 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14699 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14701 x = XVECEXP (XEXP (x, 0), 0, 0);
14702 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14704 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14705 GET_MODE (x), 0);
14706 if (x == NULL_RTX)
14707 return orig_x;
14709 return x;
14712 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14713 return ix86_delegitimize_tls_address (orig_x);
14715 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14716 and -mcmodel=medium -fpic. */
14719 if (GET_CODE (x) != PLUS
14720 || GET_CODE (XEXP (x, 1)) != CONST)
14721 return ix86_delegitimize_tls_address (orig_x);
14723 if (ix86_pic_register_p (XEXP (x, 0)))
14724 /* %ebx + GOT/GOTOFF */
14726 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14728 /* %ebx + %reg * scale + GOT/GOTOFF */
14729 reg_addend = XEXP (x, 0);
14730 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14731 reg_addend = XEXP (reg_addend, 1);
14732 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14733 reg_addend = XEXP (reg_addend, 0);
14734 else
14736 reg_addend = NULL_RTX;
14737 addend = XEXP (x, 0);
14740 else
14741 addend = XEXP (x, 0);
14743 x = XEXP (XEXP (x, 1), 0);
14744 if (GET_CODE (x) == PLUS
14745 && CONST_INT_P (XEXP (x, 1)))
14747 const_addend = XEXP (x, 1);
14748 x = XEXP (x, 0);
14751 if (GET_CODE (x) == UNSPEC
14752 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14753 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14754 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14755 && !MEM_P (orig_x) && !addend)))
14756 result = XVECEXP (x, 0, 0);
14758 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14759 && !MEM_P (orig_x))
14760 result = XVECEXP (x, 0, 0);
14762 if (! result)
14763 return ix86_delegitimize_tls_address (orig_x);
14765 if (const_addend)
14766 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14767 if (reg_addend)
14768 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14769 if (addend)
14771 /* If the rest of original X doesn't involve the PIC register, add
14772 addend and subtract pic_offset_table_rtx. This can happen e.g.
14773 for code like:
14774 leal (%ebx, %ecx, 4), %ecx
14776 movl foo@GOTOFF(%ecx), %edx
14777 in which case we return (%ecx - %ebx) + foo
14778 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14779 and reload has completed. */
14780 if (pic_offset_table_rtx
14781 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14782 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14783 pic_offset_table_rtx),
14784 result);
14785 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14787 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14788 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14789 result = gen_rtx_PLUS (Pmode, tmp, result);
14791 else
14792 return orig_x;
14794 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14796 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14797 if (result == NULL_RTX)
14798 return orig_x;
14800 return result;
14803 /* If X is a machine specific address (i.e. a symbol or label being
14804 referenced as a displacement from the GOT implemented using an
14805 UNSPEC), then return the base term. Otherwise return X. */
14808 ix86_find_base_term (rtx x)
14810 rtx term;
14812 if (TARGET_64BIT)
14814 if (GET_CODE (x) != CONST)
14815 return x;
14816 term = XEXP (x, 0);
14817 if (GET_CODE (term) == PLUS
14818 && (CONST_INT_P (XEXP (term, 1))
14819 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14820 term = XEXP (term, 0);
14821 if (GET_CODE (term) != UNSPEC
14822 || (XINT (term, 1) != UNSPEC_GOTPCREL
14823 && XINT (term, 1) != UNSPEC_PCREL))
14824 return x;
14826 return XVECEXP (term, 0, 0);
14829 return ix86_delegitimize_address (x);
14832 static void
14833 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14834 bool fp, FILE *file)
14836 const char *suffix;
14838 if (mode == CCFPmode || mode == CCFPUmode)
14840 code = ix86_fp_compare_code_to_integer (code);
14841 mode = CCmode;
14843 if (reverse)
14844 code = reverse_condition (code);
14846 switch (code)
14848 case EQ:
14849 switch (mode)
14851 case CCAmode:
14852 suffix = "a";
14853 break;
14855 case CCCmode:
14856 suffix = "c";
14857 break;
14859 case CCOmode:
14860 suffix = "o";
14861 break;
14863 case CCSmode:
14864 suffix = "s";
14865 break;
14867 default:
14868 suffix = "e";
14870 break;
14871 case NE:
14872 switch (mode)
14874 case CCAmode:
14875 suffix = "na";
14876 break;
14878 case CCCmode:
14879 suffix = "nc";
14880 break;
14882 case CCOmode:
14883 suffix = "no";
14884 break;
14886 case CCSmode:
14887 suffix = "ns";
14888 break;
14890 default:
14891 suffix = "ne";
14893 break;
14894 case GT:
14895 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14896 suffix = "g";
14897 break;
14898 case GTU:
14899 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14900 Those same assemblers have the same but opposite lossage on cmov. */
14901 if (mode == CCmode)
14902 suffix = fp ? "nbe" : "a";
14903 else
14904 gcc_unreachable ();
14905 break;
14906 case LT:
14907 switch (mode)
14909 case CCNOmode:
14910 case CCGOCmode:
14911 suffix = "s";
14912 break;
14914 case CCmode:
14915 case CCGCmode:
14916 suffix = "l";
14917 break;
14919 default:
14920 gcc_unreachable ();
14922 break;
14923 case LTU:
14924 if (mode == CCmode)
14925 suffix = "b";
14926 else if (mode == CCCmode)
14927 suffix = "c";
14928 else
14929 gcc_unreachable ();
14930 break;
14931 case GE:
14932 switch (mode)
14934 case CCNOmode:
14935 case CCGOCmode:
14936 suffix = "ns";
14937 break;
14939 case CCmode:
14940 case CCGCmode:
14941 suffix = "ge";
14942 break;
14944 default:
14945 gcc_unreachable ();
14947 break;
14948 case GEU:
14949 if (mode == CCmode)
14950 suffix = fp ? "nb" : "ae";
14951 else if (mode == CCCmode)
14952 suffix = "nc";
14953 else
14954 gcc_unreachable ();
14955 break;
14956 case LE:
14957 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14958 suffix = "le";
14959 break;
14960 case LEU:
14961 if (mode == CCmode)
14962 suffix = "be";
14963 else
14964 gcc_unreachable ();
14965 break;
14966 case UNORDERED:
14967 suffix = fp ? "u" : "p";
14968 break;
14969 case ORDERED:
14970 suffix = fp ? "nu" : "np";
14971 break;
14972 default:
14973 gcc_unreachable ();
14975 fputs (suffix, file);
14978 /* Print the name of register X to FILE based on its machine mode and number.
14979 If CODE is 'w', pretend the mode is HImode.
14980 If CODE is 'b', pretend the mode is QImode.
14981 If CODE is 'k', pretend the mode is SImode.
14982 If CODE is 'q', pretend the mode is DImode.
14983 If CODE is 'x', pretend the mode is V4SFmode.
14984 If CODE is 't', pretend the mode is V8SFmode.
14985 If CODE is 'g', pretend the mode is V16SFmode.
14986 If CODE is 'h', pretend the reg is the 'high' byte register.
14987 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14988 If CODE is 'd', duplicate the operand for an AVX instruction.
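   As a hypothetical illustration: for (reg:SI ax), CODE 'b' prints "al",
   CODE 'w' prints "ax", and CODE 'q' prints "rax" on a 64-bit target,
   each preceded by '%' when the AT&T dialect is in use.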
14991 void
14992 print_reg (rtx x, int code, FILE *file)
14994 const char *reg;
14995 unsigned int regno;
14996 bool duplicated = code == 'd' && TARGET_AVX;
14998 if (ASSEMBLER_DIALECT == ASM_ATT)
14999 putc ('%', file);
15001 if (x == pc_rtx)
15003 gcc_assert (TARGET_64BIT);
15004 fputs ("rip", file);
15005 return;
15008 regno = true_regnum (x);
15009 gcc_assert (regno != ARG_POINTER_REGNUM
15010 && regno != FRAME_POINTER_REGNUM
15011 && regno != FLAGS_REG
15012 && regno != FPSR_REG
15013 && regno != FPCR_REG);
15015 if (code == 'w' || MMX_REG_P (x))
15016 code = 2;
15017 else if (code == 'b')
15018 code = 1;
15019 else if (code == 'k')
15020 code = 4;
15021 else if (code == 'q')
15022 code = 8;
15023 else if (code == 'y')
15024 code = 3;
15025 else if (code == 'h')
15026 code = 0;
15027 else if (code == 'x')
15028 code = 16;
15029 else if (code == 't')
15030 code = 32;
15031 else if (code == 'g')
15032 code = 64;
15033 else
15034 code = GET_MODE_SIZE (GET_MODE (x));
15036 /* Irritatingly, the AMD extended registers use a different naming
15037 convention from the normal registers: "r%d[bwd]" */
15038 if (REX_INT_REGNO_P (regno))
15040 gcc_assert (TARGET_64BIT);
15041 putc ('r', file);
15042 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15043 switch (code)
15045 case 0:
15046 error ("extended registers have no high halves");
15047 break;
15048 case 1:
15049 putc ('b', file);
15050 break;
15051 case 2:
15052 putc ('w', file);
15053 break;
15054 case 4:
15055 putc ('d', file);
15056 break;
15057 case 8:
15058 /* no suffix */
15059 break;
15060 default:
15061 error ("unsupported operand size for extended register");
15062 break;
15064 return;
15067 reg = NULL;
15068 switch (code)
15070 case 3:
15071 if (STACK_TOP_P (x))
15073 reg = "st(0)";
15074 break;
15076 /* FALLTHRU */
15077 case 8:
15078 case 4:
15079 case 12:
15080 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15081 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15082 /* FALLTHRU */
15083 case 16:
15084 case 2:
15085 normal:
15086 reg = hi_reg_name[regno];
15087 break;
15088 case 1:
15089 if (regno >= ARRAY_SIZE (qi_reg_name))
15090 goto normal;
15091 reg = qi_reg_name[regno];
15092 break;
15093 case 0:
15094 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15095 goto normal;
15096 reg = qi_high_reg_name[regno];
15097 break;
15098 case 32:
15099 if (SSE_REG_P (x))
15101 gcc_assert (!duplicated);
15102 putc ('y', file);
15103 fputs (hi_reg_name[regno] + 1, file);
15104 return;
15106 case 64:
15107 if (SSE_REG_P (x))
15109 gcc_assert (!duplicated);
15110 putc ('z', file);
15111 fputs (hi_reg_name[REGNO (x)] + 1, file);
15112 return;
15114 break;
15115 default:
15116 gcc_unreachable ();
15119 fputs (reg, file);
15120 if (duplicated)
15122 if (ASSEMBLER_DIALECT == ASM_ATT)
15123 fprintf (file, ", %%%s", reg);
15124 else
15125 fprintf (file, ", %s", reg);
15129 /* Meaning of CODE:
15130 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15131 C -- print opcode suffix for set/cmov insn.
15132 c -- like C, but print reversed condition
15133 F,f -- likewise, but for floating-point.
15134 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15135 otherwise nothing
15136 R -- print embedded rounding and sae.
15137 r -- print only sae.
15138 z -- print the opcode suffix for the size of the current operand.
15139 Z -- likewise, with special suffixes for x87 instructions.
15140 * -- print a star (in certain assembler syntax)
15141 A -- print an absolute memory reference.
15142 E -- print address with DImode register names if TARGET_64BIT.
15143 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15144 s -- print a shift double count, followed by the assembler's argument
15145 delimiter.
15146 b -- print the QImode name of the register for the indicated operand.
15147 %b0 would print %al if operands[0] is reg 0.
15148 w -- likewise, print the HImode name of the register.
15149 k -- likewise, print the SImode name of the register.
15150 q -- likewise, print the DImode name of the register.
15151 x -- likewise, print the V4SFmode name of the register.
15152 t -- likewise, print the V8SFmode name of the register.
15153 g -- likewise, print the V16SFmode name of the register.
15154 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15155 y -- print "st(0)" instead of "st" as a register.
15156 d -- print duplicated register operand for AVX instruction.
15157 D -- print condition for SSE cmp instruction.
15158 P -- if PIC, print an @PLT suffix.
15159 p -- print raw symbol name.
15160 X -- don't print any sort of PIC '@' suffix for a symbol.
15161 & -- print some in-use local-dynamic symbol name.
15162 H -- print a memory address offset by 8; used for sse high-parts
15163 Y -- print condition for XOP pcom* instruction.
15164 + -- print a branch hint as 'cs' or 'ds' prefix
15165 ; -- print a semicolon (after prefixes due to a bug in older gas).
15166 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15167 @ -- print the segment register of a thread base pointer load
15168 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15169 ! -- print MPX prefix for jxx/call/ret instructions if required.
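   As a hypothetical illustration (the template is not taken from the md
   files): with operand 0 in SImode, "mov%z0\t{%1, %0|%0, %1}" prints the
   "movl" mnemonic in AT&T syntax, while "%b0", "%w0", "%k0" and "%q0"
   print the 8-, 16-, 32- and 64-bit register names for operand 0.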
15172 void
15173 ix86_print_operand (FILE *file, rtx x, int code)
15175 if (code)
15177 switch (code)
15179 case 'A':
15180 switch (ASSEMBLER_DIALECT)
15182 case ASM_ATT:
15183 putc ('*', file);
15184 break;
15186 case ASM_INTEL:
15187 /* Intel syntax. For absolute addresses, registers should not
15188 be surrounded by brackets. */
15189 if (!REG_P (x))
15191 putc ('[', file);
15192 ix86_print_operand (file, x, 0);
15193 putc (']', file);
15194 return;
15196 break;
15198 default:
15199 gcc_unreachable ();
15202 ix86_print_operand (file, x, 0);
15203 return;
15205 case 'E':
15206 /* Wrap address in an UNSPEC to declare special handling. */
15207 if (TARGET_64BIT)
15208 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15210 output_address (x);
15211 return;
15213 case 'L':
15214 if (ASSEMBLER_DIALECT == ASM_ATT)
15215 putc ('l', file);
15216 return;
15218 case 'W':
15219 if (ASSEMBLER_DIALECT == ASM_ATT)
15220 putc ('w', file);
15221 return;
15223 case 'B':
15224 if (ASSEMBLER_DIALECT == ASM_ATT)
15225 putc ('b', file);
15226 return;
15228 case 'Q':
15229 if (ASSEMBLER_DIALECT == ASM_ATT)
15230 putc ('l', file);
15231 return;
15233 case 'S':
15234 if (ASSEMBLER_DIALECT == ASM_ATT)
15235 putc ('s', file);
15236 return;
15238 case 'T':
15239 if (ASSEMBLER_DIALECT == ASM_ATT)
15240 putc ('t', file);
15241 return;
15243 case 'O':
15244 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15245 if (ASSEMBLER_DIALECT != ASM_ATT)
15246 return;
15248 switch (GET_MODE_SIZE (GET_MODE (x)))
15250 case 2:
15251 putc ('w', file);
15252 break;
15254 case 4:
15255 putc ('l', file);
15256 break;
15258 case 8:
15259 putc ('q', file);
15260 break;
15262 default:
15263 output_operand_lossage
15264 ("invalid operand size for operand code 'O'");
15265 return;
15268 putc ('.', file);
15269 #endif
15270 return;
15272 case 'z':
15273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15275 /* Opcodes don't get size suffixes when using Intel syntax. */
15276 if (ASSEMBLER_DIALECT == ASM_INTEL)
15277 return;
15279 switch (GET_MODE_SIZE (GET_MODE (x)))
15281 case 1:
15282 putc ('b', file);
15283 return;
15285 case 2:
15286 putc ('w', file);
15287 return;
15289 case 4:
15290 putc ('l', file);
15291 return;
15293 case 8:
15294 putc ('q', file);
15295 return;
15297 default:
15298 output_operand_lossage
15299 ("invalid operand size for operand code 'z'");
15300 return;
15304 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15305 warning
15306 (0, "non-integer operand used with operand code 'z'");
15307 /* FALLTHRU */
15309 case 'Z':
15310 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15311 if (ASSEMBLER_DIALECT == ASM_INTEL)
15312 return;
15314 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15316 switch (GET_MODE_SIZE (GET_MODE (x)))
15318 case 2:
15319 #ifdef HAVE_AS_IX86_FILDS
15320 putc ('s', file);
15321 #endif
15322 return;
15324 case 4:
15325 putc ('l', file);
15326 return;
15328 case 8:
15329 #ifdef HAVE_AS_IX86_FILDQ
15330 putc ('q', file);
15331 #else
15332 fputs ("ll", file);
15333 #endif
15334 return;
15336 default:
15337 break;
15340 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15342 /* 387 opcodes don't get size suffixes
15343 if the operands are registers. */
15344 if (STACK_REG_P (x))
15345 return;
15347 switch (GET_MODE_SIZE (GET_MODE (x)))
15349 case 4:
15350 putc ('s', file);
15351 return;
15353 case 8:
15354 putc ('l', file);
15355 return;
15357 case 12:
15358 case 16:
15359 putc ('t', file);
15360 return;
15362 default:
15363 break;
15366 else
15368 output_operand_lossage
15369 ("invalid operand type used with operand code 'Z'");
15370 return;
15373 output_operand_lossage
15374 ("invalid operand size for operand code 'Z'");
15375 return;
15377 case 'd':
15378 case 'b':
15379 case 'w':
15380 case 'k':
15381 case 'q':
15382 case 'h':
15383 case 't':
15384 case 'g':
15385 case 'y':
15386 case 'x':
15387 case 'X':
15388 case 'P':
15389 case 'p':
15390 break;
15392 case 's':
15393 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15395 ix86_print_operand (file, x, 0);
15396 fputs (", ", file);
15398 return;
15400 case 'Y':
15401 switch (GET_CODE (x))
15403 case NE:
15404 fputs ("neq", file);
15405 break;
15406 case EQ:
15407 fputs ("eq", file);
15408 break;
15409 case GE:
15410 case GEU:
15411 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15412 break;
15413 case GT:
15414 case GTU:
15415 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15416 break;
15417 case LE:
15418 case LEU:
15419 fputs ("le", file);
15420 break;
15421 case LT:
15422 case LTU:
15423 fputs ("lt", file);
15424 break;
15425 case UNORDERED:
15426 fputs ("unord", file);
15427 break;
15428 case ORDERED:
15429 fputs ("ord", file);
15430 break;
15431 case UNEQ:
15432 fputs ("ueq", file);
15433 break;
15434 case UNGE:
15435 fputs ("nlt", file);
15436 break;
15437 case UNGT:
15438 fputs ("nle", file);
15439 break;
15440 case UNLE:
15441 fputs ("ule", file);
15442 break;
15443 case UNLT:
15444 fputs ("ult", file);
15445 break;
15446 case LTGT:
15447 fputs ("une", file);
15448 break;
15449 default:
15450 output_operand_lossage ("operand is not a condition code, "
15451 "invalid operand code 'Y'");
15452 return;
15454 return;
15456 case 'D':
15457 /* A little bit of braindamage here. The SSE compare instructions
15458 use completely different names for the comparisons than the
15459 fp conditional moves do. */
15460 switch (GET_CODE (x))
15462 case UNEQ:
15463 if (TARGET_AVX)
15465 fputs ("eq_us", file);
15466 break;
15468 case EQ:
15469 fputs ("eq", file);
15470 break;
15471 case UNLT:
15472 if (TARGET_AVX)
15474 fputs ("nge", file);
15475 break;
15477 case LT:
15478 fputs ("lt", file);
15479 break;
15480 case UNLE:
15481 if (TARGET_AVX)
15483 fputs ("ngt", file);
15484 break;
15486 case LE:
15487 fputs ("le", file);
15488 break;
15489 case UNORDERED:
15490 fputs ("unord", file);
15491 break;
15492 case LTGT:
15493 if (TARGET_AVX)
15495 fputs ("neq_oq", file);
15496 break;
15498 case NE:
15499 fputs ("neq", file);
15500 break;
15501 case GE:
15502 if (TARGET_AVX)
15504 fputs ("ge", file);
15505 break;
15507 case UNGE:
15508 fputs ("nlt", file);
15509 break;
15510 case GT:
15511 if (TARGET_AVX)
15513 fputs ("gt", file);
15514 break;
15516 case UNGT:
15517 fputs ("nle", file);
15518 break;
15519 case ORDERED:
15520 fputs ("ord", file);
15521 break;
15522 default:
15523 output_operand_lossage ("operand is not a condition code, "
15524 "invalid operand code 'D'");
15525 return;
15527 return;
15529 case 'F':
15530 case 'f':
15531 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15532 if (ASSEMBLER_DIALECT == ASM_ATT)
15533 putc ('.', file);
15534 #endif
15536 case 'C':
15537 case 'c':
15538 if (!COMPARISON_P (x))
15540 output_operand_lossage ("operand is not a condition code, "
15541 "invalid operand code '%c'", code);
15542 return;
15544 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15545 code == 'c' || code == 'f',
15546 code == 'F' || code == 'f',
15547 file);
15548 return;
15550 case 'H':
15551 if (!offsettable_memref_p (x))
15553 output_operand_lossage ("operand is not an offsettable memory "
15554 "reference, invalid operand code 'H'");
15555 return;
15557 /* It doesn't actually matter what mode we use here, as we're
15558 only going to use this for printing. */
15559 x = adjust_address_nv (x, DImode, 8);
15560 /* Output 'qword ptr' for intel assembler dialect. */
15561 if (ASSEMBLER_DIALECT == ASM_INTEL)
15562 code = 'q';
15563 break;
15565 case 'K':
15566 gcc_assert (CONST_INT_P (x));
15568 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15569 #ifdef HAVE_AS_IX86_HLE
15570 fputs ("xacquire ", file);
15571 #else
15572 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15573 #endif
15574 else if (INTVAL (x) & IX86_HLE_RELEASE)
15575 #ifdef HAVE_AS_IX86_HLE
15576 fputs ("xrelease ", file);
15577 #else
15578 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15579 #endif
15580 /* We do not want to print the value of the operand. */
15581 return;
15583 case 'N':
15584 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15585 fputs ("{z}", file);
15586 return;
15588 case 'r':
15589 gcc_assert (CONST_INT_P (x));
15590 gcc_assert (INTVAL (x) == ROUND_SAE);
15592 if (ASSEMBLER_DIALECT == ASM_INTEL)
15593 fputs (", ", file);
15595 fputs ("{sae}", file);
15597 if (ASSEMBLER_DIALECT == ASM_ATT)
15598 fputs (", ", file);
15600 return;
15602 case 'R':
15603 gcc_assert (CONST_INT_P (x));
15605 if (ASSEMBLER_DIALECT == ASM_INTEL)
15606 fputs (", ", file);
15608 switch (INTVAL (x))
15610 case ROUND_NEAREST_INT | ROUND_SAE:
15611 fputs ("{rn-sae}", file);
15612 break;
15613 case ROUND_NEG_INF | ROUND_SAE:
15614 fputs ("{rd-sae}", file);
15615 break;
15616 case ROUND_POS_INF | ROUND_SAE:
15617 fputs ("{ru-sae}", file);
15618 break;
15619 case ROUND_ZERO | ROUND_SAE:
15620 fputs ("{rz-sae}", file);
15621 break;
15622 default:
15623 gcc_unreachable ();
15626 if (ASSEMBLER_DIALECT == ASM_ATT)
15627 fputs (", ", file);
15629 return;
15631 case '*':
15632 if (ASSEMBLER_DIALECT == ASM_ATT)
15633 putc ('*', file);
15634 return;
15636 case '&':
15638 const char *name = get_some_local_dynamic_name ();
15639 if (name == NULL)
15640 output_operand_lossage ("'%%&' used without any "
15641 "local dynamic TLS references");
15642 else
15643 assemble_name (file, name);
15644 return;
15647 case '+':
15649 rtx x;
15651 if (!optimize
15652 || optimize_function_for_size_p (cfun)
15653 || !TARGET_BRANCH_PREDICTION_HINTS)
15654 return;
15656 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15657 if (x)
15659 int pred_val = XINT (x, 0);
15661 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15662 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15664 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15665 bool cputaken
15666 = final_forward_branch_p (current_output_insn) == 0;
15668 /* Emit hints only when the default branch prediction
15669 heuristics would fail. */
15670 if (taken != cputaken)
15672 /* We use 3e (DS) prefix for taken branches and
15673 2e (CS) prefix for not taken branches. */
15674 if (taken)
15675 fputs ("ds ; ", file);
15676 else
15677 fputs ("cs ; ", file);
15681 return;
15684 case ';':
15685 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15686 putc (';', file);
15687 #endif
15688 return;
15690 case '@':
15691 if (ASSEMBLER_DIALECT == ASM_ATT)
15692 putc ('%', file);
15694 /* The kernel uses a different segment register for performance
15695 reasons; this way a system call does not have to trash the
15696 userspace segment register, which would be expensive. */
15697 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15698 fputs ("fs", file);
15699 else
15700 fputs ("gs", file);
15701 return;
15703 case '~':
15704 putc (TARGET_AVX2 ? 'i' : 'f', file);
15705 return;
15707 case '^':
15708 if (TARGET_64BIT && Pmode != word_mode)
15709 fputs ("addr32 ", file);
15710 return;
15712 case '!':
15713 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15714 fputs ("bnd ", file);
15715 return;
15717 default:
15718 output_operand_lossage ("invalid operand code '%c'", code);
15722 if (REG_P (x))
15723 print_reg (x, code, file);
15725 else if (MEM_P (x))
15727 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15728 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15729 && GET_MODE (x) != BLKmode)
15731 const char * size;
15732 switch (GET_MODE_SIZE (GET_MODE (x)))
15734 case 1: size = "BYTE"; break;
15735 case 2: size = "WORD"; break;
15736 case 4: size = "DWORD"; break;
15737 case 8: size = "QWORD"; break;
15738 case 12: size = "TBYTE"; break;
15739 case 16:
15740 if (GET_MODE (x) == XFmode)
15741 size = "TBYTE";
15742 else
15743 size = "XMMWORD";
15744 break;
15745 case 32: size = "YMMWORD"; break;
15746 case 64: size = "ZMMWORD"; break;
15747 default:
15748 gcc_unreachable ();
15751 /* Check for explicit size override (codes 'b', 'w', 'k',
15752 'q' and 'x') */
15753 if (code == 'b')
15754 size = "BYTE";
15755 else if (code == 'w')
15756 size = "WORD";
15757 else if (code == 'k')
15758 size = "DWORD";
15759 else if (code == 'q')
15760 size = "QWORD";
15761 else if (code == 'x')
15762 size = "XMMWORD";
15764 fputs (size, file);
15765 fputs (" PTR ", file);
15768 x = XEXP (x, 0);
15769 /* Avoid (%rip) for call operands. */
15770 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15771 && !CONST_INT_P (x))
15772 output_addr_const (file, x);
15773 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15774 output_operand_lossage ("invalid constraints for operand");
15775 else
15776 output_address (x);
15779 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15781 REAL_VALUE_TYPE r;
15782 long l;
15784 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15785 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15787 if (ASSEMBLER_DIALECT == ASM_ATT)
15788 putc ('$', file);
15789 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15790 if (code == 'q')
15791 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15792 (unsigned long long) (int) l);
15793 else
15794 fprintf (file, "0x%08x", (unsigned int) l);
15797 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15799 REAL_VALUE_TYPE r;
15800 long l[2];
15802 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15803 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15805 if (ASSEMBLER_DIALECT == ASM_ATT)
15806 putc ('$', file);
15807 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15810 /* These float cases don't actually occur as immediate operands. */
15811 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15813 char dstr[30];
15815 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15816 fputs (dstr, file);
15819 else
15821 /* We have patterns that allow zero sets of memory, for instance.
15822 In 64-bit mode, we should probably support all 8-byte vectors,
15823 since we can in fact encode that into an immediate. */
15824 if (GET_CODE (x) == CONST_VECTOR)
15826 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15827 x = const0_rtx;
15830 if (code != 'P' && code != 'p')
15832 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15834 if (ASSEMBLER_DIALECT == ASM_ATT)
15835 putc ('$', file);
15837 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15838 || GET_CODE (x) == LABEL_REF)
15840 if (ASSEMBLER_DIALECT == ASM_ATT)
15841 putc ('$', file);
15842 else
15843 fputs ("OFFSET FLAT:", file);
15846 if (CONST_INT_P (x))
15847 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15848 else if (flag_pic || MACHOPIC_INDIRECT)
15849 output_pic_addr_const (file, x, code);
15850 else
15851 output_addr_const (file, x);
15855 static bool
15856 ix86_print_operand_punct_valid_p (unsigned char code)
15858 return (code == '@' || code == '*' || code == '+' || code == '&'
15859 || code == ';' || code == '~' || code == '^' || code == '!');
15862 /* Print a memory operand whose address is ADDR. */
15864 static void
15865 ix86_print_operand_address (FILE *file, rtx addr)
15867 struct ix86_address parts;
15868 rtx base, index, disp;
15869 int scale;
15870 int ok;
15871 bool vsib = false;
15872 int code = 0;
15874 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15876 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15877 gcc_assert (parts.index == NULL_RTX);
15878 parts.index = XVECEXP (addr, 0, 1);
15879 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15880 addr = XVECEXP (addr, 0, 0);
15881 vsib = true;
15883 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15885 gcc_assert (TARGET_64BIT);
15886 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15887 code = 'q';
15889 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
15891 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
15892 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
15893 if (parts.base != NULL_RTX)
15895 parts.index = parts.base;
15896 parts.scale = 1;
15898 parts.base = XVECEXP (addr, 0, 0);
15899 addr = XVECEXP (addr, 0, 0);
15901 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
15903 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15904 gcc_assert (parts.index == NULL_RTX);
15905 parts.index = XVECEXP (addr, 0, 1);
15906 addr = XVECEXP (addr, 0, 0);
15908 else
15909 ok = ix86_decompose_address (addr, &parts);
15911 gcc_assert (ok);
15913 base = parts.base;
15914 index = parts.index;
15915 disp = parts.disp;
15916 scale = parts.scale;
15918 switch (parts.seg)
15920 case SEG_DEFAULT:
15921 break;
15922 case SEG_FS:
15923 case SEG_GS:
15924 if (ASSEMBLER_DIALECT == ASM_ATT)
15925 putc ('%', file);
15926 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15927 break;
15928 default:
15929 gcc_unreachable ();
15932 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15933 if (TARGET_64BIT && !base && !index)
15935 rtx symbol = disp;
15937 if (GET_CODE (disp) == CONST
15938 && GET_CODE (XEXP (disp, 0)) == PLUS
15939 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15940 symbol = XEXP (XEXP (disp, 0), 0);
15942 if (GET_CODE (symbol) == LABEL_REF
15943 || (GET_CODE (symbol) == SYMBOL_REF
15944 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15945 base = pc_rtx;
15947 if (!base && !index)
15949 /* A displacement-only address requires special attention. */
15951 if (CONST_INT_P (disp))
15953 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15954 fputs ("ds:", file);
15955 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15957 else if (flag_pic)
15958 output_pic_addr_const (file, disp, 0);
15959 else
15960 output_addr_const (file, disp);
15962 else
15964 /* Print SImode register names to force addr32 prefix. */
15965 if (SImode_address_operand (addr, VOIDmode))
15967 #ifdef ENABLE_CHECKING
15968 gcc_assert (TARGET_64BIT);
15969 switch (GET_CODE (addr))
15971 case SUBREG:
15972 gcc_assert (GET_MODE (addr) == SImode);
15973 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15974 break;
15975 case ZERO_EXTEND:
15976 case AND:
15977 gcc_assert (GET_MODE (addr) == DImode);
15978 break;
15979 default:
15980 gcc_unreachable ();
15982 #endif
15983 gcc_assert (!code);
15984 code = 'k';
15986 else if (code == 0
15987 && TARGET_X32
15988 && disp
15989 && CONST_INT_P (disp)
15990 && INTVAL (disp) < -16*1024*1024)
15992 /* X32 runs in 64-bit mode, where displacement, DISP, in
15993 address DISP(%r64), is encoded as 32-bit immediate sign-
15994 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15995 address is %r64 + 0xffffffffbffffd00. When %r64 <
15996 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15997 which is invalid for x32. The correct address is %r64
15998 - 0x40000300 == 0xf7ffdd64. To properly encode
15999 -0x40000300(%r64) for x32, we zero-extend negative
16000 displacement by forcing addr32 prefix which truncates
16001 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16002 zero-extend all negative displacements, including -1(%rsp).
16003 However, for small negative displacements, sign-extension
16004 won't cause overflow. We only zero-extend negative
16005 displacements if they are less than -16*1024*1024, which is also
16006 the bound used to check legitimate address displacements for PIC. */
16007 code = 'k';
16010 if (ASSEMBLER_DIALECT == ASM_ATT)
16012 if (disp)
16014 if (flag_pic)
16015 output_pic_addr_const (file, disp, 0);
16016 else if (GET_CODE (disp) == LABEL_REF)
16017 output_asm_label (disp);
16018 else
16019 output_addr_const (file, disp);
16022 putc ('(', file);
16023 if (base)
16024 print_reg (base, code, file);
16025 if (index)
16027 putc (',', file);
16028 print_reg (index, vsib ? 0 : code, file);
16029 if (scale != 1 || vsib)
16030 fprintf (file, ",%d", scale);
16032 putc (')', file);
16034 else
16036 rtx offset = NULL_RTX;
16038 if (disp)
16040 /* Pull out the offset of a symbol; print any symbol itself. */
16041 if (GET_CODE (disp) == CONST
16042 && GET_CODE (XEXP (disp, 0)) == PLUS
16043 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16045 offset = XEXP (XEXP (disp, 0), 1);
16046 disp = gen_rtx_CONST (VOIDmode,
16047 XEXP (XEXP (disp, 0), 0));
16050 if (flag_pic)
16051 output_pic_addr_const (file, disp, 0);
16052 else if (GET_CODE (disp) == LABEL_REF)
16053 output_asm_label (disp);
16054 else if (CONST_INT_P (disp))
16055 offset = disp;
16056 else
16057 output_addr_const (file, disp);
16060 putc ('[', file);
16061 if (base)
16063 print_reg (base, code, file);
16064 if (offset)
16066 if (INTVAL (offset) >= 0)
16067 putc ('+', file);
16068 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16071 else if (offset)
16072 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16073 else
16074 putc ('0', file);
16076 if (index)
16078 putc ('+', file);
16079 print_reg (index, vsib ? 0 : code, file);
16080 if (scale != 1 || vsib)
16081 fprintf (file, "*%d", scale);
16083 putc (']', file);
16088 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16090 static bool
16091 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16093 rtx op;
16095 if (GET_CODE (x) != UNSPEC)
16096 return false;
16098 op = XVECEXP (x, 0, 0);
16099 switch (XINT (x, 1))
16101 case UNSPEC_GOTTPOFF:
16102 output_addr_const (file, op);
16103 /* FIXME: This might be @TPOFF in Sun ld. */
16104 fputs ("@gottpoff", file);
16105 break;
16106 case UNSPEC_TPOFF:
16107 output_addr_const (file, op);
16108 fputs ("@tpoff", file);
16109 break;
16110 case UNSPEC_NTPOFF:
16111 output_addr_const (file, op);
16112 if (TARGET_64BIT)
16113 fputs ("@tpoff", file);
16114 else
16115 fputs ("@ntpoff", file);
16116 break;
16117 case UNSPEC_DTPOFF:
16118 output_addr_const (file, op);
16119 fputs ("@dtpoff", file);
16120 break;
16121 case UNSPEC_GOTNTPOFF:
16122 output_addr_const (file, op);
16123 if (TARGET_64BIT)
16124 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16125 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16126 else
16127 fputs ("@gotntpoff", file);
16128 break;
16129 case UNSPEC_INDNTPOFF:
16130 output_addr_const (file, op);
16131 fputs ("@indntpoff", file);
16132 break;
16133 #if TARGET_MACHO
16134 case UNSPEC_MACHOPIC_OFFSET:
16135 output_addr_const (file, op);
16136 putc ('-', file);
16137 machopic_output_function_base_name (file);
16138 break;
16139 #endif
16141 case UNSPEC_STACK_CHECK:
16143 int offset;
16145 gcc_assert (flag_split_stack);
16147 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16148 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16149 #else
16150 gcc_unreachable ();
16151 #endif
16153 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16155 break;
16157 default:
16158 return false;
16161 return true;
16164 /* Split one or more double-mode RTL references into pairs of half-mode
16165 references. The RTL can be REG, offsettable MEM, integer constant, or
16166 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16167 split and "num" is its length. lo_half and hi_half are output arrays
16168 that parallel "operands". */
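/* A small illustration with hypothetical operands: splitting the DImode
   MEM (mem:DI (reg:SI bp)) yields lo_half = (mem:SI (reg:SI bp)) and
   hi_half = (mem:SI (plus:SI (reg:SI bp) (const_int 4))), i.e. the two
   SImode words at byte offsets 0 and 4.  */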
16170 void
16171 split_double_mode (machine_mode mode, rtx operands[],
16172 int num, rtx lo_half[], rtx hi_half[])
16174 machine_mode half_mode;
16175 unsigned int byte;
16177 switch (mode)
16179 case TImode:
16180 half_mode = DImode;
16181 break;
16182 case DImode:
16183 half_mode = SImode;
16184 break;
16185 default:
16186 gcc_unreachable ();
16189 byte = GET_MODE_SIZE (half_mode);
16191 while (num--)
16193 rtx op = operands[num];
16195 /* simplify_subreg refuses to split volatile memory addresses,
16196 but we still have to handle them. */
16197 if (MEM_P (op))
16199 lo_half[num] = adjust_address (op, half_mode, 0);
16200 hi_half[num] = adjust_address (op, half_mode, byte);
16202 else
16204 lo_half[num] = simplify_gen_subreg (half_mode, op,
16205 GET_MODE (op) == VOIDmode
16206 ? mode : GET_MODE (op), 0);
16207 hi_half[num] = simplify_gen_subreg (half_mode, op,
16208 GET_MODE (op) == VOIDmode
16209 ? mode : GET_MODE (op), byte);
16214 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16215 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16216 is the expression of the binary operation. The output may either be
16217 emitted here, or returned to the caller, like all output_* functions.
16219 There is no guarantee that the operands are the same mode, as they
16220 might be within FLOAT or FLOAT_EXTEND expressions. */
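/* A sketch of one case, with hypothetical operands: for a PLUS whose
   operands[0] and operands[1] are st(0) and whose operands[2] is a DFmode
   MEM, the template built below is "fadd%Z2\t%2", which the 'Z' code
   expands to e.g. "faddl 8(%esp)" in AT&T syntax.  */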
16222 #ifndef SYSV386_COMPAT
16223 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16224 wants to fix the assemblers because that causes incompatibility
16225 with gcc. No-one wants to fix gcc because that causes
16226 incompatibility with assemblers... You can use the option of
16227 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16228 #define SYSV386_COMPAT 1
16229 #endif
16231 const char *
16232 output_387_binary_op (rtx insn, rtx *operands)
16234 static char buf[40];
16235 const char *p;
16236 const char *ssep;
16237 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16239 #ifdef ENABLE_CHECKING
16240 /* Even if we do not want to check the inputs, this documents the input
16241 constraints, which helps in understanding the following code. */
16242 if (STACK_REG_P (operands[0])
16243 && ((REG_P (operands[1])
16244 && REGNO (operands[0]) == REGNO (operands[1])
16245 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16246 || (REG_P (operands[2])
16247 && REGNO (operands[0]) == REGNO (operands[2])
16248 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16249 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16250 ; /* ok */
16251 else
16252 gcc_assert (is_sse);
16253 #endif
16255 switch (GET_CODE (operands[3]))
16257 case PLUS:
16258 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16259 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16260 p = "fiadd";
16261 else
16262 p = "fadd";
16263 ssep = "vadd";
16264 break;
16266 case MINUS:
16267 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16268 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16269 p = "fisub";
16270 else
16271 p = "fsub";
16272 ssep = "vsub";
16273 break;
16275 case MULT:
16276 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16277 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16278 p = "fimul";
16279 else
16280 p = "fmul";
16281 ssep = "vmul";
16282 break;
16284 case DIV:
16285 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16286 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16287 p = "fidiv";
16288 else
16289 p = "fdiv";
16290 ssep = "vdiv";
16291 break;
16293 default:
16294 gcc_unreachable ();
16297 if (is_sse)
16299 if (TARGET_AVX)
16301 strcpy (buf, ssep);
16302 if (GET_MODE (operands[0]) == SFmode)
16303 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16304 else
16305 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16307 else
16309 strcpy (buf, ssep + 1);
16310 if (GET_MODE (operands[0]) == SFmode)
16311 strcat (buf, "ss\t{%2, %0|%0, %2}");
16312 else
16313 strcat (buf, "sd\t{%2, %0|%0, %2}");
16315 return buf;
16317 strcpy (buf, p);
16319 switch (GET_CODE (operands[3]))
16321 case MULT:
16322 case PLUS:
16323 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16325 rtx temp = operands[2];
16326 operands[2] = operands[1];
16327 operands[1] = temp;
16330 /* We know operands[0] == operands[1]. */
16332 if (MEM_P (operands[2]))
16334 p = "%Z2\t%2";
16335 break;
16338 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16340 if (STACK_TOP_P (operands[0]))
16341 /* How is it that we are storing to a dead operand[2]?
16342 Well, presumably operands[1] is dead too. We can't
16343 store the result to st(0) as st(0) gets popped on this
16344 instruction. Instead store to operands[2] (which I
16345 think has to be st(1)). st(1) will be popped later.
16346 gcc <= 2.8.1 didn't have this check and generated
16347 assembly code that the Unixware assembler rejected. */
16348 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16349 else
16350 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16351 break;
16354 if (STACK_TOP_P (operands[0]))
16355 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16356 else
16357 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16358 break;
16360 case MINUS:
16361 case DIV:
16362 if (MEM_P (operands[1]))
16364 p = "r%Z1\t%1";
16365 break;
16368 if (MEM_P (operands[2]))
16370 p = "%Z2\t%2";
16371 break;
16374 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16376 #if SYSV386_COMPAT
16377 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16378 derived assemblers, confusingly reverse the direction of
16379 the operation for fsub{r} and fdiv{r} when the
16380 destination register is not st(0). The Intel assembler
16381 doesn't have this brain damage. Read !SYSV386_COMPAT to
16382 figure out what the hardware really does. */
16383 if (STACK_TOP_P (operands[0]))
16384 p = "{p\t%0, %2|rp\t%2, %0}";
16385 else
16386 p = "{rp\t%2, %0|p\t%0, %2}";
16387 #else
16388 if (STACK_TOP_P (operands[0]))
16389 /* As above for fmul/fadd, we can't store to st(0). */
16390 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16391 else
16392 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16393 #endif
16394 break;
16397 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16399 #if SYSV386_COMPAT
16400 if (STACK_TOP_P (operands[0]))
16401 p = "{rp\t%0, %1|p\t%1, %0}";
16402 else
16403 p = "{p\t%1, %0|rp\t%0, %1}";
16404 #else
16405 if (STACK_TOP_P (operands[0]))
16406 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16407 else
16408 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16409 #endif
16410 break;
16413 if (STACK_TOP_P (operands[0]))
16415 if (STACK_TOP_P (operands[1]))
16416 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16417 else
16418 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16419 break;
16421 else if (STACK_TOP_P (operands[1]))
16423 #if SYSV386_COMPAT
16424 p = "{\t%1, %0|r\t%0, %1}";
16425 #else
16426 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16427 #endif
16429 else
16431 #if SYSV386_COMPAT
16432 p = "{r\t%2, %0|\t%0, %2}";
16433 #else
16434 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16435 #endif
16437 break;
16439 default:
16440 gcc_unreachable ();
16443 strcat (buf, p);
16444 return buf;
16447 /* Check if a 256bit AVX register is referenced inside of EXP. */
16449 static bool
16450 ix86_check_avx256_register (const_rtx exp)
16452 if (GET_CODE (exp) == SUBREG)
16453 exp = SUBREG_REG (exp);
16455 return (REG_P (exp)
16456 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16459 /* Return needed mode for entity in optimize_mode_switching pass. */
16461 static int
16462 ix86_avx_u128_mode_needed (rtx_insn *insn)
16464 if (CALL_P (insn))
16466 rtx link;
16468 /* Needed mode is set to AVX_U128_CLEAN if there are
16469 no 256bit modes used in function arguments. */
16470 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16471 link;
16472 link = XEXP (link, 1))
16474 if (GET_CODE (XEXP (link, 0)) == USE)
16476 rtx arg = XEXP (XEXP (link, 0), 0);
16478 if (ix86_check_avx256_register (arg))
16479 return AVX_U128_DIRTY;
16483 return AVX_U128_CLEAN;
16486 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16487 changes state only when a 256bit register is written to, but we need
16488 to prevent the compiler from moving the optimal insertion point above
16489 an eventual read from a 256bit register. */
16490 subrtx_iterator::array_type array;
16491 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16492 if (ix86_check_avx256_register (*iter))
16493 return AVX_U128_DIRTY;
16495 return AVX_U128_ANY;
16498 /* Return mode that i387 must be switched into
16499 prior to the execution of insn. */
16501 static int
16502 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16504 enum attr_i387_cw mode;
16506 /* The mode UNINITIALIZED is used to store control word after a
16507 function call or ASM pattern. The mode ANY specifies that the function
16508 has no requirements on the control word and makes no changes in the
16509 bits we are interested in. */
16511 if (CALL_P (insn)
16512 || (NONJUMP_INSN_P (insn)
16513 && (asm_noperands (PATTERN (insn)) >= 0
16514 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16515 return I387_CW_UNINITIALIZED;
16517 if (recog_memoized (insn) < 0)
16518 return I387_CW_ANY;
16520 mode = get_attr_i387_cw (insn);
16522 switch (entity)
16524 case I387_TRUNC:
16525 if (mode == I387_CW_TRUNC)
16526 return mode;
16527 break;
16529 case I387_FLOOR:
16530 if (mode == I387_CW_FLOOR)
16531 return mode;
16532 break;
16534 case I387_CEIL:
16535 if (mode == I387_CW_CEIL)
16536 return mode;
16537 break;
16539 case I387_MASK_PM:
16540 if (mode == I387_CW_MASK_PM)
16541 return mode;
16542 break;
16544 default:
16545 gcc_unreachable ();
16548 return I387_CW_ANY;
16551 /* Return mode that entity must be switched into
16552 prior to the execution of insn. */
16554 static int
16555 ix86_mode_needed (int entity, rtx_insn *insn)
16557 switch (entity)
16559 case AVX_U128:
16560 return ix86_avx_u128_mode_needed (insn);
16561 case I387_TRUNC:
16562 case I387_FLOOR:
16563 case I387_CEIL:
16564 case I387_MASK_PM:
16565 return ix86_i387_mode_needed (entity, insn);
16566 default:
16567 gcc_unreachable ();
16569 return 0;
16572 /* Check if a 256bit AVX register is referenced in stores. */
16574 static void
16575 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16577 if (ix86_check_avx256_register (dest))
16579 bool *used = (bool *) data;
16580 *used = true;
16584 /* Calculate mode of upper 128bit AVX registers after the insn. */
16586 static int
16587 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16589 rtx pat = PATTERN (insn);
16591 if (vzeroupper_operation (pat, VOIDmode)
16592 || vzeroall_operation (pat, VOIDmode))
16593 return AVX_U128_CLEAN;
16595 /* We know that the state is clean after a CALL insn if there is no
16596 256bit register used as the function return register. */
16597 if (CALL_P (insn))
16599 bool avx_reg256_found = false;
16600 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16602 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16605 /* Otherwise, return current mode. Remember that if insn
16606 references AVX 256bit registers, the mode was already changed
16607 to DIRTY from MODE_NEEDED. */
16608 return mode;
16611 /* Return the mode that an insn results in. */
16614 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16616 switch (entity)
16618 case AVX_U128:
16619 return ix86_avx_u128_mode_after (mode, insn);
16620 case I387_TRUNC:
16621 case I387_FLOOR:
16622 case I387_CEIL:
16623 case I387_MASK_PM:
16624 return mode;
16625 default:
16626 gcc_unreachable ();
16630 static int
16631 ix86_avx_u128_mode_entry (void)
16633 tree arg;
16635 /* Entry mode is set to AVX_U128_DIRTY if there are
16636 256bit modes used in function arguments. */
16637 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16638 arg = TREE_CHAIN (arg))
16640 rtx incoming = DECL_INCOMING_RTL (arg);
16642 if (incoming && ix86_check_avx256_register (incoming))
16643 return AVX_U128_DIRTY;
16646 return AVX_U128_CLEAN;
16649 /* Return a mode that ENTITY is assumed to be
16650 switched to at function entry. */
16652 static int
16653 ix86_mode_entry (int entity)
16655 switch (entity)
16657 case AVX_U128:
16658 return ix86_avx_u128_mode_entry ();
16659 case I387_TRUNC:
16660 case I387_FLOOR:
16661 case I387_CEIL:
16662 case I387_MASK_PM:
16663 return I387_CW_ANY;
16664 default:
16665 gcc_unreachable ();
16669 static int
16670 ix86_avx_u128_mode_exit (void)
16672 rtx reg = crtl->return_rtx;
16674 /* Exit mode is set to AVX_U128_DIRTY if there are
16675 256bit modes used in the function return register. */
16676 if (reg && ix86_check_avx256_register (reg))
16677 return AVX_U128_DIRTY;
16679 return AVX_U128_CLEAN;
16682 /* Return a mode that ENTITY is assumed to be
16683 switched to at function exit. */
16685 static int
16686 ix86_mode_exit (int entity)
16688 switch (entity)
16690 case AVX_U128:
16691 return ix86_avx_u128_mode_exit ();
16692 case I387_TRUNC:
16693 case I387_FLOOR:
16694 case I387_CEIL:
16695 case I387_MASK_PM:
16696 return I387_CW_ANY;
16697 default:
16698 gcc_unreachable ();
16702 static int
16703 ix86_mode_priority (int, int n)
16705 return n;
16708 /* Output code to initialize control word copies used by trunc?f?i and
16709 rounding patterns. The current control word is read into a scratch register,
16710 adjusted according to MODE, and stored into the stack slot for that mode. */
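/* For reference: bits 11:10 of the x87 control word are the rounding
   control field (00 = to nearest, 01 = down, 10 = up, 11 = toward zero),
   which is why the code below masks with 0x0c00 and ORs in 0x0400, 0x0800
   or 0x0c00; bit 5 (0x0020) is the precision exception mask used for
   nearbyint. */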
16712 static void
16713 emit_i387_cw_initialization (int mode)
16715 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16716 rtx new_mode;
16718 enum ix86_stack_slot slot;
16720 rtx reg = gen_reg_rtx (HImode);
16722 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16723 emit_move_insn (reg, copy_rtx (stored_mode));
16725 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16726 || optimize_insn_for_size_p ())
16728 switch (mode)
16730 case I387_CW_TRUNC:
16731 /* round toward zero (truncate) */
16732 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16733 slot = SLOT_CW_TRUNC;
16734 break;
16736 case I387_CW_FLOOR:
16737 /* round down toward -oo */
16738 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16739 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16740 slot = SLOT_CW_FLOOR;
16741 break;
16743 case I387_CW_CEIL:
16744 /* round up toward +oo */
16745 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16746 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16747 slot = SLOT_CW_CEIL;
16748 break;
16750 case I387_CW_MASK_PM:
16751 /* mask precision exception for nearbyint() */
16752 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16753 slot = SLOT_CW_MASK_PM;
16754 break;
16756 default:
16757 gcc_unreachable ();
16760 else
16762 switch (mode)
16764 case I387_CW_TRUNC:
16765 /* round toward zero (truncate) */
16766 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16767 slot = SLOT_CW_TRUNC;
16768 break;
16770 case I387_CW_FLOOR:
16771 /* round down toward -oo */
16772 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16773 slot = SLOT_CW_FLOOR;
16774 break;
16776 case I387_CW_CEIL:
16777 /* round up toward +oo */
16778 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16779 slot = SLOT_CW_CEIL;
16780 break;
16782 case I387_CW_MASK_PM:
16783 /* mask precision exception for nearbyint() */
16784 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16785 slot = SLOT_CW_MASK_PM;
16786 break;
16788 default:
16789 gcc_unreachable ();
16793 gcc_assert (slot < MAX_386_STACK_LOCALS);
16795 new_mode = assign_386_stack_local (HImode, slot);
16796 emit_move_insn (new_mode, reg);
16799 /* Emit vzeroupper. */
16801 void
16802 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16804 int i;
16806 /* Cancel automatic vzeroupper insertion if there are
16807 live call-saved SSE registers at the insertion point. */
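/* vzeroupper zeroes the upper 128 bits of every ymm register, so emitting
   it here would destroy any values still live in those bits. */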
16809 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16810 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16811 return;
16813 if (TARGET_64BIT)
16814 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16815 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16816 return;
16818 emit_insn (gen_avx_vzeroupper ());
16823 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16824 is the set of hard registers live at the point where the insn(s)
16825 are to be inserted. */
16827 static void
16828 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16829 HARD_REG_SET regs_live)
16831 switch (entity)
16833 case AVX_U128:
16834 if (mode == AVX_U128_CLEAN)
16835 ix86_avx_emit_vzeroupper (regs_live);
16836 break;
16837 case I387_TRUNC:
16838 case I387_FLOOR:
16839 case I387_CEIL:
16840 case I387_MASK_PM:
16841 if (mode != I387_CW_ANY
16842 && mode != I387_CW_UNINITIALIZED)
16843 emit_i387_cw_initialization (mode);
16844 break;
16845 default:
16846 gcc_unreachable ();
16850 /* Output code for INSN to convert a float to a signed int. OPERANDS
16851 are the insn operands. The output may be [HSD]Imode and the input
16852 operand may be [SDX]Fmode. */
16854 const char *
16855 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16857 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16858 int dimode_p = GET_MODE (operands[0]) == DImode;
16859 int round_mode = get_attr_i387_cw (insn);
16861 /* Jump through a hoop or two for DImode, since the hardware has no
16862 non-popping instruction. We used to do this a different way, but
16863 that was somewhat fragile and broke with post-reload splitters. */
16864 if ((dimode_p || fisttp) && !stack_top_dies)
16865 output_asm_insn ("fld\t%y1", operands);
16867 gcc_assert (STACK_TOP_P (operands[1]));
16868 gcc_assert (MEM_P (operands[0]));
16869 gcc_assert (GET_MODE (operands[1]) != TFmode);
16871 if (fisttp)
16872 output_asm_insn ("fisttp%Z0\t%0", operands);
16873 else
16875 if (round_mode != I387_CW_ANY)
16876 output_asm_insn ("fldcw\t%3", operands);
16877 if (stack_top_dies || dimode_p)
16878 output_asm_insn ("fistp%Z0\t%0", operands);
16879 else
16880 output_asm_insn ("fist%Z0\t%0", operands);
16881 if (round_mode != I387_CW_ANY)
16882 output_asm_insn ("fldcw\t%2", operands);
16885 return "";
16888 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16889 have the values zero or one, indicates the ffreep insn's operand
16890 from the OPERANDS array. */
16892 static const char *
16893 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16895 if (TARGET_USE_FFREEP)
16896 #ifdef HAVE_AS_IX86_FFREEP
16897 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16898 #else
16900 static char retval[32];
16901 int regno = REGNO (operands[opno]);
16903 gcc_assert (STACK_REGNO_P (regno));
16905 regno -= FIRST_STACK_REG;
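/* ffreep %st(N) is encoded as the two bytes 0xDF 0xC0+N; emitted as a
   little-endian short this is the 0xc<N>df value built below. */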
16907 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16908 return retval;
16910 #endif
16912 return opno ? "fstp\t%y1" : "fstp\t%y0";
16916 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16917 should be used. UNORDERED_P is true when fucom should be used. */
16919 const char *
16920 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16922 int stack_top_dies;
16923 rtx cmp_op0, cmp_op1;
16924 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16926 if (eflags_p)
16928 cmp_op0 = operands[0];
16929 cmp_op1 = operands[1];
16931 else
16933 cmp_op0 = operands[1];
16934 cmp_op1 = operands[2];
16937 if (is_sse)
16939 if (GET_MODE (operands[0]) == SFmode)
16940 if (unordered_p)
16941 return "%vucomiss\t{%1, %0|%0, %1}";
16942 else
16943 return "%vcomiss\t{%1, %0|%0, %1}";
16944 else
16945 if (unordered_p)
16946 return "%vucomisd\t{%1, %0|%0, %1}";
16947 else
16948 return "%vcomisd\t{%1, %0|%0, %1}";
16951 gcc_assert (STACK_TOP_P (cmp_op0));
16953 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16955 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16957 if (stack_top_dies)
16959 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16960 return output_387_ffreep (operands, 1);
16962 else
16963 return "ftst\n\tfnstsw\t%0";
16966 if (STACK_REG_P (cmp_op1)
16967 && stack_top_dies
16968 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16969 && REGNO (cmp_op1) != FIRST_STACK_REG)
16971 /* If both the top of the 387 stack and the other operand (also a
16972 stack register) die, then this must be a
16973 `fcompp' float compare. */
16975 if (eflags_p)
16977 /* There is no double popping fcomi variant. Fortunately,
16978 eflags is immune from the fstp's cc clobbering. */
16979 if (unordered_p)
16980 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16981 else
16982 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16983 return output_387_ffreep (operands, 0);
16985 else
16987 if (unordered_p)
16988 return "fucompp\n\tfnstsw\t%0";
16989 else
16990 return "fcompp\n\tfnstsw\t%0";
16993 else
16995 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16997 static const char * const alt[16] =
16999 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17000 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17001 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17002 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17004 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17005 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17006 NULL,
17007 NULL,
17009 "fcomi\t{%y1, %0|%0, %y1}",
17010 "fcomip\t{%y1, %0|%0, %y1}",
17011 "fucomi\t{%y1, %0|%0, %y1}",
17012 "fucomip\t{%y1, %0|%0, %y1}",
17014 NULL,
17015 NULL,
17016 NULL,
17017 NULL
17020 int mask;
17021 const char *ret;
17023 mask = eflags_p << 3;
17024 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17025 mask |= unordered_p << 1;
17026 mask |= stack_top_dies;
17028 gcc_assert (mask < 16);
17029 ret = alt[mask];
17030 gcc_assert (ret);
17032 return ret;
17036 void
17037 ix86_output_addr_vec_elt (FILE *file, int value)
17039 const char *directive = ASM_LONG;
17041 #ifdef ASM_QUAD
17042 if (TARGET_LP64)
17043 directive = ASM_QUAD;
17044 #else
17045 gcc_assert (!TARGET_64BIT);
17046 #endif
17048 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17051 void
17052 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17054 const char *directive = ASM_LONG;
17056 #ifdef ASM_QUAD
17057 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17058 directive = ASM_QUAD;
17059 #else
17060 gcc_assert (!TARGET_64BIT);
17061 #endif
17062 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17063 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17064 fprintf (file, "%s%s%d-%s%d\n",
17065 directive, LPREFIX, value, LPREFIX, rel);
17066 else if (HAVE_AS_GOTOFF_IN_DATA)
17067 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17068 #if TARGET_MACHO
17069 else if (TARGET_MACHO)
17071 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17072 machopic_output_function_base_name (file);
17073 putc ('\n', file);
17075 #endif
17076 else
17077 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17078 GOT_SYMBOL_NAME, LPREFIX, value);
17081 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17082 for the target. */
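/* The xor form is shorter and breaks the dependency on the old register
   value, but it clobbers the flags, hence the explicit CLOBBER of
   FLAGS_REG attached below when that form is used. */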
17084 void
17085 ix86_expand_clear (rtx dest)
17087 rtx tmp;
17089 /* We play register width games, which are only valid after reload. */
17090 gcc_assert (reload_completed);
17092 /* Avoid HImode and its attendant prefix byte. */
17093 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17094 dest = gen_rtx_REG (SImode, REGNO (dest));
17095 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17097 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17099 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17100 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17103 emit_insn (tmp);
17106 /* X is an unchanging MEM. If it is a constant pool reference, return
17107 the constant pool rtx, else NULL. */
17110 maybe_get_pool_constant (rtx x)
17112 x = ix86_delegitimize_address (XEXP (x, 0));
17114 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17115 return get_pool_constant (x);
17117 return NULL_RTX;
17120 void
17121 ix86_expand_move (machine_mode mode, rtx operands[])
17123 rtx op0, op1;
17124 enum tls_model model;
17126 op0 = operands[0];
17127 op1 = operands[1];
17129 if (GET_CODE (op1) == SYMBOL_REF)
17131 rtx tmp;
17133 model = SYMBOL_REF_TLS_MODEL (op1);
17134 if (model)
17136 op1 = legitimize_tls_address (op1, model, true);
17137 op1 = force_operand (op1, op0);
17138 if (op1 == op0)
17139 return;
17140 op1 = convert_to_mode (mode, op1, 1);
17142 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17143 op1 = tmp;
17145 else if (GET_CODE (op1) == CONST
17146 && GET_CODE (XEXP (op1, 0)) == PLUS
17147 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17149 rtx addend = XEXP (XEXP (op1, 0), 1);
17150 rtx symbol = XEXP (XEXP (op1, 0), 0);
17151 rtx tmp;
17153 model = SYMBOL_REF_TLS_MODEL (symbol);
17154 if (model)
17155 tmp = legitimize_tls_address (symbol, model, true);
17156 else
17157 tmp = legitimize_pe_coff_symbol (symbol, true);
17159 if (tmp)
17161 tmp = force_operand (tmp, NULL);
17162 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17163 op0, 1, OPTAB_DIRECT);
17164 if (tmp == op0)
17165 return;
17166 op1 = convert_to_mode (mode, tmp, 1);
17170 if ((flag_pic || MACHOPIC_INDIRECT)
17171 && symbolic_operand (op1, mode))
17173 if (TARGET_MACHO && !TARGET_64BIT)
17175 #if TARGET_MACHO
17176 /* dynamic-no-pic */
17177 if (MACHOPIC_INDIRECT)
17179 rtx temp = ((reload_in_progress
17180 || ((op0 && REG_P (op0))
17181 && mode == Pmode))
17182 ? op0 : gen_reg_rtx (Pmode));
17183 op1 = machopic_indirect_data_reference (op1, temp);
17184 if (MACHOPIC_PURE)
17185 op1 = machopic_legitimize_pic_address (op1, mode,
17186 temp == op1 ? 0 : temp);
17188 if (op0 != op1 && GET_CODE (op0) != MEM)
17190 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17191 emit_insn (insn);
17192 return;
17194 if (GET_CODE (op0) == MEM)
17195 op1 = force_reg (Pmode, op1);
17196 else
17198 rtx temp = op0;
17199 if (GET_CODE (temp) != REG)
17200 temp = gen_reg_rtx (Pmode);
17201 temp = legitimize_pic_address (op1, temp);
17202 if (temp == op0)
17203 return;
17204 op1 = temp;
17206 /* dynamic-no-pic */
17207 #endif
17209 else
17211 if (MEM_P (op0))
17212 op1 = force_reg (mode, op1);
17213 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17215 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17216 op1 = legitimize_pic_address (op1, reg);
17217 if (op0 == op1)
17218 return;
17219 op1 = convert_to_mode (mode, op1, 1);
17223 else
17225 if (MEM_P (op0)
17226 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17227 || !push_operand (op0, mode))
17228 && MEM_P (op1))
17229 op1 = force_reg (mode, op1);
17231 if (push_operand (op0, mode)
17232 && ! general_no_elim_operand (op1, mode))
17233 op1 = copy_to_mode_reg (mode, op1);
17235 /* Force large constants in 64bit compilation into a register
17236 to get them CSEed. */
17237 if (can_create_pseudo_p ()
17238 && (mode == DImode) && TARGET_64BIT
17239 && immediate_operand (op1, mode)
17240 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17241 && !register_operand (op0, mode)
17242 && optimize)
17243 op1 = copy_to_mode_reg (mode, op1);
17245 if (can_create_pseudo_p ()
17246 && FLOAT_MODE_P (mode)
17247 && GET_CODE (op1) == CONST_DOUBLE)
17249 /* If we are loading a floating point constant to a register,
17250 force the value to memory now, since we'll get better code
17251 out the back end. */
17253 op1 = validize_mem (force_const_mem (mode, op1));
17254 if (!register_operand (op0, mode))
17256 rtx temp = gen_reg_rtx (mode);
17257 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17258 emit_move_insn (op0, temp);
17259 return;
17264 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17267 void
17268 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17270 rtx op0 = operands[0], op1 = operands[1];
17271 unsigned int align = GET_MODE_ALIGNMENT (mode);
17273 if (push_operand (op0, VOIDmode))
17274 op0 = emit_move_resolve_push (mode, op0);
17276 /* Force constants other than zero into memory. We do not know how
17277 the instructions used to build constants modify the upper 64 bits
17278 of the register; once we have that information we may be able
17279 to handle some of them more efficiently. */
17280 if (can_create_pseudo_p ()
17281 && register_operand (op0, mode)
17282 && (CONSTANT_P (op1)
17283 || (GET_CODE (op1) == SUBREG
17284 && CONSTANT_P (SUBREG_REG (op1))))
17285 && !standard_sse_constant_p (op1))
17286 op1 = validize_mem (force_const_mem (mode, op1));
17288 /* We need to check memory alignment for SSE mode since an attribute
17289 can make operands unaligned. */
17290 if (can_create_pseudo_p ()
17291 && SSE_REG_MODE_P (mode)
17292 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17293 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17295 rtx tmp[2];
17297 /* ix86_expand_vector_move_misalign() does not like constants ... */
17298 if (CONSTANT_P (op1)
17299 || (GET_CODE (op1) == SUBREG
17300 && CONSTANT_P (SUBREG_REG (op1))))
17301 op1 = validize_mem (force_const_mem (mode, op1));
17303 /* ... nor both arguments in memory. */
17304 if (!register_operand (op0, mode)
17305 && !register_operand (op1, mode))
17306 op1 = force_reg (mode, op1);
17308 tmp[0] = op0; tmp[1] = op1;
17309 ix86_expand_vector_move_misalign (mode, tmp);
17310 return;
17313 /* Make operand1 a register if it isn't already. */
17314 if (can_create_pseudo_p ()
17315 && !register_operand (op0, mode)
17316 && !register_operand (op1, mode))
17318 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17319 return;
17322 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17325 /* Split 32-byte AVX unaligned load and store if needed. */
17327 static void
17328 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17330 rtx m;
17331 rtx (*extract) (rtx, rtx, rtx);
17332 rtx (*load_unaligned) (rtx, rtx);
17333 rtx (*store_unaligned) (rtx, rtx);
17334 machine_mode mode;
17336 switch (GET_MODE (op0))
17338 default:
17339 gcc_unreachable ();
17340 case V32QImode:
17341 extract = gen_avx_vextractf128v32qi;
17342 load_unaligned = gen_avx_loaddquv32qi;
17343 store_unaligned = gen_avx_storedquv32qi;
17344 mode = V16QImode;
17345 break;
17346 case V8SFmode:
17347 extract = gen_avx_vextractf128v8sf;
17348 load_unaligned = gen_avx_loadups256;
17349 store_unaligned = gen_avx_storeups256;
17350 mode = V4SFmode;
17351 break;
17352 case V4DFmode:
17353 extract = gen_avx_vextractf128v4df;
17354 load_unaligned = gen_avx_loadupd256;
17355 store_unaligned = gen_avx_storeupd256;
17356 mode = V2DFmode;
17357 break;
17360 if (MEM_P (op1))
17362 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17364 rtx r = gen_reg_rtx (mode);
17365 m = adjust_address (op1, mode, 0);
17366 emit_move_insn (r, m);
17367 m = adjust_address (op1, mode, 16);
17368 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17369 emit_move_insn (op0, r);
17371 /* Normal *mov<mode>_internal pattern will handle
17372 unaligned loads just fine if misaligned_operand
17373 is true, and without the UNSPEC it can be combined
17374 with arithmetic instructions. */
17375 else if (misaligned_operand (op1, GET_MODE (op1)))
17376 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17377 else
17378 emit_insn (load_unaligned (op0, op1));
17380 else if (MEM_P (op0))
17382 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17384 m = adjust_address (op0, mode, 0);
17385 emit_insn (extract (m, op1, const0_rtx));
17386 m = adjust_address (op0, mode, 16);
17387 emit_insn (extract (m, op1, const1_rtx));
17389 else
17390 emit_insn (store_unaligned (op0, op1));
17392 else
17393 gcc_unreachable ();
17396 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17397 straight to ix86_expand_vector_move. */
17398 /* Code generation for scalar reg-reg moves of single and double precision data:
17399 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17400 movaps reg, reg
17401 else
17402 movss reg, reg
17403 if (x86_sse_partial_reg_dependency == true)
17404 movapd reg, reg
17405 else
17406 movsd reg, reg
17408 Code generation for scalar loads of double precision data:
17409 if (x86_sse_split_regs == true)
17410 movlpd mem, reg (gas syntax)
17411 else
17412 movsd mem, reg
17414 Code generation for unaligned packed loads of single precision data
17415 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17416 if (x86_sse_unaligned_move_optimal)
17417 movups mem, reg
17419 if (x86_sse_partial_reg_dependency == true)
17421 xorps reg, reg
17422 movlps mem, reg
17423 movhps mem+8, reg
17425 else
17427 movlps mem, reg
17428 movhps mem+8, reg
17431 Code generation for unaligned packed loads of double precision data
17432 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17433 if (x86_sse_unaligned_move_optimal)
17434 movupd mem, reg
17436 if (x86_sse_split_regs == true)
17438 movlpd mem, reg
17439 movhpd mem+8, reg
17441 else
17443 movsd mem, reg
17444 movhpd mem+8, reg
17448 void
17449 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17451 rtx op0, op1, orig_op0 = NULL_RTX, m;
17452 rtx (*load_unaligned) (rtx, rtx);
17453 rtx (*store_unaligned) (rtx, rtx);
17455 op0 = operands[0];
17456 op1 = operands[1];
17458 if (GET_MODE_SIZE (mode) == 64)
17460 switch (GET_MODE_CLASS (mode))
17462 case MODE_VECTOR_INT:
17463 case MODE_INT:
17464 if (GET_MODE (op0) != V16SImode)
17466 if (!MEM_P (op0))
17468 orig_op0 = op0;
17469 op0 = gen_reg_rtx (V16SImode);
17471 else
17472 op0 = gen_lowpart (V16SImode, op0);
17474 op1 = gen_lowpart (V16SImode, op1);
17475 /* FALLTHRU */
17477 case MODE_VECTOR_FLOAT:
17478 switch (GET_MODE (op0))
17480 default:
17481 gcc_unreachable ();
17482 case V16SImode:
17483 load_unaligned = gen_avx512f_loaddquv16si;
17484 store_unaligned = gen_avx512f_storedquv16si;
17485 break;
17486 case V16SFmode:
17487 load_unaligned = gen_avx512f_loadups512;
17488 store_unaligned = gen_avx512f_storeups512;
17489 break;
17490 case V8DFmode:
17491 load_unaligned = gen_avx512f_loadupd512;
17492 store_unaligned = gen_avx512f_storeupd512;
17493 break;
17496 if (MEM_P (op1))
17497 emit_insn (load_unaligned (op0, op1));
17498 else if (MEM_P (op0))
17499 emit_insn (store_unaligned (op0, op1));
17500 else
17501 gcc_unreachable ();
17502 if (orig_op0)
17503 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17504 break;
17506 default:
17507 gcc_unreachable ();
17510 return;
17513 if (TARGET_AVX
17514 && GET_MODE_SIZE (mode) == 32)
17516 switch (GET_MODE_CLASS (mode))
17518 case MODE_VECTOR_INT:
17519 case MODE_INT:
17520 if (GET_MODE (op0) != V32QImode)
17522 if (!MEM_P (op0))
17524 orig_op0 = op0;
17525 op0 = gen_reg_rtx (V32QImode);
17527 else
17528 op0 = gen_lowpart (V32QImode, op0);
17530 op1 = gen_lowpart (V32QImode, op1);
17531 /* FALLTHRU */
17533 case MODE_VECTOR_FLOAT:
17534 ix86_avx256_split_vector_move_misalign (op0, op1);
17535 if (orig_op0)
17536 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17537 break;
17539 default:
17540 gcc_unreachable ();
17543 return;
17546 if (MEM_P (op1))
17548 /* Normal *mov<mode>_internal pattern will handle
17549 unaligned loads just fine if misaligned_operand
17550 is true, and without the UNSPEC it can be combined
17551 with arithmetic instructions. */
17552 if (TARGET_AVX
17553 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17554 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17555 && misaligned_operand (op1, GET_MODE (op1)))
17556 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17557 /* ??? If we have typed data, then it would appear that using
17558 movdqu is the only way to get unaligned data loaded with
17559 integer type. */
17560 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17562 if (GET_MODE (op0) != V16QImode)
17564 orig_op0 = op0;
17565 op0 = gen_reg_rtx (V16QImode);
17567 op1 = gen_lowpart (V16QImode, op1);
17568 /* We will eventually emit movups based on insn attributes. */
17569 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17570 if (orig_op0)
17571 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17573 else if (TARGET_SSE2 && mode == V2DFmode)
17575 rtx zero;
17577 if (TARGET_AVX
17578 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17579 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17580 || optimize_insn_for_size_p ())
17582 /* We will eventually emit movups based on insn attributes. */
17583 emit_insn (gen_sse2_loadupd (op0, op1));
17584 return;
17587 /* When SSE registers are split into halves, we can avoid
17588 writing to the top half twice. */
17589 if (TARGET_SSE_SPLIT_REGS)
17591 emit_clobber (op0);
17592 zero = op0;
17594 else
17596 /* ??? Not sure about the best option for the Intel chips.
17597 The following would seem to satisfy; the register is
17598 entirely cleared, breaking the dependency chain. We
17599 then store to the upper half, with a dependency depth
17600 of one. A rumor has it that Intel recommends two movsd
17601 followed by an unpacklpd, but this is unconfirmed. And
17602 given that the dependency depth of the unpacklpd would
17603 still be one, I'm not sure why this would be better. */
17604 zero = CONST0_RTX (V2DFmode);
17607 m = adjust_address (op1, DFmode, 0);
17608 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17609 m = adjust_address (op1, DFmode, 8);
17610 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17612 else
17614 rtx t;
17616 if (TARGET_AVX
17617 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17618 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17619 || optimize_insn_for_size_p ())
17621 if (GET_MODE (op0) != V4SFmode)
17623 orig_op0 = op0;
17624 op0 = gen_reg_rtx (V4SFmode);
17626 op1 = gen_lowpart (V4SFmode, op1);
17627 emit_insn (gen_sse_loadups (op0, op1));
17628 if (orig_op0)
17629 emit_move_insn (orig_op0,
17630 gen_lowpart (GET_MODE (orig_op0), op0));
17631 return;
17634 if (mode != V4SFmode)
17635 t = gen_reg_rtx (V4SFmode);
17636 else
17637 t = op0;
17639 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17640 emit_move_insn (t, CONST0_RTX (V4SFmode));
17641 else
17642 emit_clobber (t);
17644 m = adjust_address (op1, V2SFmode, 0);
17645 emit_insn (gen_sse_loadlps (t, t, m));
17646 m = adjust_address (op1, V2SFmode, 8);
17647 emit_insn (gen_sse_loadhps (t, t, m));
17648 if (mode != V4SFmode)
17649 emit_move_insn (op0, gen_lowpart (mode, t));
17652 else if (MEM_P (op0))
17654 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17656 op0 = gen_lowpart (V16QImode, op0);
17657 op1 = gen_lowpart (V16QImode, op1);
17658 /* We will eventually emit movups based on insn attributes. */
17659 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17661 else if (TARGET_SSE2 && mode == V2DFmode)
17663 if (TARGET_AVX
17664 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17665 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17666 || optimize_insn_for_size_p ())
17667 /* We will eventually emit movups based on insn attributes. */
17668 emit_insn (gen_sse2_storeupd (op0, op1));
17669 else
17671 m = adjust_address (op0, DFmode, 0);
17672 emit_insn (gen_sse2_storelpd (m, op1));
17673 m = adjust_address (op0, DFmode, 8);
17674 emit_insn (gen_sse2_storehpd (m, op1));
17677 else
17679 if (mode != V4SFmode)
17680 op1 = gen_lowpart (V4SFmode, op1);
17682 if (TARGET_AVX
17683 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17684 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17685 || optimize_insn_for_size_p ())
17687 op0 = gen_lowpart (V4SFmode, op0);
17688 emit_insn (gen_sse_storeups (op0, op1));
17690 else
17692 m = adjust_address (op0, V2SFmode, 0);
17693 emit_insn (gen_sse_storelps (m, op1));
17694 m = adjust_address (op0, V2SFmode, 8);
17695 emit_insn (gen_sse_storehps (m, op1));
17699 else
17700 gcc_unreachable ();
17703 /* Helper function of ix86_fixup_binary_operands to canonicalize
17704 operand order. Returns true if the operands should be swapped. */
17706 static bool
17707 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17708 rtx operands[])
17710 rtx dst = operands[0];
17711 rtx src1 = operands[1];
17712 rtx src2 = operands[2];
17714 /* If the operation is not commutative, we can't do anything. */
17715 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17716 return false;
17718 /* Highest priority is that src1 should match dst. */
17719 if (rtx_equal_p (dst, src1))
17720 return false;
17721 if (rtx_equal_p (dst, src2))
17722 return true;
17724 /* Next highest priority is that immediate constants come second. */
17725 if (immediate_operand (src2, mode))
17726 return false;
17727 if (immediate_operand (src1, mode))
17728 return true;
17730 /* Lowest priority is that memory references should come second. */
17731 if (MEM_P (src2))
17732 return false;
17733 if (MEM_P (src1))
17734 return true;
17736 return false;
17740 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17741 destination to use for the operation. If different from the true
17742 destination in operands[0], a copy operation will be required. */
17745 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17746 rtx operands[])
17748 rtx dst = operands[0];
17749 rtx src1 = operands[1];
17750 rtx src2 = operands[2];
17752 /* Canonicalize operand order. */
17753 if (ix86_swap_binary_operands_p (code, mode, operands))
17755 /* It is invalid to swap operands of different modes. */
17756 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17758 std::swap (src1, src2);
17761 /* Both source operands cannot be in memory. */
17762 if (MEM_P (src1) && MEM_P (src2))
17764 /* Optimization: Only read from memory once. */
17765 if (rtx_equal_p (src1, src2))
17767 src2 = force_reg (mode, src2);
17768 src1 = src2;
17770 else if (rtx_equal_p (dst, src1))
17771 src2 = force_reg (mode, src2);
17772 else
17773 src1 = force_reg (mode, src1);
17776 /* If the destination is memory, and we do not have matching source
17777 operands, do things in registers. */
17778 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17779 dst = gen_reg_rtx (mode);
17781 /* Source 1 cannot be a constant. */
17782 if (CONSTANT_P (src1))
17783 src1 = force_reg (mode, src1);
17785 /* Source 1 cannot be a non-matching memory. */
17786 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17787 src1 = force_reg (mode, src1);
17789 /* Improve address combine. */
17790 if (code == PLUS
17791 && GET_MODE_CLASS (mode) == MODE_INT
17792 && MEM_P (src2))
17793 src2 = force_reg (mode, src2);
17795 operands[1] = src1;
17796 operands[2] = src2;
17797 return dst;
17800 /* Similarly, but assume that the destination has already been
17801 set up properly. */
17803 void
17804 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17805 machine_mode mode, rtx operands[])
17807 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17808 gcc_assert (dst == operands[0]);
17811 /* Attempt to expand a binary operator. Make the expansion closer to the
17812 actual machine, than just general_operand, which will allow 3 separate
17813 memory references (one output, two input) in a single insn. */
17815 void
17816 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17817 rtx operands[])
17819 rtx src1, src2, dst, op, clob;
17821 dst = ix86_fixup_binary_operands (code, mode, operands);
17822 src1 = operands[1];
17823 src2 = operands[2];
17825 /* Emit the instruction. */
17827 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17828 if (reload_in_progress)
17830 /* Reload doesn't know about the flags register, and doesn't know that
17831 it doesn't want to clobber it. We can only do this with PLUS. */
17832 gcc_assert (code == PLUS);
17833 emit_insn (op);
17835 else if (reload_completed
17836 && code == PLUS
17837 && !rtx_equal_p (dst, src1))
17839 /* This is going to be an LEA; avoid splitting it later. */
17840 emit_insn (op);
17842 else
17844 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17848 /* Fix up the destination if needed. */
17849 if (dst != operands[0])
17850 emit_move_insn (operands[0], dst);
17853 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17854 the given OPERANDS. */
17856 void
17857 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17858 rtx operands[])
17860 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17861 if (GET_CODE (operands[1]) == SUBREG)
17863 op1 = operands[1];
17864 op2 = operands[2];
17866 else if (GET_CODE (operands[2]) == SUBREG)
17868 op1 = operands[2];
17869 op2 = operands[1];
17871 /* Optimize (__m128i) d | (__m128i) e and similar code
17872 where d and e are float vectors, into a float vector logical
17873 insn. In C/C++ without using intrinsics there is no other way
17874 to express vector logical operation on float vectors than
17875 to cast them temporarily to integer vectors. */
17876 if (op1
17877 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17878 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17879 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17880 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17881 && SUBREG_BYTE (op1) == 0
17882 && (GET_CODE (op2) == CONST_VECTOR
17883 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17884 && SUBREG_BYTE (op2) == 0))
17885 && can_create_pseudo_p ())
17887 rtx dst;
17888 switch (GET_MODE (SUBREG_REG (op1)))
17890 case V4SFmode:
17891 case V8SFmode:
17892 case V16SFmode:
17893 case V2DFmode:
17894 case V4DFmode:
17895 case V8DFmode:
17896 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17897 if (GET_CODE (op2) == CONST_VECTOR)
17899 op2 = gen_lowpart (GET_MODE (dst), op2);
17900 op2 = force_reg (GET_MODE (dst), op2);
17902 else
17904 op1 = operands[1];
17905 op2 = SUBREG_REG (operands[2]);
17906 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17907 op2 = force_reg (GET_MODE (dst), op2);
17909 op1 = SUBREG_REG (op1);
17910 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17911 op1 = force_reg (GET_MODE (dst), op1);
17912 emit_insn (gen_rtx_SET (VOIDmode, dst,
17913 gen_rtx_fmt_ee (code, GET_MODE (dst),
17914 op1, op2)));
17915 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17916 return;
17917 default:
17918 break;
17921 if (!nonimmediate_operand (operands[1], mode))
17922 operands[1] = force_reg (mode, operands[1]);
17923 if (!nonimmediate_operand (operands[2], mode))
17924 operands[2] = force_reg (mode, operands[2]);
17925 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17926 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17927 gen_rtx_fmt_ee (code, mode, operands[1],
17928 operands[2])));
17931 /* Return TRUE or FALSE depending on whether the binary operator meets the
17932 appropriate constraints. */
17934 bool
17935 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
17936 rtx operands[3])
17938 rtx dst = operands[0];
17939 rtx src1 = operands[1];
17940 rtx src2 = operands[2];
17942 /* Both source operands cannot be in memory. */
17943 if (MEM_P (src1) && MEM_P (src2))
17944 return false;
17946 /* Canonicalize operand order for commutative operators. */
17947 if (ix86_swap_binary_operands_p (code, mode, operands))
17948 std::swap (src1, src2);
17950 /* If the destination is memory, we must have a matching source operand. */
17951 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17952 return false;
17954 /* Source 1 cannot be a constant. */
17955 if (CONSTANT_P (src1))
17956 return false;
17958 /* Source 1 cannot be a non-matching memory. */
17959 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17960 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17961 return (code == AND
17962 && (mode == HImode
17963 || mode == SImode
17964 || (TARGET_64BIT && mode == DImode))
17965 && satisfies_constraint_L (src2));
17967 return true;
17970 /* Attempt to expand a unary operator. Make the expansion closer to the
17971 actual machine, than just general_operand, which will allow 2 separate
17972 memory references (one output, one input) in a single insn. */
17974 void
17975 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
17976 rtx operands[])
17978 int matching_memory;
17979 rtx src, dst, op, clob;
17981 dst = operands[0];
17982 src = operands[1];
17984 /* If the destination is memory, and we do not have matching source
17985 operands, do things in registers. */
17986 matching_memory = 0;
17987 if (MEM_P (dst))
17989 if (rtx_equal_p (dst, src))
17990 matching_memory = 1;
17991 else
17992 dst = gen_reg_rtx (mode);
17995 /* When source operand is memory, destination must match. */
17996 if (MEM_P (src) && !matching_memory)
17997 src = force_reg (mode, src);
17999 /* Emit the instruction. */
18001 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18002 if (reload_in_progress || code == NOT)
18004 /* Reload doesn't know about the flags register, and doesn't know that
18005 it doesn't want to clobber it. */
18006 gcc_assert (code == NOT);
18007 emit_insn (op);
18009 else
18011 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18012 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18015 /* Fix up the destination if needed. */
18016 if (dst != operands[0])
18017 emit_move_insn (operands[0], dst);
18020 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18021 divisor are within the range [0-255]. */
18023 void
18024 ix86_split_idivmod (machine_mode mode, rtx operands[],
18025 bool signed_p)
18027 rtx_code_label *end_label, *qimode_label;
18028 rtx insn, div, mod;
18029 rtx scratch, tmp0, tmp1, tmp2;
18030 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18031 rtx (*gen_zero_extend) (rtx, rtx);
18032 rtx (*gen_test_ccno_1) (rtx, rtx);
18034 switch (mode)
18036 case SImode:
18037 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18038 gen_test_ccno_1 = gen_testsi_ccno_1;
18039 gen_zero_extend = gen_zero_extendqisi2;
18040 break;
18041 case DImode:
18042 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18043 gen_test_ccno_1 = gen_testdi_ccno_1;
18044 gen_zero_extend = gen_zero_extendqidi2;
18045 break;
18046 default:
18047 gcc_unreachable ();
18050 end_label = gen_label_rtx ();
18051 qimode_label = gen_label_rtx ();
18053 scratch = gen_reg_rtx (mode);
18055 /* Use 8bit unsigned divmod if dividend and divisor are within
18056 the range [0-255]. */
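/* ORing the two operands together and testing the result against -0x100
   checks that neither has any bit set above bit 7; in that case the 8bit
   divide can be used, leaving the quotient in AL and the remainder in AH. */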
18057 emit_move_insn (scratch, operands[2]);
18058 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18059 scratch, 1, OPTAB_DIRECT);
18060 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18061 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18062 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18063 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18064 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18065 pc_rtx);
18066 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18067 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18068 JUMP_LABEL (insn) = qimode_label;
18070 /* Generate original signed/unsigned divmod. */
18071 div = gen_divmod4_1 (operands[0], operands[1],
18072 operands[2], operands[3]);
18073 emit_insn (div);
18075 /* Branch to the end. */
18076 emit_jump_insn (gen_jump (end_label));
18077 emit_barrier ();
18079 /* Generate 8bit unsigned divide. */
18080 emit_label (qimode_label);
18081 /* Don't use operands[0] for result of 8bit divide since not all
18082 registers support QImode ZERO_EXTRACT. */
18083 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18084 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18085 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18086 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18088 if (signed_p)
18090 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18091 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18093 else
18095 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18096 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18099 /* Extract remainder from AH. */
18100 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18101 if (REG_P (operands[1]))
18102 insn = emit_move_insn (operands[1], tmp1);
18103 else
18105 /* Need a new scratch register since the old one has result
18106 of 8bit divide. */
18107 scratch = gen_reg_rtx (mode);
18108 emit_move_insn (scratch, tmp1);
18109 insn = emit_move_insn (operands[1], scratch);
18111 set_unique_reg_note (insn, REG_EQUAL, mod);
18113 /* Zero extend quotient from AL. */
18114 tmp1 = gen_lowpart (QImode, tmp0);
18115 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18116 set_unique_reg_note (insn, REG_EQUAL, div);
18118 emit_label (end_label);
18121 #define LEA_MAX_STALL (3)
18122 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
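/* The distances computed below are measured in half-cycles, so
   LEA_SEARCH_THRESHOLD corresponds to LEA_MAX_STALL full cycles. On
   Atom-class in-order cores an lea executes in the address-generation
   stage, which runs earlier in the pipeline than the ALU, so an lea whose
   inputs come straight from ALU instructions stalls; the heuristics below
   decide when splitting the lea into plain ALU instructions is cheaper. */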
18124 /* Increase given DISTANCE in half-cycles according to
18125 dependencies between PREV and NEXT instructions.
18126 Add 1 half-cycle if there is no dependency and
18127 go to the next cycle if there is some dependency. */
18129 static unsigned int
18130 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18132 df_ref def, use;
18134 if (!prev || !next)
18135 return distance + (distance & 1) + 2;
18137 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18138 return distance + 1;
18140 FOR_EACH_INSN_USE (use, next)
18141 FOR_EACH_INSN_DEF (def, prev)
18142 if (!DF_REF_IS_ARTIFICIAL (def)
18143 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18144 return distance + (distance & 1) + 2;
18146 return distance + 1;
18149 /* Return true if instruction INSN defines register number
18150 REGNO1 or REGNO2. */
18152 static bool
18153 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18154 rtx insn)
18156 df_ref def;
18158 FOR_EACH_INSN_DEF (def, insn)
18159 if (DF_REF_REG_DEF_P (def)
18160 && !DF_REF_IS_ARTIFICIAL (def)
18161 && (regno1 == DF_REF_REGNO (def)
18162 || regno2 == DF_REF_REGNO (def)))
18163 return true;
18165 return false;
18168 /* Return true if instruction INSN uses register number
18169 REGNO as part of an address expression. */
18171 static bool
18172 insn_uses_reg_mem (unsigned int regno, rtx insn)
18174 df_ref use;
18176 FOR_EACH_INSN_USE (use, insn)
18177 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18178 return true;
18180 return false;
18183 /* Search backward for non-agu definition of register number REGNO1
18184 or register number REGNO2 in basic block starting from instruction
18185 START up to head of basic block or instruction INSN.
18187 Put true into *FOUND if a definition was found
18188 and false otherwise.
18190 Distance in half-cycles between START and found instruction or head
18191 of BB is added to DISTANCE and returned. */
18193 static int
18194 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18195 rtx_insn *insn, int distance,
18196 rtx_insn *start, bool *found)
18198 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18199 rtx_insn *prev = start;
18200 rtx_insn *next = NULL;
18202 *found = false;
18204 while (prev
18205 && prev != insn
18206 && distance < LEA_SEARCH_THRESHOLD)
18208 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18210 distance = increase_distance (prev, next, distance);
18211 if (insn_defines_reg (regno1, regno2, prev))
18213 if (recog_memoized (prev) < 0
18214 || get_attr_type (prev) != TYPE_LEA)
18216 *found = true;
18217 return distance;
18221 next = prev;
18223 if (prev == BB_HEAD (bb))
18224 break;
18226 prev = PREV_INSN (prev);
18229 return distance;
18232 /* Search backward for non-agu definition of register number REGNO1
18233 or register number REGNO2 in INSN's basic block until
18234 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18235 2. Reach neighbour BBs boundary, or
18236 3. Reach agu definition.
18237 Returns the distance between the non-agu definition point and INSN.
18238 If no definition point, returns -1. */
18240 static int
18241 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18242 rtx_insn *insn)
18244 basic_block bb = BLOCK_FOR_INSN (insn);
18245 int distance = 0;
18246 bool found = false;
18248 if (insn != BB_HEAD (bb))
18249 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18250 distance, PREV_INSN (insn),
18251 &found);
18253 if (!found && distance < LEA_SEARCH_THRESHOLD)
18255 edge e;
18256 edge_iterator ei;
18257 bool simple_loop = false;
18259 FOR_EACH_EDGE (e, ei, bb->preds)
18260 if (e->src == bb)
18262 simple_loop = true;
18263 break;
18266 if (simple_loop)
18267 distance = distance_non_agu_define_in_bb (regno1, regno2,
18268 insn, distance,
18269 BB_END (bb), &found);
18270 else
18272 int shortest_dist = -1;
18273 bool found_in_bb = false;
18275 FOR_EACH_EDGE (e, ei, bb->preds)
18277 int bb_dist
18278 = distance_non_agu_define_in_bb (regno1, regno2,
18279 insn, distance,
18280 BB_END (e->src),
18281 &found_in_bb);
18282 if (found_in_bb)
18284 if (shortest_dist < 0)
18285 shortest_dist = bb_dist;
18286 else if (bb_dist > 0)
18287 shortest_dist = MIN (bb_dist, shortest_dist);
18289 found = true;
18293 distance = shortest_dist;
18297 /* get_attr_type may modify recog data. We want to make sure
18298 that recog data is valid for instruction INSN, on which
18299 distance_non_agu_define is called. INSN is unchanged here. */
18300 extract_insn_cached (insn);
18302 if (!found)
18303 return -1;
18305 return distance >> 1;
18308 /* Return the distance in half-cycles between INSN and the next
18309 insn that uses register number REGNO in a memory address, added
18310 to DISTANCE. Return -1 if REGNO is set.
18312 Put true into *FOUND if a register usage was found and
18313 false otherwise.
18314 Put true into *REDEFINED if a register redefinition was
18315 found and false otherwise. */
18317 static int
18318 distance_agu_use_in_bb (unsigned int regno,
18319 rtx_insn *insn, int distance, rtx_insn *start,
18320 bool *found, bool *redefined)
18322 basic_block bb = NULL;
18323 rtx_insn *next = start;
18324 rtx_insn *prev = NULL;
18326 *found = false;
18327 *redefined = false;
18329 if (start != NULL_RTX)
18331 bb = BLOCK_FOR_INSN (start);
18332 if (start != BB_HEAD (bb))
18333 /* If insn and start belong to the same bb, set prev to insn,
18334 so the call to increase_distance will increase the distance
18335 between insns by 1. */
18336 prev = insn;
18339 while (next
18340 && next != insn
18341 && distance < LEA_SEARCH_THRESHOLD)
18343 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18345 distance = increase_distance(prev, next, distance);
18346 if (insn_uses_reg_mem (regno, next))
18348 /* Return DISTANCE if OP0 is used in memory
18349 address in NEXT. */
18350 *found = true;
18351 return distance;
18354 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18356 /* Return -1 if OP0 is set in NEXT. */
18357 *redefined = true;
18358 return -1;
18361 prev = next;
18364 if (next == BB_END (bb))
18365 break;
18367 next = NEXT_INSN (next);
18370 return distance;
18373 /* Return the distance between INSN and the next insn that uses
18374 register number REGNO0 in a memory address. Return -1 if no such
18375 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18377 static int
18378 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18380 basic_block bb = BLOCK_FOR_INSN (insn);
18381 int distance = 0;
18382 bool found = false;
18383 bool redefined = false;
18385 if (insn != BB_END (bb))
18386 distance = distance_agu_use_in_bb (regno0, insn, distance,
18387 NEXT_INSN (insn),
18388 &found, &redefined);
18390 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18392 edge e;
18393 edge_iterator ei;
18394 bool simple_loop = false;
18396 FOR_EACH_EDGE (e, ei, bb->succs)
18397 if (e->dest == bb)
18399 simple_loop = true;
18400 break;
18403 if (simple_loop)
18404 distance = distance_agu_use_in_bb (regno0, insn,
18405 distance, BB_HEAD (bb),
18406 &found, &redefined);
18407 else
18409 int shortest_dist = -1;
18410 bool found_in_bb = false;
18411 bool redefined_in_bb = false;
18413 FOR_EACH_EDGE (e, ei, bb->succs)
18415 int bb_dist
18416 = distance_agu_use_in_bb (regno0, insn,
18417 distance, BB_HEAD (e->dest),
18418 &found_in_bb, &redefined_in_bb);
18419 if (found_in_bb)
18421 if (shortest_dist < 0)
18422 shortest_dist = bb_dist;
18423 else if (bb_dist > 0)
18424 shortest_dist = MIN (bb_dist, shortest_dist);
18426 found = true;
18430 distance = shortest_dist;
18434 if (!found || redefined)
18435 return -1;
18437 return distance >> 1;
18440 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18441 there is a choice between LEA and ADD.
18442 Negative value: ADD is preferred over LEA
18443 Zero: neutral
18444 Positive value: LEA is preferred over ADD */
18445 #define IX86_LEA_PRIORITY 0
18447 /* Return true if using the lea INSN has a performance advantage
18448 over a sequence of instructions. The instruction sequence has
18449 SPLIT_COST cycles higher latency than the lea itself. */
18451 static bool
18452 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18453 unsigned int regno2, int split_cost, bool has_scale)
18455 int dist_define, dist_use;
18457 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18458 non-destructive destination, or because the ability to use SCALE
18459 is wanted, the use of LEA is justified. */
18460 if (TARGET_SILVERMONT || TARGET_INTEL)
18462 if (has_scale)
18463 return true;
18464 if (split_cost < 1)
18465 return false;
18466 if (regno0 == regno1 || regno0 == regno2)
18467 return false;
18468 return true;
18471 dist_define = distance_non_agu_define (regno1, regno2, insn);
18472 dist_use = distance_agu_use (regno0, insn);
18474 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18476 /* If there is no non-AGU operand definition, no AGU
18477 operand usage and the split cost is 0, then both the lea
18478 and non-lea variants have the same priority. Currently
18479 we prefer lea for 64-bit code and non-lea for 32-bit
18480 code. */
18481 if (dist_use < 0 && split_cost == 0)
18482 return TARGET_64BIT || IX86_LEA_PRIORITY;
18483 else
18484 return true;
18487 /* With a longer definition distance, lea is preferable.
18488 Here we adjust the distance to take the splitting cost and
18489 lea priority into account. */
18490 dist_define += split_cost + IX86_LEA_PRIORITY;
18492 /* If there is no use in a memory address then we just check
18493 that the split cost exceeds the AGU stall. */
18494 if (dist_use < 0)
18495 return dist_define > LEA_MAX_STALL;
18497 /* If this insn has both backward non-agu dependence and forward
18498 agu dependence, the one with the shorter distance takes effect. */
18499 return dist_define >= dist_use;
18502 /* Return true if it is legal to clobber flags by INSN and
18503 false otherwise. */
18505 static bool
18506 ix86_ok_to_clobber_flags (rtx_insn *insn)
18508 basic_block bb = BLOCK_FOR_INSN (insn);
18509 df_ref use;
18510 bitmap live;
18512 while (insn)
18514 if (NONDEBUG_INSN_P (insn))
18516 FOR_EACH_INSN_USE (use, insn)
18517 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18518 return false;
18520 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18521 return true;
18524 if (insn == BB_END (bb))
18525 break;
18527 insn = NEXT_INSN (insn);
18530 live = df_get_live_out(bb);
18531 return !REGNO_REG_SET_P (live, FLAGS_REG);
18534 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18535 move and add to avoid AGU stalls. */
18537 bool
18538 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18540 unsigned int regno0, regno1, regno2;
18542 /* Check if we need to optimize. */
18543 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18544 return false;
18546 /* Check it is correct to split here. */
18547 if (!ix86_ok_to_clobber_flags(insn))
18548 return false;
18550 regno0 = true_regnum (operands[0]);
18551 regno1 = true_regnum (operands[1]);
18552 regno2 = true_regnum (operands[2]);
18554 /* We need to split only adds with a non-destructive
18555 destination operand. */
18556 if (regno0 == regno1 || regno0 == regno2)
18557 return false;
18558 else
18559 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18562 /* Return true if we should emit lea instruction instead of mov
18563 instruction. */
18565 bool
18566 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18568 unsigned int regno0, regno1;
18570 /* Check if we need to optimize. */
18571 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18572 return false;
18574 /* Use lea for reg to reg moves only. */
18575 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18576 return false;
18578 regno0 = true_regnum (operands[0]);
18579 regno1 = true_regnum (operands[1]);
18581 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18584 /* Return true if we need to split lea into a sequence of
18585 instructions to avoid AGU stalls. */
18587 bool
18588 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18590 unsigned int regno0, regno1, regno2;
18591 int split_cost;
18592 struct ix86_address parts;
18593 int ok;
18595 /* Check we need to optimize. */
18596 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18597 return false;
18599 /* The "at least two components" test below might not catch simple
18600 move or zero extension insns if parts.base is non-NULL and parts.disp
18601 is const0_rtx as the only components in the address, e.g. if the
18602 register is %rbp or %r13. As this test is much cheaper and moves or
18603 zero extensions are the common case, do this check first. */
18604 if (REG_P (operands[1])
18605 || (SImode_address_operand (operands[1], VOIDmode)
18606 && REG_P (XEXP (operands[1], 0))))
18607 return false;
18609 /* Check if it is OK to split here. */
18610 if (!ix86_ok_to_clobber_flags (insn))
18611 return false;
18613 ok = ix86_decompose_address (operands[1], &parts);
18614 gcc_assert (ok);
18616 /* There should be at least two components in the address. */
18617 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18618 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18619 return false;
18621 /* We should not split into add if a non-legitimate PIC
18622 operand is used as the displacement. */
18623 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18624 return false;
18626 regno0 = true_regnum (operands[0]);
18627 regno1 = INVALID_REGNUM;
18628 regno2 = INVALID_REGNUM;
18630 if (parts.base)
18631 regno1 = true_regnum (parts.base);
18632 if (parts.index)
18633 regno2 = true_regnum (parts.index);
18635 split_cost = 0;
18637 /* Compute how many cycles we will add to execution time
18638 if we split the lea into a sequence of instructions. */
18639 if (parts.base || parts.index)
18641 /* Have to use a mov instruction if the non-destructive
18642 destination form is used. */
18643 if (regno1 != regno0 && regno2 != regno0)
18644 split_cost += 1;
18646 /* Have to add index to base if both exist. */
18647 if (parts.base && parts.index)
18648 split_cost += 1;
18650 /* Have to use shift and adds if scale is 2 or greater. */
18651 if (parts.scale > 1)
18653 if (regno0 != regno1)
18654 split_cost += 1;
18655 else if (regno2 == regno0)
18656 split_cost += 4;
18657 else
18658 split_cost += parts.scale;
18661 /* Have to use an add instruction with an immediate if
18662 disp is nonzero. */
18663 if (parts.disp && parts.disp != const0_rtx)
18664 split_cost += 1;
18666 /* Subtract the price of lea. */
18667 split_cost -= 1;
18670 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18671 parts.scale > 1);
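/* Purely illustrative costing example for the logic above, assuming
   %rcx, %rax and %rbx are distinct hard registers:

       lea 0x4(%rax,%rbx,2), %rcx

   needs a mov into %rcx (+1), an add of the second address register
   (+1), a shift for the scale (+1) and an add of the displacement
   (+1), minus the lea itself (-1), giving split_cost == 3.  That
   value is then weighed against the AGU stall by
   ix86_lea_outperforms.  */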
18674 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18675 matches destination. RTX includes clobber of FLAGS_REG. */
18677 static void
18678 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18679 rtx dst, rtx src)
18681 rtx op, clob;
18683 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18684 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18686 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18689 /* Return true if the definition of regno1 is nearer to the insn than that of regno2. */
18691 static bool
18692 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18694 rtx_insn *prev = insn;
18695 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18697 if (insn == start)
18698 return false;
18699 while (prev && prev != start)
18701 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18703 prev = PREV_INSN (prev);
18704 continue;
18706 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18707 return true;
18708 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18709 return false;
18710 prev = PREV_INSN (prev);
18713 /* None of the regs is defined in the bb. */
18714 return false;
18717 /* Split lea instructions into a sequence of instructions
18718 which are executed on the ALU to avoid AGU stalls.
18719 It is assumed that it is allowed to clobber the flags register
18720 at the lea position. */
18722 void
18723 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18725 unsigned int regno0, regno1, regno2;
18726 struct ix86_address parts;
18727 rtx target, tmp;
18728 int ok, adds;
18730 ok = ix86_decompose_address (operands[1], &parts);
18731 gcc_assert (ok);
18733 target = gen_lowpart (mode, operands[0]);
18735 regno0 = true_regnum (target);
18736 regno1 = INVALID_REGNUM;
18737 regno2 = INVALID_REGNUM;
18739 if (parts.base)
18741 parts.base = gen_lowpart (mode, parts.base);
18742 regno1 = true_regnum (parts.base);
18745 if (parts.index)
18747 parts.index = gen_lowpart (mode, parts.index);
18748 regno2 = true_regnum (parts.index);
18751 if (parts.disp)
18752 parts.disp = gen_lowpart (mode, parts.disp);
18754 if (parts.scale > 1)
18756 /* Case r1 = r1 + ... */
18757 if (regno1 == regno0)
18759 /* If we have the case r1 = r1 + C * r2 then we
18760 would have to use multiplication, which is very
18761 expensive. Assume the cost model is wrong if we
18762 reach such a case here. */
18763 gcc_assert (regno2 != regno0);
18765 for (adds = parts.scale; adds > 0; adds--)
18766 ix86_emit_binop (PLUS, mode, target, parts.index);
18768 else
18770 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18771 if (regno0 != regno2)
18772 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18774 /* Use shift for scaling. */
18775 ix86_emit_binop (ASHIFT, mode, target,
18776 GEN_INT (exact_log2 (parts.scale)));
18778 if (parts.base)
18779 ix86_emit_binop (PLUS, mode, target, parts.base);
18781 if (parts.disp && parts.disp != const0_rtx)
18782 ix86_emit_binop (PLUS, mode, target, parts.disp);
18785 else if (!parts.base && !parts.index)
18787 gcc_assert (parts.disp);
18788 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18790 else
18792 if (!parts.base)
18794 if (regno0 != regno2)
18795 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18797 else if (!parts.index)
18799 if (regno0 != regno1)
18800 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18802 else
18804 if (regno0 == regno1)
18805 tmp = parts.index;
18806 else if (regno0 == regno2)
18807 tmp = parts.base;
18808 else
18810 rtx tmp1;
18812 /* Find better operand for SET instruction, depending
18813 on which definition is farther from the insn. */
18814 if (find_nearest_reg_def (insn, regno1, regno2))
18815 tmp = parts.index, tmp1 = parts.base;
18816 else
18817 tmp = parts.base, tmp1 = parts.index;
18819 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18821 if (parts.disp && parts.disp != const0_rtx)
18822 ix86_emit_binop (PLUS, mode, target, parts.disp);
18824 ix86_emit_binop (PLUS, mode, target, tmp1);
18825 return;
18828 ix86_emit_binop (PLUS, mode, target, tmp);
18831 if (parts.disp && parts.disp != const0_rtx)
18832 ix86_emit_binop (PLUS, mode, target, parts.disp);
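/* As an illustration (register names are arbitrary), for

       lea (%rbx,%rcx,4), %rax

   the code above emits the equivalent ALU sequence

       mov %rcx, %rax
       sal $2, %rax
       add %rbx, %rax

   while the destructive r1 = r1 + C * r2 form is instead expanded
   into C successive additions of the index register.  */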
18836 /* Return true if it is ok to optimize an ADD operation to LEA
18837 operation to avoid flag register consumption. For most processors,
18838 ADD is faster than LEA. For processors like BONNELL, if the
18839 destination register of the LEA holds an actual address which will be
18840 used soon, LEA is better; otherwise ADD is better. */
18842 bool
18843 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18845 unsigned int regno0 = true_regnum (operands[0]);
18846 unsigned int regno1 = true_regnum (operands[1]);
18847 unsigned int regno2 = true_regnum (operands[2]);
18849 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18850 if (regno0 != regno1 && regno0 != regno2)
18851 return true;
18853 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18854 return false;
18856 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18859 /* Return true if destination reg of SET_BODY is shift count of
18860 USE_BODY. */
18862 static bool
18863 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18865 rtx set_dest;
18866 rtx shift_rtx;
18867 int i;
18869 /* Retrieve destination of SET_BODY. */
18870 switch (GET_CODE (set_body))
18872 case SET:
18873 set_dest = SET_DEST (set_body);
18874 if (!set_dest || !REG_P (set_dest))
18875 return false;
18876 break;
18877 case PARALLEL:
18878 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18879 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18880 use_body))
18881 return true;
18882 default:
18883 return false;
18884 break;
18887 /* Retrieve shift count of USE_BODY. */
18888 switch (GET_CODE (use_body))
18890 case SET:
18891 shift_rtx = XEXP (use_body, 1);
18892 break;
18893 case PARALLEL:
18894 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18895 if (ix86_dep_by_shift_count_body (set_body,
18896 XVECEXP (use_body, 0, i)))
18897 return true;
18898 default:
18899 return false;
18900 break;
18903 if (shift_rtx
18904 && (GET_CODE (shift_rtx) == ASHIFT
18905 || GET_CODE (shift_rtx) == LSHIFTRT
18906 || GET_CODE (shift_rtx) == ASHIFTRT
18907 || GET_CODE (shift_rtx) == ROTATE
18908 || GET_CODE (shift_rtx) == ROTATERT))
18910 rtx shift_count = XEXP (shift_rtx, 1);
18912 /* Return true if shift count is dest of SET_BODY. */
18913 if (REG_P (shift_count))
18915 /* Add this check since this function can be invoked before
18916 register allocation by the pre-reload scheduler. */
18917 if (reload_completed
18918 && true_regnum (set_dest) == true_regnum (shift_count))
18919 return true;
18920 else if (REGNO (set_dest) == REGNO (shift_count))
18921 return true;
18925 return false;
18928 /* Return true if destination reg of SET_INSN is shift count of
18929 USE_INSN. */
18931 bool
18932 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18934 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18935 PATTERN (use_insn));
18938 /* Return TRUE or FALSE depending on whether the unary operator meets the
18939 appropriate constraints. */
18941 bool
18942 ix86_unary_operator_ok (enum rtx_code,
18943 machine_mode,
18944 rtx operands[2])
18946 /* If one of operands is memory, source and destination must match. */
18947 if ((MEM_P (operands[0])
18948 || MEM_P (operands[1]))
18949 && ! rtx_equal_p (operands[0], operands[1]))
18950 return false;
18951 return true;
18954 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18955 are ok, keeping in mind the possible movddup alternative. */
18957 bool
18958 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18960 if (MEM_P (operands[0]))
18961 return rtx_equal_p (operands[0], operands[1 + high]);
18962 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18963 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18964 return true;
18967 /* Post-reload splitter for converting an SF or DFmode value in an
18968 SSE register into an unsigned SImode. */
18970 void
18971 ix86_split_convert_uns_si_sse (rtx operands[])
18973 machine_mode vecmode;
18974 rtx value, large, zero_or_two31, input, two31, x;
18976 large = operands[1];
18977 zero_or_two31 = operands[2];
18978 input = operands[3];
18979 two31 = operands[4];
18980 vecmode = GET_MODE (large);
18981 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18983 /* Load up the value into the low element. We must ensure that the other
18984 elements are valid floats -- zero is the easiest such value. */
18985 if (MEM_P (input))
18987 if (vecmode == V4SFmode)
18988 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18989 else
18990 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18992 else
18994 input = gen_rtx_REG (vecmode, REGNO (input));
18995 emit_move_insn (value, CONST0_RTX (vecmode));
18996 if (vecmode == V4SFmode)
18997 emit_insn (gen_sse_movss (value, value, input));
18998 else
18999 emit_insn (gen_sse2_movsd (value, value, input));
19002 emit_move_insn (large, two31);
19003 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19005 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19006 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19008 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19009 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19011 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19012 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19014 large = gen_rtx_REG (V4SImode, REGNO (large));
19015 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19017 x = gen_rtx_REG (V4SImode, REGNO (value));
19018 if (vecmode == V4SFmode)
19019 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19020 else
19021 emit_insn (gen_sse2_cvttpd2dq (x, value));
19022 value = x;
19024 emit_insn (gen_xorv4si3 (value, value, large));
19027 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19028 Expects the 64-bit DImode to be supplied in a pair of integral
19029 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19030 -mfpmath=sse, !optimize_size only. */
19032 void
19033 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19035 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19036 rtx int_xmm, fp_xmm;
19037 rtx biases, exponents;
19038 rtx x;
19040 int_xmm = gen_reg_rtx (V4SImode);
19041 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19042 emit_insn (gen_movdi_to_sse (int_xmm, input));
19043 else if (TARGET_SSE_SPLIT_REGS)
19045 emit_clobber (int_xmm);
19046 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19048 else
19050 x = gen_reg_rtx (V2DImode);
19051 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19052 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19055 x = gen_rtx_CONST_VECTOR (V4SImode,
19056 gen_rtvec (4, GEN_INT (0x43300000UL),
19057 GEN_INT (0x45300000UL),
19058 const0_rtx, const0_rtx));
19059 exponents = validize_mem (force_const_mem (V4SImode, x));
19061 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19062 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19064 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19065 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19066 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19067 (0x1.0p84 + double(fp_value_hi_xmm)).
19068 Note these exponents differ by 32. */
19070 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19072 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19073 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19074 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19075 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19076 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19077 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19078 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19079 biases = validize_mem (force_const_mem (V2DFmode, biases));
19080 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19082 /* Add the upper and lower DFmode values together. */
19083 if (TARGET_SSE3)
19084 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19085 else
19087 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19088 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19089 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19092 ix86_expand_vector_extract (false, target, fp_xmm, 0);
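/* Worked example of the bias trick above (purely illustrative): for
   the input 0x0000000500000003 the interleave produces the doubles
   2^52 + 3 and 2^84 + 5*2^32; subtracting the 2^52 and 2^84 biases
   leaves 3.0 and 21474836480.0, and the final horizontal add yields
   21474836483.0, i.e. the exact unsigned value of the input.  */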
19095 /* Not used, but eases macroization of patterns. */
19096 void
19097 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19099 gcc_unreachable ();
19102 /* Convert an unsigned SImode value into a DFmode. Only currently used
19103 for SSE, but applicable anywhere. */
19105 void
19106 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19108 REAL_VALUE_TYPE TWO31r;
19109 rtx x, fp;
19111 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19112 NULL, 1, OPTAB_DIRECT);
19114 fp = gen_reg_rtx (DFmode);
19115 emit_insn (gen_floatsidf2 (fp, x));
19117 real_ldexp (&TWO31r, &dconst1, 31);
19118 x = const_double_from_real_value (TWO31r, DFmode);
19120 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19121 if (x != target)
19122 emit_move_insn (target, x);
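/* Illustrative arithmetic for the conversion above: for input
   0xB2D05E00 (3000000000 as unsigned), adding -2^31 wraps to the
   signed value 852516352, which converts exactly to 852516352.0;
   adding back 2^31 as a double gives 3000000000.0.  Inputs below
   2^31 wrap to a negative value and are fixed up the same way.  */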
19125 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19126 32-bit mode; otherwise we have a direct convert instruction. */
19128 void
19129 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19131 REAL_VALUE_TYPE TWO32r;
19132 rtx fp_lo, fp_hi, x;
19134 fp_lo = gen_reg_rtx (DFmode);
19135 fp_hi = gen_reg_rtx (DFmode);
19137 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19139 real_ldexp (&TWO32r, &dconst1, 32);
19140 x = const_double_from_real_value (TWO32r, DFmode);
19141 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19143 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19145 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19146 0, OPTAB_DIRECT);
19147 if (x != target)
19148 emit_move_insn (target, x);
19151 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19152 For x86_32, -mfpmath=sse, !optimize_size only. */
19153 void
19154 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19156 REAL_VALUE_TYPE ONE16r;
19157 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19159 real_ldexp (&ONE16r, &dconst1, 16);
19160 x = const_double_from_real_value (ONE16r, SFmode);
19161 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19162 NULL, 0, OPTAB_DIRECT);
19163 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19164 NULL, 0, OPTAB_DIRECT);
19165 fp_hi = gen_reg_rtx (SFmode);
19166 fp_lo = gen_reg_rtx (SFmode);
19167 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19168 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19169 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19170 0, OPTAB_DIRECT);
19171 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19172 0, OPTAB_DIRECT);
19173 if (!rtx_equal_p (target, fp_hi))
19174 emit_move_insn (target, fp_hi);
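/* Illustrative arithmetic for the 16-bit split above: for input
   0xFFFF0000 the pieces are hi == 0xFFFF and lo == 0, so the result
   is 65535.0 * 65536.0 + 0.0 == 4294901760.0, computed without ever
   treating the unsigned input as a negative SImode value.  */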
19177 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19178 a vector of unsigned ints VAL to vector of floats TARGET. */
19180 void
19181 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19183 rtx tmp[8];
19184 REAL_VALUE_TYPE TWO16r;
19185 machine_mode intmode = GET_MODE (val);
19186 machine_mode fltmode = GET_MODE (target);
19187 rtx (*cvt) (rtx, rtx);
19189 if (intmode == V4SImode)
19190 cvt = gen_floatv4siv4sf2;
19191 else
19192 cvt = gen_floatv8siv8sf2;
19193 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19194 tmp[0] = force_reg (intmode, tmp[0]);
19195 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19196 OPTAB_DIRECT);
19197 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19198 NULL_RTX, 1, OPTAB_DIRECT);
19199 tmp[3] = gen_reg_rtx (fltmode);
19200 emit_insn (cvt (tmp[3], tmp[1]));
19201 tmp[4] = gen_reg_rtx (fltmode);
19202 emit_insn (cvt (tmp[4], tmp[2]));
19203 real_ldexp (&TWO16r, &dconst1, 16);
19204 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19205 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19206 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19207 OPTAB_DIRECT);
19208 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19209 OPTAB_DIRECT);
19210 if (tmp[7] != target)
19211 emit_move_insn (target, tmp[7]);
19214 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19215 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19216 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19217 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19220 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19222 REAL_VALUE_TYPE TWO31r;
19223 rtx two31r, tmp[4];
19224 machine_mode mode = GET_MODE (val);
19225 machine_mode scalarmode = GET_MODE_INNER (mode);
19226 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19227 rtx (*cmp) (rtx, rtx, rtx, rtx);
19228 int i;
19230 for (i = 0; i < 3; i++)
19231 tmp[i] = gen_reg_rtx (mode);
19232 real_ldexp (&TWO31r, &dconst1, 31);
19233 two31r = const_double_from_real_value (TWO31r, scalarmode);
19234 two31r = ix86_build_const_vector (mode, 1, two31r);
19235 two31r = force_reg (mode, two31r);
19236 switch (mode)
19238 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19239 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19240 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19241 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19242 default: gcc_unreachable ();
19244 tmp[3] = gen_rtx_LE (mode, two31r, val);
19245 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19246 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19247 0, OPTAB_DIRECT);
19248 if (intmode == V4SImode || TARGET_AVX2)
19249 *xorp = expand_simple_binop (intmode, ASHIFT,
19250 gen_lowpart (intmode, tmp[0]),
19251 GEN_INT (31), NULL_RTX, 0,
19252 OPTAB_DIRECT);
19253 else
19255 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19256 two31 = ix86_build_const_vector (intmode, 1, two31);
19257 *xorp = expand_simple_binop (intmode, AND,
19258 gen_lowpart (intmode, tmp[0]),
19259 two31, NULL_RTX, 0,
19260 OPTAB_DIRECT);
19262 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19263 0, OPTAB_DIRECT);
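/* Illustrative per-lane example for the adjustment above, assuming a
   V4SFmode lane holding 3000000000.0: the comparison yields an
   all-ones mask, so 2^31 is subtracted (leaving 852516352.0) and the
   matching *XORP lane is set to 0x80000000; the later signed
   truncation gives 0x32D05E00, and the final xor restores 0xB2D05E00
   (3000000000).  Lanes below 2^31 get a zero mask and pass through
   unchanged.  */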
19266 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19267 then replicate the value for all elements of the vector
19268 register. */
19271 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19273 int i, n_elt;
19274 rtvec v;
19275 machine_mode scalar_mode;
19277 switch (mode)
19279 case V64QImode:
19280 case V32QImode:
19281 case V16QImode:
19282 case V32HImode:
19283 case V16HImode:
19284 case V8HImode:
19285 case V16SImode:
19286 case V8SImode:
19287 case V4SImode:
19288 case V8DImode:
19289 case V4DImode:
19290 case V2DImode:
19291 gcc_assert (vect);
19292 case V16SFmode:
19293 case V8SFmode:
19294 case V4SFmode:
19295 case V8DFmode:
19296 case V4DFmode:
19297 case V2DFmode:
19298 n_elt = GET_MODE_NUNITS (mode);
19299 v = rtvec_alloc (n_elt);
19300 scalar_mode = GET_MODE_INNER (mode);
19302 RTVEC_ELT (v, 0) = value;
19304 for (i = 1; i < n_elt; ++i)
19305 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19307 return gen_rtx_CONST_VECTOR (mode, v);
19309 default:
19310 gcc_unreachable ();
19314 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19315 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19316 for an SSE register. If VECT is true, then replicate the mask for
19317 all elements of the vector register. If INVERT is true, then create
19318 a mask excluding the sign bit. */
19321 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19323 machine_mode vec_mode, imode;
19324 HOST_WIDE_INT hi, lo;
19325 int shift = 63;
19326 rtx v;
19327 rtx mask;
19329 /* Find the sign bit, sign extended to 2*HWI. */
19330 switch (mode)
19332 case V16SImode:
19333 case V16SFmode:
19334 case V8SImode:
19335 case V4SImode:
19336 case V8SFmode:
19337 case V4SFmode:
19338 vec_mode = mode;
19339 mode = GET_MODE_INNER (mode);
19340 imode = SImode;
19341 lo = 0x80000000, hi = lo < 0;
19342 break;
19344 case V8DImode:
19345 case V4DImode:
19346 case V2DImode:
19347 case V8DFmode:
19348 case V4DFmode:
19349 case V2DFmode:
19350 vec_mode = mode;
19351 mode = GET_MODE_INNER (mode);
19352 imode = DImode;
19353 if (HOST_BITS_PER_WIDE_INT >= 64)
19354 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19355 else
19356 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19357 break;
19359 case TImode:
19360 case TFmode:
19361 vec_mode = VOIDmode;
19362 if (HOST_BITS_PER_WIDE_INT >= 64)
19364 imode = TImode;
19365 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19367 else
19369 rtvec vec;
19371 imode = DImode;
19372 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19374 if (invert)
19376 lo = ~lo, hi = ~hi;
19377 v = constm1_rtx;
19379 else
19380 v = const0_rtx;
19382 mask = immed_double_const (lo, hi, imode);
19384 vec = gen_rtvec (2, v, mask);
19385 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19386 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19388 return v;
19390 break;
19392 default:
19393 gcc_unreachable ();
19396 if (invert)
19397 lo = ~lo, hi = ~hi;
19399 /* Force this value into the low part of a fp vector constant. */
19400 mask = immed_double_const (lo, hi, imode);
19401 mask = gen_lowpart (mode, mask);
19403 if (vec_mode == VOIDmode)
19404 return force_reg (mode, mask);
19406 v = ix86_build_const_vector (vec_mode, vect, mask);
19407 return force_reg (vec_mode, v);
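/* For illustration, the masks produced above: for V4SFmode with VECT
   set this is { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } (or
   the complement with 0x7fffffff per element when INVERT is set),
   and for the DFmode/DImode cases the single bit 63 of each
   element.  */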
19410 /* Generate code for floating point ABS or NEG. */
19412 void
19413 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19414 rtx operands[])
19416 rtx mask, set, dst, src;
19417 bool use_sse = false;
19418 bool vector_mode = VECTOR_MODE_P (mode);
19419 machine_mode vmode = mode;
19421 if (vector_mode)
19422 use_sse = true;
19423 else if (mode == TFmode)
19424 use_sse = true;
19425 else if (TARGET_SSE_MATH)
19427 use_sse = SSE_FLOAT_MODE_P (mode);
19428 if (mode == SFmode)
19429 vmode = V4SFmode;
19430 else if (mode == DFmode)
19431 vmode = V2DFmode;
19434 /* NEG and ABS performed with SSE use bitwise mask operations.
19435 Create the appropriate mask now. */
19436 if (use_sse)
19437 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19438 else
19439 mask = NULL_RTX;
19441 dst = operands[0];
19442 src = operands[1];
19444 set = gen_rtx_fmt_e (code, mode, src);
19445 set = gen_rtx_SET (VOIDmode, dst, set);
19447 if (mask)
19449 rtx use, clob;
19450 rtvec par;
19452 use = gen_rtx_USE (VOIDmode, mask);
19453 if (vector_mode)
19454 par = gen_rtvec (2, set, use);
19455 else
19457 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19458 par = gen_rtvec (3, set, use, clob);
19460 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19462 else
19463 emit_insn (set);
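/* Roughly, for scalar DFmode with SSE math the pattern emitted above
   is eventually split into an xorpd with the bit-63 mask for NEG and
   an andpd with the inverted mask for ABS, with the whole-vector
   mask used unchanged for the vector modes.  */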
19466 /* Expand a copysign operation. Special case operand 0 being a constant. */
19468 void
19469 ix86_expand_copysign (rtx operands[])
19471 machine_mode mode, vmode;
19472 rtx dest, op0, op1, mask, nmask;
19474 dest = operands[0];
19475 op0 = operands[1];
19476 op1 = operands[2];
19478 mode = GET_MODE (dest);
19480 if (mode == SFmode)
19481 vmode = V4SFmode;
19482 else if (mode == DFmode)
19483 vmode = V2DFmode;
19484 else
19485 vmode = mode;
19487 if (GET_CODE (op0) == CONST_DOUBLE)
19489 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19491 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19492 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19494 if (mode == SFmode || mode == DFmode)
19496 if (op0 == CONST0_RTX (mode))
19497 op0 = CONST0_RTX (vmode);
19498 else
19500 rtx v = ix86_build_const_vector (vmode, false, op0);
19502 op0 = force_reg (vmode, v);
19505 else if (op0 != CONST0_RTX (mode))
19506 op0 = force_reg (mode, op0);
19508 mask = ix86_build_signbit_mask (vmode, 0, 0);
19510 if (mode == SFmode)
19511 copysign_insn = gen_copysignsf3_const;
19512 else if (mode == DFmode)
19513 copysign_insn = gen_copysigndf3_const;
19514 else
19515 copysign_insn = gen_copysigntf3_const;
19517 emit_insn (copysign_insn (dest, op0, op1, mask));
19519 else
19521 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19523 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19524 mask = ix86_build_signbit_mask (vmode, 0, 0);
19526 if (mode == SFmode)
19527 copysign_insn = gen_copysignsf3_var;
19528 else if (mode == DFmode)
19529 copysign_insn = gen_copysigndf3_var;
19530 else
19531 copysign_insn = gen_copysigntf3_var;
19533 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
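/* The underlying identity, for illustration: copysign (x, y) is
   (x & ~signmask) | (y & signmask).  When x is a constant the
   expansion above precomputes |x| (i.e. x & ~signmask), leaving only
   the (y & signmask) | |x| part for the *_const splitter; the
   variable case keeps both masks around for the *_var splitter.  */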
19537 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19538 be a constant, and so has already been expanded into a vector constant. */
19540 void
19541 ix86_split_copysign_const (rtx operands[])
19543 machine_mode mode, vmode;
19544 rtx dest, op0, mask, x;
19546 dest = operands[0];
19547 op0 = operands[1];
19548 mask = operands[3];
19550 mode = GET_MODE (dest);
19551 vmode = GET_MODE (mask);
19553 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19554 x = gen_rtx_AND (vmode, dest, mask);
19555 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19557 if (op0 != CONST0_RTX (vmode))
19559 x = gen_rtx_IOR (vmode, dest, op0);
19560 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19564 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19565 so we have to do two masks. */
19567 void
19568 ix86_split_copysign_var (rtx operands[])
19570 machine_mode mode, vmode;
19571 rtx dest, scratch, op0, op1, mask, nmask, x;
19573 dest = operands[0];
19574 scratch = operands[1];
19575 op0 = operands[2];
19576 op1 = operands[3];
19577 nmask = operands[4];
19578 mask = operands[5];
19580 mode = GET_MODE (dest);
19581 vmode = GET_MODE (mask);
19583 if (rtx_equal_p (op0, op1))
19585 /* Shouldn't happen often (it's useless, obviously), but when it does
19586 we'd generate incorrect code if we continue below. */
19587 emit_move_insn (dest, op0);
19588 return;
19591 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19593 gcc_assert (REGNO (op1) == REGNO (scratch));
19595 x = gen_rtx_AND (vmode, scratch, mask);
19596 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19598 dest = mask;
19599 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19600 x = gen_rtx_NOT (vmode, dest);
19601 x = gen_rtx_AND (vmode, x, op0);
19602 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19604 else
19606 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19608 x = gen_rtx_AND (vmode, scratch, mask);
19610 else /* alternative 2,4 */
19612 gcc_assert (REGNO (mask) == REGNO (scratch));
19613 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19614 x = gen_rtx_AND (vmode, scratch, op1);
19616 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19618 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19620 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19621 x = gen_rtx_AND (vmode, dest, nmask);
19623 else /* alternative 3,4 */
19625 gcc_assert (REGNO (nmask) == REGNO (dest));
19626 dest = nmask;
19627 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19628 x = gen_rtx_AND (vmode, dest, op0);
19630 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19633 x = gen_rtx_IOR (vmode, dest, scratch);
19634 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19637 /* Return TRUE or FALSE depending on whether the first SET in INSN
19638 has source and destination with matching CC modes, and that the
19639 CC mode is at least as constrained as REQ_MODE. */
19641 bool
19642 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19644 rtx set;
19645 machine_mode set_mode;
19647 set = PATTERN (insn);
19648 if (GET_CODE (set) == PARALLEL)
19649 set = XVECEXP (set, 0, 0);
19650 gcc_assert (GET_CODE (set) == SET);
19651 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19653 set_mode = GET_MODE (SET_DEST (set));
19654 switch (set_mode)
19656 case CCNOmode:
19657 if (req_mode != CCNOmode
19658 && (req_mode != CCmode
19659 || XEXP (SET_SRC (set), 1) != const0_rtx))
19660 return false;
19661 break;
19662 case CCmode:
19663 if (req_mode == CCGCmode)
19664 return false;
19665 /* FALLTHRU */
19666 case CCGCmode:
19667 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19668 return false;
19669 /* FALLTHRU */
19670 case CCGOCmode:
19671 if (req_mode == CCZmode)
19672 return false;
19673 /* FALLTHRU */
19674 case CCZmode:
19675 break;
19677 case CCAmode:
19678 case CCCmode:
19679 case CCOmode:
19680 case CCSmode:
19681 if (set_mode != req_mode)
19682 return false;
19683 break;
19685 default:
19686 gcc_unreachable ();
19689 return GET_MODE (SET_SRC (set)) == set_mode;
19692 /* Generate insn patterns to do an integer compare of OPERANDS. */
19694 static rtx
19695 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19697 machine_mode cmpmode;
19698 rtx tmp, flags;
19700 cmpmode = SELECT_CC_MODE (code, op0, op1);
19701 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19703 /* This is very simple, but making the interface the same as in the
19704 FP case makes the rest of the code easier. */
19705 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19706 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19708 /* Return the test that should be put into the flags user, i.e.
19709 the bcc, scc, or cmov instruction. */
19710 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19713 /* Figure out whether to use ordered or unordered fp comparisons.
19714 Return the appropriate mode to use. */
19716 machine_mode
19717 ix86_fp_compare_mode (enum rtx_code)
19719 /* ??? In order to make all comparisons reversible, we do all comparisons
19720 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19721 trapping from nontrapping comparisons in all forms, we can make inequality
19722 comparisons trapping again, since that results in better code when using
19723 FCOM based compares. */
19724 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19727 machine_mode
19728 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19730 machine_mode mode = GET_MODE (op0);
19732 if (SCALAR_FLOAT_MODE_P (mode))
19734 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19735 return ix86_fp_compare_mode (code);
19738 switch (code)
19740 /* Only zero flag is needed. */
19741 case EQ: /* ZF=0 */
19742 case NE: /* ZF!=0 */
19743 return CCZmode;
19744 /* Codes needing carry flag. */
19745 case GEU: /* CF=0 */
19746 case LTU: /* CF=1 */
19747 /* Detect overflow checks. They need just the carry flag. */
19748 if (GET_CODE (op0) == PLUS
19749 && rtx_equal_p (op1, XEXP (op0, 0)))
19750 return CCCmode;
19751 else
19752 return CCmode;
19753 case GTU: /* CF=0 & ZF=0 */
19754 case LEU: /* CF=1 | ZF=1 */
19755 return CCmode;
19756 /* Codes possibly doable only with sign flag when
19757 comparing against zero. */
19758 case GE: /* SF=OF or SF=0 */
19759 case LT: /* SF<>OF or SF=1 */
19760 if (op1 == const0_rtx)
19761 return CCGOCmode;
19762 else
19763 /* For other cases Carry flag is not required. */
19764 return CCGCmode;
19765 /* Codes doable only with the sign flag when comparing
19766 against zero, but for which we lack a jump instruction,
19767 so we need to use relational tests against overflow,
19768 which thus needs to be zero. */
19769 case GT: /* ZF=0 & SF=OF */
19770 case LE: /* ZF=1 | SF<>OF */
19771 if (op1 == const0_rtx)
19772 return CCNOmode;
19773 else
19774 return CCGCmode;
19775 /* The strcmp pattern does a (use flags) and combine may ask us for
19776 the proper mode. */
19777 case USE:
19778 return CCmode;
19779 default:
19780 gcc_unreachable ();
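/* A few illustrative mappings from the switch above: (eq x 0) only
   needs CCZmode, (ltu (plus a b) a) is an overflow check and needs
   just the carry flag (CCCmode), (ge x 0) can use CCGOCmode, while
   (gt x y) with a nonzero y falls back to CCGCmode.  */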
19784 /* Return the fixed registers used for condition codes. */
19786 static bool
19787 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19789 *p1 = FLAGS_REG;
19790 *p2 = FPSR_REG;
19791 return true;
19794 /* If two condition code modes are compatible, return a condition code
19795 mode which is compatible with both. Otherwise, return
19796 VOIDmode. */
19798 static machine_mode
19799 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19801 if (m1 == m2)
19802 return m1;
19804 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19805 return VOIDmode;
19807 if ((m1 == CCGCmode && m2 == CCGOCmode)
19808 || (m1 == CCGOCmode && m2 == CCGCmode))
19809 return CCGCmode;
19811 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19812 return m2;
19813 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19814 return m1;
19816 switch (m1)
19818 default:
19819 gcc_unreachable ();
19821 case CCmode:
19822 case CCGCmode:
19823 case CCGOCmode:
19824 case CCNOmode:
19825 case CCAmode:
19826 case CCCmode:
19827 case CCOmode:
19828 case CCSmode:
19829 case CCZmode:
19830 switch (m2)
19832 default:
19833 return VOIDmode;
19835 case CCmode:
19836 case CCGCmode:
19837 case CCGOCmode:
19838 case CCNOmode:
19839 case CCAmode:
19840 case CCCmode:
19841 case CCOmode:
19842 case CCSmode:
19843 case CCZmode:
19844 return CCmode;
19847 case CCFPmode:
19848 case CCFPUmode:
19849 /* These are only compatible with themselves, which we already
19850 checked above. */
19851 return VOIDmode;
19856 /* Return a comparison we can do and that it is equivalent to
19857 swap_condition (code) apart possibly from orderedness.
19858 But, never change orderedness if TARGET_IEEE_FP, returning
19859 UNKNOWN in that case if necessary. */
19861 static enum rtx_code
19862 ix86_fp_swap_condition (enum rtx_code code)
19864 switch (code)
19866 case GT: /* GTU - CF=0 & ZF=0 */
19867 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19868 case GE: /* GEU - CF=0 */
19869 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19870 case UNLT: /* LTU - CF=1 */
19871 return TARGET_IEEE_FP ? UNKNOWN : GT;
19872 case UNLE: /* LEU - CF=1 | ZF=1 */
19873 return TARGET_IEEE_FP ? UNKNOWN : GE;
19874 default:
19875 return swap_condition (code);
19879 /* Return the cost of comparison CODE using the best strategy for performance.
19880 All the following functions use the number of instructions as the cost metric.
19881 In the future this should be tweaked to compute bytes for optimize_size and to
19882 take into account the performance of various instructions on various CPUs. */
19884 static int
19885 ix86_fp_comparison_cost (enum rtx_code code)
19887 int arith_cost;
19889 /* The cost of code using bit-twiddling on %ah. */
19890 switch (code)
19892 case UNLE:
19893 case UNLT:
19894 case LTGT:
19895 case GT:
19896 case GE:
19897 case UNORDERED:
19898 case ORDERED:
19899 case UNEQ:
19900 arith_cost = 4;
19901 break;
19902 case LT:
19903 case NE:
19904 case EQ:
19905 case UNGE:
19906 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19907 break;
19908 case LE:
19909 case UNGT:
19910 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19911 break;
19912 default:
19913 gcc_unreachable ();
19916 switch (ix86_fp_comparison_strategy (code))
19918 case IX86_FPCMP_COMI:
19919 return arith_cost > 4 ? 3 : 2;
19920 case IX86_FPCMP_SAHF:
19921 return arith_cost > 4 ? 4 : 3;
19922 default:
19923 return arith_cost;
19927 /* Return the strategy to use for floating-point. We assume that fcomi is
19928 always preferable where available, since that is also true when looking at
19929 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19931 enum ix86_fpcmp_strategy
19932 ix86_fp_comparison_strategy (enum rtx_code)
19934 /* Do fcomi/sahf based test when profitable. */
19936 if (TARGET_CMOVE)
19937 return IX86_FPCMP_COMI;
19939 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19940 return IX86_FPCMP_SAHF;
19942 return IX86_FPCMP_ARITH;
19945 /* Swap, force into registers, or otherwise massage the two operands
19946 to a fp comparison. The operands are updated in place; the new
19947 comparison code is returned. */
19949 static enum rtx_code
19950 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19952 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19953 rtx op0 = *pop0, op1 = *pop1;
19954 machine_mode op_mode = GET_MODE (op0);
19955 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19957 /* All of the unordered compare instructions only work on registers.
19958 The same is true of the fcomi compare instructions. The XFmode
19959 compare instructions require registers except when comparing
19960 against zero or when converting operand 1 from fixed point to
19961 floating point. */
19963 if (!is_sse
19964 && (fpcmp_mode == CCFPUmode
19965 || (op_mode == XFmode
19966 && ! (standard_80387_constant_p (op0) == 1
19967 || standard_80387_constant_p (op1) == 1)
19968 && GET_CODE (op1) != FLOAT)
19969 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19971 op0 = force_reg (op_mode, op0);
19972 op1 = force_reg (op_mode, op1);
19974 else
19976 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19977 things around if they appear profitable, otherwise force op0
19978 into a register. */
19980 if (standard_80387_constant_p (op0) == 0
19981 || (MEM_P (op0)
19982 && ! (standard_80387_constant_p (op1) == 0
19983 || MEM_P (op1))))
19985 enum rtx_code new_code = ix86_fp_swap_condition (code);
19986 if (new_code != UNKNOWN)
19988 std::swap (op0, op1);
19989 code = new_code;
19993 if (!REG_P (op0))
19994 op0 = force_reg (op_mode, op0);
19996 if (CONSTANT_P (op1))
19998 int tmp = standard_80387_constant_p (op1);
19999 if (tmp == 0)
20000 op1 = validize_mem (force_const_mem (op_mode, op1));
20001 else if (tmp == 1)
20003 if (TARGET_CMOVE)
20004 op1 = force_reg (op_mode, op1);
20006 else
20007 op1 = force_reg (op_mode, op1);
20011 /* Try to rearrange the comparison to make it cheaper. */
20012 if (ix86_fp_comparison_cost (code)
20013 > ix86_fp_comparison_cost (swap_condition (code))
20014 && (REG_P (op1) || can_create_pseudo_p ()))
20016 std::swap (op0, op1);
20017 code = swap_condition (code);
20018 if (!REG_P (op0))
20019 op0 = force_reg (op_mode, op0);
20022 *pop0 = op0;
20023 *pop1 = op1;
20024 return code;
20027 /* Convert comparison codes we use to represent FP comparison to integer
20028 code that will result in proper branch. Return UNKNOWN if no such code
20029 is available. */
20031 enum rtx_code
20032 ix86_fp_compare_code_to_integer (enum rtx_code code)
20034 switch (code)
20036 case GT:
20037 return GTU;
20038 case GE:
20039 return GEU;
20040 case ORDERED:
20041 case UNORDERED:
20042 return code;
20043 break;
20044 case UNEQ:
20045 return EQ;
20046 break;
20047 case UNLT:
20048 return LTU;
20049 break;
20050 case UNLE:
20051 return LEU;
20052 break;
20053 case LTGT:
20054 return NE;
20055 break;
20056 default:
20057 return UNKNOWN;
20061 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20063 static rtx
20064 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20066 machine_mode fpcmp_mode, intcmp_mode;
20067 rtx tmp, tmp2;
20069 fpcmp_mode = ix86_fp_compare_mode (code);
20070 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20072 /* Do fcomi/sahf based test when profitable. */
20073 switch (ix86_fp_comparison_strategy (code))
20075 case IX86_FPCMP_COMI:
20076 intcmp_mode = fpcmp_mode;
20077 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20078 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20079 tmp);
20080 emit_insn (tmp);
20081 break;
20083 case IX86_FPCMP_SAHF:
20084 intcmp_mode = fpcmp_mode;
20085 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20086 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20087 tmp);
20089 if (!scratch)
20090 scratch = gen_reg_rtx (HImode);
20091 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20092 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20093 break;
20095 case IX86_FPCMP_ARITH:
20096 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20097 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20098 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20099 if (!scratch)
20100 scratch = gen_reg_rtx (HImode);
20101 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20103 /* In the unordered case, we have to check C2 for NaN's, which
20104 doesn't happen to work out to anything nice combination-wise.
20105 So do some bit twiddling on the value we've got in AH to come
20106 up with an appropriate set of condition codes. */
20108 intcmp_mode = CCNOmode;
20109 switch (code)
20111 case GT:
20112 case UNGT:
20113 if (code == GT || !TARGET_IEEE_FP)
20115 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20116 code = EQ;
20118 else
20120 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20121 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20122 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20123 intcmp_mode = CCmode;
20124 code = GEU;
20126 break;
20127 case LT:
20128 case UNLT:
20129 if (code == LT && TARGET_IEEE_FP)
20131 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20132 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20133 intcmp_mode = CCmode;
20134 code = EQ;
20136 else
20138 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20139 code = NE;
20141 break;
20142 case GE:
20143 case UNGE:
20144 if (code == GE || !TARGET_IEEE_FP)
20146 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20147 code = EQ;
20149 else
20151 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20152 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20153 code = NE;
20155 break;
20156 case LE:
20157 case UNLE:
20158 if (code == LE && TARGET_IEEE_FP)
20160 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20161 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20162 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20163 intcmp_mode = CCmode;
20164 code = LTU;
20166 else
20168 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20169 code = NE;
20171 break;
20172 case EQ:
20173 case UNEQ:
20174 if (code == EQ && TARGET_IEEE_FP)
20176 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20177 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20178 intcmp_mode = CCmode;
20179 code = EQ;
20181 else
20183 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20184 code = NE;
20186 break;
20187 case NE:
20188 case LTGT:
20189 if (code == NE && TARGET_IEEE_FP)
20191 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20192 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20193 GEN_INT (0x40)));
20194 code = NE;
20196 else
20198 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20199 code = EQ;
20201 break;
20203 case UNORDERED:
20204 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20205 code = NE;
20206 break;
20207 case ORDERED:
20208 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20209 code = EQ;
20210 break;
20212 default:
20213 gcc_unreachable ();
20215 break;
20217 default:
20218 gcc_unreachable ();
20221 /* Return the test that should be put into the flags user, i.e.
20222 the bcc, scc, or cmov instruction. */
20223 return gen_rtx_fmt_ee (code, VOIDmode,
20224 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20225 const0_rtx);
20228 static rtx
20229 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20231 rtx ret;
20233 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20234 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20236 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20238 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20239 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20241 else
20242 ret = ix86_expand_int_compare (code, op0, op1);
20244 return ret;
20247 void
20248 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20250 machine_mode mode = GET_MODE (op0);
20251 rtx tmp;
20253 switch (mode)
20255 case SFmode:
20256 case DFmode:
20257 case XFmode:
20258 case QImode:
20259 case HImode:
20260 case SImode:
20261 simple:
20262 tmp = ix86_expand_compare (code, op0, op1);
20263 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20264 gen_rtx_LABEL_REF (VOIDmode, label),
20265 pc_rtx);
20266 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20267 return;
20269 case DImode:
20270 if (TARGET_64BIT)
20271 goto simple;
20272 case TImode:
20273 /* Expand DImode branch into multiple compare+branch. */
20275 rtx lo[2], hi[2];
20276 rtx_code_label *label2;
20277 enum rtx_code code1, code2, code3;
20278 machine_mode submode;
20280 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20282 std::swap (op0, op1);
20283 code = swap_condition (code);
20286 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20287 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20289 submode = mode == DImode ? SImode : DImode;
20291 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20292 avoid two branches. This costs one extra insn, so disable when
20293 optimizing for size. */
20295 if ((code == EQ || code == NE)
20296 && (!optimize_insn_for_size_p ()
20297 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20299 rtx xor0, xor1;
20301 xor1 = hi[0];
20302 if (hi[1] != const0_rtx)
20303 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20304 NULL_RTX, 0, OPTAB_WIDEN);
20306 xor0 = lo[0];
20307 if (lo[1] != const0_rtx)
20308 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20309 NULL_RTX, 0, OPTAB_WIDEN);
20311 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20312 NULL_RTX, 0, OPTAB_WIDEN);
20314 ix86_expand_branch (code, tmp, const0_rtx, label);
20315 return;
20318 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20319 op1 is a constant and the low word is zero, then we can just
20320 examine the high word. Similarly for low word -1 and
20321 less-or-equal-than or greater-than. */
20323 if (CONST_INT_P (hi[1]))
20324 switch (code)
20326 case LT: case LTU: case GE: case GEU:
20327 if (lo[1] == const0_rtx)
20329 ix86_expand_branch (code, hi[0], hi[1], label);
20330 return;
20332 break;
20333 case LE: case LEU: case GT: case GTU:
20334 if (lo[1] == constm1_rtx)
20336 ix86_expand_branch (code, hi[0], hi[1], label);
20337 return;
20339 break;
20340 default:
20341 break;
20344 /* Otherwise, we need two or three jumps. */
20346 label2 = gen_label_rtx ();
20348 code1 = code;
20349 code2 = swap_condition (code);
20350 code3 = unsigned_condition (code);
20352 switch (code)
20354 case LT: case GT: case LTU: case GTU:
20355 break;
20357 case LE: code1 = LT; code2 = GT; break;
20358 case GE: code1 = GT; code2 = LT; break;
20359 case LEU: code1 = LTU; code2 = GTU; break;
20360 case GEU: code1 = GTU; code2 = LTU; break;
20362 case EQ: code1 = UNKNOWN; code2 = NE; break;
20363 case NE: code2 = UNKNOWN; break;
20365 default:
20366 gcc_unreachable ();
20370 * a < b =>
20371 * if (hi(a) < hi(b)) goto true;
20372 * if (hi(a) > hi(b)) goto false;
20373 * if (lo(a) < lo(b)) goto true;
20374 * false:
20377 if (code1 != UNKNOWN)
20378 ix86_expand_branch (code1, hi[0], hi[1], label);
20379 if (code2 != UNKNOWN)
20380 ix86_expand_branch (code2, hi[0], hi[1], label2);
20382 ix86_expand_branch (code3, lo[0], lo[1], label);
20384 if (code2 != UNKNOWN)
20385 emit_label (label2);
20386 return;
20389 default:
20390 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20391 goto simple;
20395 /* Split branch based on floating point condition. */
20396 void
20397 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20398 rtx target1, rtx target2, rtx tmp)
20400 rtx condition;
20401 rtx i;
20403 if (target2 != pc_rtx)
20405 rtx tmp = target2;
20406 code = reverse_condition_maybe_unordered (code);
20407 target2 = target1;
20408 target1 = tmp;
20411 condition = ix86_expand_fp_compare (code, op1, op2,
20412 tmp);
20414 i = emit_jump_insn (gen_rtx_SET
20415 (VOIDmode, pc_rtx,
20416 gen_rtx_IF_THEN_ELSE (VOIDmode,
20417 condition, target1, target2)));
20418 if (split_branch_probability >= 0)
20419 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20422 void
20423 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20425 rtx ret;
20427 gcc_assert (GET_MODE (dest) == QImode);
20429 ret = ix86_expand_compare (code, op0, op1);
20430 PUT_MODE (ret, QImode);
20431 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20434 /* Expand comparison setting or clearing carry flag. Return true when
20435 successful and set pop for the operation. */
20436 static bool
20437 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20439 machine_mode mode =
20440 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20442 /* Do not handle double-mode compares that go through special path. */
20443 if (mode == (TARGET_64BIT ? TImode : DImode))
20444 return false;
20446 if (SCALAR_FLOAT_MODE_P (mode))
20448 rtx compare_op;
20449 rtx_insn *compare_seq;
20451 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20453 /* Shortcut: the following common codes never translate
20454 into carry flag compares. */
20455 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20456 || code == ORDERED || code == UNORDERED)
20457 return false;
20459 /* These comparisons require zero flag; swap operands so they won't. */
20460 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20461 && !TARGET_IEEE_FP)
20463 std::swap (op0, op1);
20464 code = swap_condition (code);
20467 /* Try to expand the comparison and verify that we end up with
20468 a carry flag based comparison. This fails to be true only when
20469 we decide to expand the comparison using arithmetic, which is
20470 not a common scenario. */
20471 start_sequence ();
20472 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20473 compare_seq = get_insns ();
20474 end_sequence ();
20476 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20477 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20478 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20479 else
20480 code = GET_CODE (compare_op);
20482 if (code != LTU && code != GEU)
20483 return false;
20485 emit_insn (compare_seq);
20486 *pop = compare_op;
20487 return true;
20490 if (!INTEGRAL_MODE_P (mode))
20491 return false;
20493 switch (code)
20495 case LTU:
20496 case GEU:
20497 break;
20499 /* Convert a==0 into (unsigned)a<1. */
20500 case EQ:
20501 case NE:
20502 if (op1 != const0_rtx)
20503 return false;
20504 op1 = const1_rtx;
20505 code = (code == EQ ? LTU : GEU);
20506 break;
20508 /* Convert a>b into b<a or a>=b-1. */
20509 case GTU:
20510 case LEU:
20511 if (CONST_INT_P (op1))
20513 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20514 /* Bail out on overflow. We could still swap the operands, but
20515 that would force loading of the constant into a register. */
20516 if (op1 == const0_rtx
20517 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20518 return false;
20519 code = (code == GTU ? GEU : LTU);
20521 else
20523 std::swap (op1, op0);
20524 code = (code == GTU ? LTU : GEU);
20526 break;
20528 /* Convert a>=0 into (unsigned)a<0x80000000. */
20529 case LT:
20530 case GE:
20531 if (mode == DImode || op1 != const0_rtx)
20532 return false;
20533 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20534 code = (code == LT ? GEU : LTU);
20535 break;
20536 case LE:
20537 case GT:
20538 if (mode == DImode || op1 != constm1_rtx)
20539 return false;
20540 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20541 code = (code == LE ? GEU : LTU);
20542 break;
20544 default:
20545 return false;
20547 /* Swapping operands may cause constant to appear as first operand. */
20548 if (!nonimmediate_operand (op0, VOIDmode))
20550 if (!can_create_pseudo_p ())
20551 return false;
20552 op0 = force_reg (mode, op0);
20554 *pop = ix86_expand_compare (code, op0, op1);
20555 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20556 return true;
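/* Illustrative rewrites performed above so the result is a carry flag
   test: a == 0 becomes (unsigned) a < 1 (LTU); a > 5 unsigned becomes
   (unsigned) a >= 6 (GEU); and a >= 0 on a signed SImode value
   becomes (unsigned) a < 0x80000000 (LTU).  */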
20559 bool
20560 ix86_expand_int_movcc (rtx operands[])
20562 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20563 rtx_insn *compare_seq;
20564 rtx compare_op;
20565 machine_mode mode = GET_MODE (operands[0]);
20566 bool sign_bit_compare_p = false;
20567 rtx op0 = XEXP (operands[1], 0);
20568 rtx op1 = XEXP (operands[1], 1);
20570 if (GET_MODE (op0) == TImode
20571 || (GET_MODE (op0) == DImode
20572 && !TARGET_64BIT))
20573 return false;
20575 start_sequence ();
20576 compare_op = ix86_expand_compare (code, op0, op1);
20577 compare_seq = get_insns ();
20578 end_sequence ();
20580 compare_code = GET_CODE (compare_op);
20582 if ((op1 == const0_rtx && (code == GE || code == LT))
20583 || (op1 == constm1_rtx && (code == GT || code == LE)))
20584 sign_bit_compare_p = true;
20586 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20587 HImode insns, we'd be swallowed in word prefix ops. */
20589 if ((mode != HImode || TARGET_FAST_PREFIX)
20590 && (mode != (TARGET_64BIT ? TImode : DImode))
20591 && CONST_INT_P (operands[2])
20592 && CONST_INT_P (operands[3]))
20594 rtx out = operands[0];
20595 HOST_WIDE_INT ct = INTVAL (operands[2]);
20596 HOST_WIDE_INT cf = INTVAL (operands[3]);
20597 HOST_WIDE_INT diff;
20599 diff = ct - cf;
20600 /* Sign bit compares are better done using shifts than by using
20601 sbb. */
20602 if (sign_bit_compare_p
20603 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20605 /* Detect overlap between destination and compare sources. */
20606 rtx tmp = out;
20608 if (!sign_bit_compare_p)
20610 rtx flags;
20611 bool fpcmp = false;
20613 compare_code = GET_CODE (compare_op);
20615 flags = XEXP (compare_op, 0);
20617 if (GET_MODE (flags) == CCFPmode
20618 || GET_MODE (flags) == CCFPUmode)
20620 fpcmp = true;
20621 compare_code
20622 = ix86_fp_compare_code_to_integer (compare_code);
20625 /* To simplify rest of code, restrict to the GEU case. */
20626 if (compare_code == LTU)
20628 HOST_WIDE_INT tmp = ct;
20629 ct = cf;
20630 cf = tmp;
20631 compare_code = reverse_condition (compare_code);
20632 code = reverse_condition (code);
20634 else
20636 if (fpcmp)
20637 PUT_CODE (compare_op,
20638 reverse_condition_maybe_unordered
20639 (GET_CODE (compare_op)));
20640 else
20641 PUT_CODE (compare_op,
20642 reverse_condition (GET_CODE (compare_op)));
20644 diff = ct - cf;
20646 if (reg_overlap_mentioned_p (out, op0)
20647 || reg_overlap_mentioned_p (out, op1))
20648 tmp = gen_reg_rtx (mode);
20650 if (mode == DImode)
20651 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20652 else
20653 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20654 flags, compare_op));
20656 else
20658 if (code == GT || code == GE)
20659 code = reverse_condition (code);
20660 else
20662 HOST_WIDE_INT tmp = ct;
20663 ct = cf;
20664 cf = tmp;
20665 diff = ct - cf;
20667 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20670 if (diff == 1)
20673 * cmpl op0,op1
20674 * sbbl dest,dest
20675 * [addl dest, ct]
20677 * Size 5 - 8.
20679 if (ct)
20680 tmp = expand_simple_binop (mode, PLUS,
20681 tmp, GEN_INT (ct),
20682 copy_rtx (tmp), 1, OPTAB_DIRECT);
20684 else if (cf == -1)
20687 * cmpl op0,op1
20688 * sbbl dest,dest
20689 * orl $ct, dest
20691 * Size 8.
20693 tmp = expand_simple_binop (mode, IOR,
20694 tmp, GEN_INT (ct),
20695 copy_rtx (tmp), 1, OPTAB_DIRECT);
20697 else if (diff == -1 && ct)
20700 * cmpl op0,op1
20701 * sbbl dest,dest
20702 * notl dest
20703 * [addl dest, cf]
20705 * Size 8 - 11.
20707 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20708 if (cf)
20709 tmp = expand_simple_binop (mode, PLUS,
20710 copy_rtx (tmp), GEN_INT (cf),
20711 copy_rtx (tmp), 1, OPTAB_DIRECT);
20713 else
20716 * cmpl op0,op1
20717 * sbbl dest,dest
20718 * [notl dest]
20719 * andl cf - ct, dest
20720 * [addl dest, ct]
20722 * Size 8 - 11.
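/* The mask arithmetic: with DEST holding the all-zeros/all-ones mask,
   DEST = ct + (DEST & (cf - ct)) evaluates to ct when the mask is 0 and
   to cf when the mask is -1, selecting between the two constants without
   a branch.  */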
20725 if (cf == 0)
20727 cf = ct;
20728 ct = 0;
20729 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20732 tmp = expand_simple_binop (mode, AND,
20733 copy_rtx (tmp),
20734 gen_int_mode (cf - ct, mode),
20735 copy_rtx (tmp), 1, OPTAB_DIRECT);
20736 if (ct)
20737 tmp = expand_simple_binop (mode, PLUS,
20738 copy_rtx (tmp), GEN_INT (ct),
20739 copy_rtx (tmp), 1, OPTAB_DIRECT);
20742 if (!rtx_equal_p (tmp, out))
20743 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20745 return true;
20748 if (diff < 0)
20750 machine_mode cmp_mode = GET_MODE (op0);
20752 std::swap (ct, cf);
20753 diff = -diff;
20755 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20757 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20759 /* We may be reversing unordered compare to normal compare, that
20760 is not valid in general (we may convert non-trapping condition
20761 to trapping one), however on i386 we currently emit all
20762 comparisons unordered. */
20763 compare_code = reverse_condition_maybe_unordered (compare_code);
20764 code = reverse_condition_maybe_unordered (code);
20766 else
20768 compare_code = reverse_condition (compare_code);
20769 code = reverse_condition (code);
20773 compare_code = UNKNOWN;
20774 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20775 && CONST_INT_P (op1))
20777 if (op1 == const0_rtx
20778 && (code == LT || code == GE))
20779 compare_code = code;
20780 else if (op1 == constm1_rtx)
20782 if (code == LE)
20783 compare_code = LT;
20784 else if (code == GT)
20785 compare_code = GE;
20789 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20790 if (compare_code != UNKNOWN
20791 && GET_MODE (op0) == GET_MODE (out)
20792 && (cf == -1 || ct == -1))
20794 /* If lea code below could be used, only optimize
20795 if it results in a 2 insn sequence. */
20797 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20798 || diff == 3 || diff == 5 || diff == 9)
20799 || (compare_code == LT && ct == -1)
20800 || (compare_code == GE && cf == -1))
20803 * notl op1 (if necessary)
20804 * sarl $31, op1
20805 * orl cf, op1
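/* sarl $31 replicates the sign bit across the register, yielding 0 or -1;
   OR-ing in the constant cf then gives cf when the sign bit was clear and
   -1 when it was set.  */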
20807 if (ct != -1)
20809 cf = ct;
20810 ct = -1;
20811 code = reverse_condition (code);
20814 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20816 out = expand_simple_binop (mode, IOR,
20817 out, GEN_INT (cf),
20818 out, 1, OPTAB_DIRECT);
20819 if (out != operands[0])
20820 emit_move_insn (operands[0], out);
20822 return true;
20827 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20828 || diff == 3 || diff == 5 || diff == 9)
20829 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20830 && (mode != DImode
20831 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20834 * xorl dest,dest
20835 * cmpl op1,op2
20836 * setcc dest
20837 * lea cf(dest*(ct-cf)),dest
20839 * Size 14.
20841 * This also catches the degenerate setcc-only case.
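/* For example (illustrative registers), with diff == 5 and cf == 7 the
   combination collapses into a single "leal 7(%eax,%eax,4), %eax", which
   computes out * 5 + 7: ct (== cf + diff) when the setcc produced 1 and
   cf when it produced 0.  */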
20844 rtx tmp;
20845 int nops;
20847 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20849 nops = 0;
20850 /* On x86_64 the lea instruction operates on Pmode, so we need
20851 to get the arithmetic done in the proper mode to match. */
20852 if (diff == 1)
20853 tmp = copy_rtx (out);
20854 else
20856 rtx out1;
20857 out1 = copy_rtx (out);
20858 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20859 nops++;
20860 if (diff & 1)
20862 tmp = gen_rtx_PLUS (mode, tmp, out1);
20863 nops++;
20866 if (cf != 0)
20868 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20869 nops++;
20871 if (!rtx_equal_p (tmp, out))
20873 if (nops == 1)
20874 out = force_operand (tmp, copy_rtx (out));
20875 else
20876 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20878 if (!rtx_equal_p (out, operands[0]))
20879 emit_move_insn (operands[0], copy_rtx (out));
20881 return true;
20885 * General case: Jumpful:
20886 * xorl dest,dest cmpl op1, op2
20887 * cmpl op1, op2 movl ct, dest
20888 * setcc dest jcc 1f
20889 * decl dest movl cf, dest
20890 * andl (cf-ct),dest 1:
20891 * addl ct,dest
20893 * Size 20. Size 14.
20895 * This is reasonably steep, but branch mispredict costs are
20896 * high on modern cpus, so consider failing only if optimizing
20897 * for space.
20900 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20901 && BRANCH_COST (optimize_insn_for_speed_p (),
20902 false) >= 2)
20904 if (cf == 0)
20906 machine_mode cmp_mode = GET_MODE (op0);
20908 cf = ct;
20909 ct = 0;
20911 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20913 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20915 /* We may be reversing unordered compare to normal compare,
20916 that is not valid in general (we may convert non-trapping
20917 condition to trapping one), however on i386 we currently
20918 emit all comparisons unordered. */
20919 code = reverse_condition_maybe_unordered (code);
20921 else
20923 code = reverse_condition (code);
20924 if (compare_code != UNKNOWN)
20925 compare_code = reverse_condition (compare_code);
20929 if (compare_code != UNKNOWN)
20931 /* notl op1 (if needed)
20932 sarl $31, op1
20933 andl (cf-ct), op1
20934 addl ct, op1
20936 For x < 0 (resp. x <= -1) there will be no notl,
20937 so if possible swap the constants to get rid of the
20938 complement.
20939 True/false will be -1/0 while code below (store flag
20940 followed by decrement) is 0/-1, so the constants need
20941 to be exchanged once more. */
20943 if (compare_code == GE || !cf)
20945 code = reverse_condition (code);
20946 compare_code = LT;
20948 else
20949 std::swap (cf, ct);
20951 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20953 else
20955 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20957 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20958 constm1_rtx,
20959 copy_rtx (out), 1, OPTAB_DIRECT);
20962 out = expand_simple_binop (mode, AND, copy_rtx (out),
20963 gen_int_mode (cf - ct, mode),
20964 copy_rtx (out), 1, OPTAB_DIRECT);
20965 if (ct)
20966 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20967 copy_rtx (out), 1, OPTAB_DIRECT);
20968 if (!rtx_equal_p (out, operands[0]))
20969 emit_move_insn (operands[0], copy_rtx (out));
20971 return true;
20975 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20977 /* Try a few things more with specific constants and a variable. */
20979 optab op;
20980 rtx var, orig_out, out, tmp;
20982 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20983 return false;
20985 /* If one of the two operands is an interesting constant, load a
20986 constant with the above and mask it in with a logical operation. */
20988 if (CONST_INT_P (operands[2]))
20990 var = operands[3];
20991 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20992 operands[3] = constm1_rtx, op = and_optab;
20993 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20994 operands[3] = const0_rtx, op = ior_optab;
20995 else
20996 return false;
20998 else if (CONST_INT_P (operands[3]))
21000 var = operands[2];
21001 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21002 operands[2] = constm1_rtx, op = and_optab;
21003 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21004 operands[2] = const0_rtx, op = ior_optab;
21005 else
21006 return false;
21008 else
21009 return false;
21011 orig_out = operands[0];
21012 tmp = gen_reg_rtx (mode);
21013 operands[0] = tmp;
21015 /* Recurse to get the constant loaded. */
21016 if (ix86_expand_int_movcc (operands) == 0)
21017 return false;
21019 /* Mask in the interesting variable. */
21020 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21021 OPTAB_WIDEN);
21022 if (!rtx_equal_p (out, orig_out))
21023 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21025 return true;
21029 * For comparison with above,
21031 * movl cf,dest
21032 * movl ct,tmp
21033 * cmpl op1,op2
21034 * cmovcc tmp,dest
21036 * Size 15.
21039 if (! nonimmediate_operand (operands[2], mode))
21040 operands[2] = force_reg (mode, operands[2]);
21041 if (! nonimmediate_operand (operands[3], mode))
21042 operands[3] = force_reg (mode, operands[3]);
21044 if (! register_operand (operands[2], VOIDmode)
21045 && (mode == QImode
21046 || ! register_operand (operands[3], VOIDmode)))
21047 operands[2] = force_reg (mode, operands[2]);
21049 if (mode == QImode
21050 && ! register_operand (operands[3], VOIDmode))
21051 operands[3] = force_reg (mode, operands[3]);
21053 emit_insn (compare_seq);
21054 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21055 gen_rtx_IF_THEN_ELSE (mode,
21056 compare_op, operands[2],
21057 operands[3])));
21058 return true;
21061 /* Swap, force into registers, or otherwise massage the two operands
21062 to an sse comparison with a mask result. Thus we differ a bit from
21063 ix86_prepare_fp_compare_args which expects to produce a flags result.
21065 The DEST operand exists to help determine whether to commute commutative
21066 operators. The POP0/POP1 operands are updated in place. The new
21067 comparison code is returned, or UNKNOWN if not implementable. */
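/* Before AVX, the cmpps/cmppd immediate encodes only EQ, LT, LE, UNORD,
   NEQ, NLT (i.e. UNGE), NLE (i.e. UNGT) and ORD, which is why GE, GT,
   UNLE and UNLT below are handled by swapping the comparison operands.  */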
21069 static enum rtx_code
21070 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21071 rtx *pop0, rtx *pop1)
21073 switch (code)
21075 case LTGT:
21076 case UNEQ:
21077 /* AVX supports all the needed comparisons. */
21078 if (TARGET_AVX)
21079 break;
21080 /* We have no LTGT as an operator. We could implement it with
21081 NE & ORDERED, but this requires an extra temporary. It's
21082 not clear that it's worth it. */
21083 return UNKNOWN;
21085 case LT:
21086 case LE:
21087 case UNGT:
21088 case UNGE:
21089 /* These are supported directly. */
21090 break;
21092 case EQ:
21093 case NE:
21094 case UNORDERED:
21095 case ORDERED:
21096 /* AVX has 3 operand comparisons, no need to swap anything. */
21097 if (TARGET_AVX)
21098 break;
21099 /* For commutative operators, try to canonicalize the destination
21100 operand to be first in the comparison - this helps reload to
21101 avoid extra moves. */
21102 if (!dest || !rtx_equal_p (dest, *pop1))
21103 break;
21104 /* FALLTHRU */
21106 case GE:
21107 case GT:
21108 case UNLE:
21109 case UNLT:
21110 /* These are not supported directly before AVX, and furthermore
21111 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21112 comparison operands to transform into something that is
21113 supported. */
21114 std::swap (*pop0, *pop1);
21115 code = swap_condition (code);
21116 break;
21118 default:
21119 gcc_unreachable ();
21122 return code;
21125 /* Detect conditional moves that exactly match min/max operational
21126 semantics. Note that this is IEEE safe, as long as we don't
21127 interchange the operands.
21129 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21130 and TRUE if the operation is successful and instructions are emitted. */
21132 static bool
21133 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21134 rtx cmp_op1, rtx if_true, rtx if_false)
21136 machine_mode mode;
21137 bool is_min;
21138 rtx tmp;
21140 if (code == LT)
21142 else if (code == UNGE)
21143 std::swap (if_true, if_false);
21144 else
21145 return false;
21147 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21148 is_min = true;
21149 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21150 is_min = false;
21151 else
21152 return false;
21154 mode = GET_MODE (dest);
21156 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21157 but MODE may be a vector mode and thus not appropriate. */
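/* When NaNs or signed zeros must be honoured, the operation is wrapped in
   UNSPEC_IEEE_MIN/UNSPEC_IEEE_MAX; the apparent intent is to keep the
   operand order of the eventual min/max instruction fixed instead of
   exposing a commutative SMIN/SMAX that later RTL passes could reorder.  */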
21158 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21160 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21161 rtvec v;
21163 if_true = force_reg (mode, if_true);
21164 v = gen_rtvec (2, if_true, if_false);
21165 tmp = gen_rtx_UNSPEC (mode, v, u);
21167 else
21169 code = is_min ? SMIN : SMAX;
21170 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21173 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21174 return true;
21177 /* Expand an sse vector comparison. Return the register with the result. */
21179 static rtx
21180 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21181 rtx op_true, rtx op_false)
21183 machine_mode mode = GET_MODE (dest);
21184 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21186 /* In the general case the mode of the comparison result can differ from that of the operands. */
21187 machine_mode cmp_mode;
21189 /* In AVX512F the result of comparison is an integer mask. */
21190 bool maskcmp = false;
21191 rtx x;
21193 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21195 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21196 gcc_assert (cmp_mode != BLKmode);
21198 maskcmp = true;
21200 else
21201 cmp_mode = cmp_ops_mode;
21204 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21205 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21206 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21208 if (optimize
21209 || reg_overlap_mentioned_p (dest, op_true)
21210 || reg_overlap_mentioned_p (dest, op_false))
21211 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21213 /* Compare patterns for int modes are unspec in AVX512F only. */
21214 if (maskcmp && (code == GT || code == EQ))
21216 rtx (*gen)(rtx, rtx, rtx);
21218 switch (cmp_ops_mode)
21220 case V16SImode:
21221 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21222 break;
21223 case V8DImode:
21224 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21225 break;
21226 default:
21227 gen = NULL;
21230 if (gen)
21232 emit_insn (gen (dest, cmp_op0, cmp_op1));
21233 return dest;
21236 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21238 if (cmp_mode != mode && !maskcmp)
21240 x = force_reg (cmp_ops_mode, x);
21241 convert_move (dest, x, false);
21243 else
21244 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21246 return dest;
21249 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21250 operations. This is used for both scalar and vector conditional moves. */
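/* When no blend instruction applies, the generic fallback at the end
   computes DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE), relying on CMP
   being an all-ones/all-zeros mask per element.  */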
21252 static void
21253 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21255 machine_mode mode = GET_MODE (dest);
21256 machine_mode cmpmode = GET_MODE (cmp);
21258 /* In AVX512F the result of comparison is an integer mask. */
21259 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21261 rtx t2, t3, x;
21263 if (vector_all_ones_operand (op_true, mode)
21264 && rtx_equal_p (op_false, CONST0_RTX (mode))
21265 && !maskcmp)
21267 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21269 else if (op_false == CONST0_RTX (mode)
21270 && !maskcmp)
21272 op_true = force_reg (mode, op_true);
21273 x = gen_rtx_AND (mode, cmp, op_true);
21274 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21276 else if (op_true == CONST0_RTX (mode)
21277 && !maskcmp)
21279 op_false = force_reg (mode, op_false);
21280 x = gen_rtx_NOT (mode, cmp);
21281 x = gen_rtx_AND (mode, x, op_false);
21282 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21284 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21285 && !maskcmp)
21287 op_false = force_reg (mode, op_false);
21288 x = gen_rtx_IOR (mode, cmp, op_false);
21289 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21291 else if (TARGET_XOP
21292 && !maskcmp)
21294 op_true = force_reg (mode, op_true);
21296 if (!nonimmediate_operand (op_false, mode))
21297 op_false = force_reg (mode, op_false);
21299 emit_insn (gen_rtx_SET (mode, dest,
21300 gen_rtx_IF_THEN_ELSE (mode, cmp,
21301 op_true,
21302 op_false)));
21304 else
21306 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21307 rtx d = dest;
21309 if (!nonimmediate_operand (op_true, mode))
21310 op_true = force_reg (mode, op_true);
21312 op_false = force_reg (mode, op_false);
21314 switch (mode)
21316 case V4SFmode:
21317 if (TARGET_SSE4_1)
21318 gen = gen_sse4_1_blendvps;
21319 break;
21320 case V2DFmode:
21321 if (TARGET_SSE4_1)
21322 gen = gen_sse4_1_blendvpd;
21323 break;
21324 case V16QImode:
21325 case V8HImode:
21326 case V4SImode:
21327 case V2DImode:
21328 if (TARGET_SSE4_1)
21330 gen = gen_sse4_1_pblendvb;
21331 if (mode != V16QImode)
21332 d = gen_reg_rtx (V16QImode);
21333 op_false = gen_lowpart (V16QImode, op_false);
21334 op_true = gen_lowpart (V16QImode, op_true);
21335 cmp = gen_lowpart (V16QImode, cmp);
21337 break;
21338 case V8SFmode:
21339 if (TARGET_AVX)
21340 gen = gen_avx_blendvps256;
21341 break;
21342 case V4DFmode:
21343 if (TARGET_AVX)
21344 gen = gen_avx_blendvpd256;
21345 break;
21346 case V32QImode:
21347 case V16HImode:
21348 case V8SImode:
21349 case V4DImode:
21350 if (TARGET_AVX2)
21352 gen = gen_avx2_pblendvb;
21353 if (mode != V32QImode)
21354 d = gen_reg_rtx (V32QImode);
21355 op_false = gen_lowpart (V32QImode, op_false);
21356 op_true = gen_lowpart (V32QImode, op_true);
21357 cmp = gen_lowpart (V32QImode, cmp);
21359 break;
21361 case V64QImode:
21362 gen = gen_avx512bw_blendmv64qi;
21363 break;
21364 case V32HImode:
21365 gen = gen_avx512bw_blendmv32hi;
21366 break;
21367 case V16SImode:
21368 gen = gen_avx512f_blendmv16si;
21369 break;
21370 case V8DImode:
21371 gen = gen_avx512f_blendmv8di;
21372 break;
21373 case V8DFmode:
21374 gen = gen_avx512f_blendmv8df;
21375 break;
21376 case V16SFmode:
21377 gen = gen_avx512f_blendmv16sf;
21378 break;
21380 default:
21381 break;
21384 if (gen != NULL)
21386 emit_insn (gen (d, op_false, op_true, cmp));
21387 if (d != dest)
21388 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21390 else
21392 op_true = force_reg (mode, op_true);
21394 t2 = gen_reg_rtx (mode);
21395 if (optimize)
21396 t3 = gen_reg_rtx (mode);
21397 else
21398 t3 = dest;
21400 x = gen_rtx_AND (mode, op_true, cmp);
21401 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21403 x = gen_rtx_NOT (mode, cmp);
21404 x = gen_rtx_AND (mode, x, op_false);
21405 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21407 x = gen_rtx_IOR (mode, t3, t2);
21408 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21413 /* Expand a floating-point conditional move. Return true if successful. */
21415 bool
21416 ix86_expand_fp_movcc (rtx operands[])
21418 machine_mode mode = GET_MODE (operands[0]);
21419 enum rtx_code code = GET_CODE (operands[1]);
21420 rtx tmp, compare_op;
21421 rtx op0 = XEXP (operands[1], 0);
21422 rtx op1 = XEXP (operands[1], 1);
21424 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21426 machine_mode cmode;
21428 /* Since we've no cmove for sse registers, don't force bad register
21429 allocation just to gain access to it. Deny movcc when the
21430 comparison mode doesn't match the move mode. */
21431 cmode = GET_MODE (op0);
21432 if (cmode == VOIDmode)
21433 cmode = GET_MODE (op1);
21434 if (cmode != mode)
21435 return false;
21437 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21438 if (code == UNKNOWN)
21439 return false;
21441 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21442 operands[2], operands[3]))
21443 return true;
21445 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21446 operands[2], operands[3]);
21447 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21448 return true;
21451 if (GET_MODE (op0) == TImode
21452 || (GET_MODE (op0) == DImode
21453 && !TARGET_64BIT))
21454 return false;
21456 /* The floating point conditional move instructions don't directly
21457 support conditions resulting from a signed integer comparison. */
21459 compare_op = ix86_expand_compare (code, op0, op1);
21460 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21462 tmp = gen_reg_rtx (QImode);
21463 ix86_expand_setcc (tmp, code, op0, op1);
21465 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21468 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21469 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21470 operands[2], operands[3])));
21472 return true;
21475 /* Expand a floating-point vector conditional move; a vcond operation
21476 rather than a movcc operation. */
21478 bool
21479 ix86_expand_fp_vcond (rtx operands[])
21481 enum rtx_code code = GET_CODE (operands[3]);
21482 rtx cmp;
21484 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21485 &operands[4], &operands[5]);
21486 if (code == UNKNOWN)
21488 rtx temp;
21489 switch (GET_CODE (operands[3]))
21491 case LTGT:
21492 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21493 operands[5], operands[0], operands[0]);
21494 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21495 operands[5], operands[1], operands[2]);
21496 code = AND;
21497 break;
21498 case UNEQ:
21499 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21500 operands[5], operands[0], operands[0]);
21501 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21502 operands[5], operands[1], operands[2]);
21503 code = IOR;
21504 break;
21505 default:
21506 gcc_unreachable ();
21508 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21509 OPTAB_DIRECT);
21510 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21511 return true;
21514 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21515 operands[5], operands[1], operands[2]))
21516 return true;
21518 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21519 operands[1], operands[2]);
21520 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21521 return true;
21524 /* Expand a signed/unsigned integral vector conditional move. */
21526 bool
21527 ix86_expand_int_vcond (rtx operands[])
21529 machine_mode data_mode = GET_MODE (operands[0]);
21530 machine_mode mode = GET_MODE (operands[4]);
21531 enum rtx_code code = GET_CODE (operands[3]);
21532 bool negate = false;
21533 rtx x, cop0, cop1;
21535 cop0 = operands[4];
21536 cop1 = operands[5];
21538 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21539 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21540 if ((code == LT || code == GE)
21541 && data_mode == mode
21542 && cop1 == CONST0_RTX (mode)
21543 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21544 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21545 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21546 && (GET_MODE_SIZE (data_mode) == 16
21547 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21549 rtx negop = operands[2 - (code == LT)];
21550 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21551 if (negop == CONST1_RTX (data_mode))
21553 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21554 operands[0], 1, OPTAB_DIRECT);
21555 if (res != operands[0])
21556 emit_move_insn (operands[0], res);
21557 return true;
21559 else if (GET_MODE_INNER (data_mode) != DImode
21560 && vector_all_ones_operand (negop, data_mode))
21562 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21563 operands[0], 0, OPTAB_DIRECT);
21564 if (res != operands[0])
21565 emit_move_insn (operands[0], res);
21566 return true;
21570 if (!nonimmediate_operand (cop1, mode))
21571 cop1 = force_reg (mode, cop1);
21572 if (!general_operand (operands[1], data_mode))
21573 operands[1] = force_reg (data_mode, operands[1]);
21574 if (!general_operand (operands[2], data_mode))
21575 operands[2] = force_reg (data_mode, operands[2]);
21577 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21578 if (TARGET_XOP
21579 && (mode == V16QImode || mode == V8HImode
21580 || mode == V4SImode || mode == V2DImode))
21582 else
21584 /* Canonicalize the comparison to EQ, GT, GTU. */
21585 switch (code)
21587 case EQ:
21588 case GT:
21589 case GTU:
21590 break;
21592 case NE:
21593 case LE:
21594 case LEU:
21595 code = reverse_condition (code);
21596 negate = true;
21597 break;
21599 case GE:
21600 case GEU:
21601 code = reverse_condition (code);
21602 negate = true;
21603 /* FALLTHRU */
21605 case LT:
21606 case LTU:
21607 std::swap (cop0, cop1);
21608 code = swap_condition (code);
21609 break;
21611 default:
21612 gcc_unreachable ();
21615 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21616 if (mode == V2DImode)
21618 switch (code)
21620 case EQ:
21621 /* SSE4.1 supports EQ. */
21622 if (!TARGET_SSE4_1)
21623 return false;
21624 break;
21626 case GT:
21627 case GTU:
21628 /* SSE4.2 supports GT/GTU. */
21629 if (!TARGET_SSE4_2)
21630 return false;
21631 break;
21633 default:
21634 gcc_unreachable ();
21638 /* Unsigned parallel compare is not supported by the hardware.
21639 Play some tricks to turn this into a signed comparison
21640 against 0. */
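/* Flipping the sign bit of both operands (equivalently, adding or
   subtracting the sign-bit mask modulo the element width) turns unsigned
   order into signed order: x >u y iff (x ^ MSB) >s (y ^ MSB).  The wide
   element modes below subtract the sign-bit mask; the narrow element
   modes use a saturating subtraction instead.  */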
21641 if (code == GTU)
21643 cop0 = force_reg (mode, cop0);
21645 switch (mode)
21647 case V16SImode:
21648 case V8DImode:
21649 case V8SImode:
21650 case V4DImode:
21651 case V4SImode:
21652 case V2DImode:
21654 rtx t1, t2, mask;
21655 rtx (*gen_sub3) (rtx, rtx, rtx);
21657 switch (mode)
21659 case V16SImode: gen_sub3 = gen_subv16si3; break;
21660 case V8DImode: gen_sub3 = gen_subv8di3; break;
21661 case V8SImode: gen_sub3 = gen_subv8si3; break;
21662 case V4DImode: gen_sub3 = gen_subv4di3; break;
21663 case V4SImode: gen_sub3 = gen_subv4si3; break;
21664 case V2DImode: gen_sub3 = gen_subv2di3; break;
21665 default:
21666 gcc_unreachable ();
21668 /* Subtract (-(INT MAX) - 1) from both operands to make
21669 them signed. */
21670 mask = ix86_build_signbit_mask (mode, true, false);
21671 t1 = gen_reg_rtx (mode);
21672 emit_insn (gen_sub3 (t1, cop0, mask));
21674 t2 = gen_reg_rtx (mode);
21675 emit_insn (gen_sub3 (t2, cop1, mask));
21677 cop0 = t1;
21678 cop1 = t2;
21679 code = GT;
21681 break;
21683 case V64QImode:
21684 case V32HImode:
21685 case V32QImode:
21686 case V16HImode:
21687 case V16QImode:
21688 case V8HImode:
21689 /* Perform a parallel unsigned saturating subtraction. */
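/* cop0 >u cop1 exactly when the saturating difference US_MINUS (cop0, cop1)
   is nonzero, so compare that difference for equality with zero and flip
   NEGATE to obtain the complement.  */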
21690 x = gen_reg_rtx (mode);
21691 emit_insn (gen_rtx_SET (VOIDmode, x,
21692 gen_rtx_US_MINUS (mode, cop0, cop1)));
21694 cop0 = x;
21695 cop1 = CONST0_RTX (mode);
21696 code = EQ;
21697 negate = !negate;
21698 break;
21700 default:
21701 gcc_unreachable ();
21706 /* Allow the comparison to be done in one mode, but the movcc to
21707 happen in another mode. */
21708 if (data_mode == mode)
21710 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21711 operands[1+negate], operands[2-negate]);
21713 else
21715 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21716 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21717 operands[1+negate], operands[2-negate]);
21718 if (GET_MODE (x) == mode)
21719 x = gen_lowpart (data_mode, x);
21722 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21723 operands[2-negate]);
21724 return true;
21727 /* AVX512F does support 64-byte integer vector operations,
21728 thus the longest vector we are faced with is V64QImode. */
21729 #define MAX_VECT_LEN 64
21731 struct expand_vec_perm_d
21733 rtx target, op0, op1;
21734 unsigned char perm[MAX_VECT_LEN];
21735 machine_mode vmode;
21736 unsigned char nelt;
21737 bool one_operand_p;
21738 bool testing_p;
21741 static bool
21742 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21743 struct expand_vec_perm_d *d)
21745 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21746 expander, so args are either in d, or in op0, op1 etc. */
21747 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21748 machine_mode maskmode = mode;
21749 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21751 switch (mode)
21753 case V8HImode:
21754 if (TARGET_AVX512VL && TARGET_AVX512BW)
21755 gen = gen_avx512vl_vpermi2varv8hi3;
21756 break;
21757 case V16HImode:
21758 if (TARGET_AVX512VL && TARGET_AVX512BW)
21759 gen = gen_avx512vl_vpermi2varv16hi3;
21760 break;
21761 case V32HImode:
21762 if (TARGET_AVX512BW)
21763 gen = gen_avx512bw_vpermi2varv32hi3;
21764 break;
21765 case V4SImode:
21766 if (TARGET_AVX512VL)
21767 gen = gen_avx512vl_vpermi2varv4si3;
21768 break;
21769 case V8SImode:
21770 if (TARGET_AVX512VL)
21771 gen = gen_avx512vl_vpermi2varv8si3;
21772 break;
21773 case V16SImode:
21774 if (TARGET_AVX512F)
21775 gen = gen_avx512f_vpermi2varv16si3;
21776 break;
21777 case V4SFmode:
21778 if (TARGET_AVX512VL)
21780 gen = gen_avx512vl_vpermi2varv4sf3;
21781 maskmode = V4SImode;
21783 break;
21784 case V8SFmode:
21785 if (TARGET_AVX512VL)
21787 gen = gen_avx512vl_vpermi2varv8sf3;
21788 maskmode = V8SImode;
21790 break;
21791 case V16SFmode:
21792 if (TARGET_AVX512F)
21794 gen = gen_avx512f_vpermi2varv16sf3;
21795 maskmode = V16SImode;
21797 break;
21798 case V2DImode:
21799 if (TARGET_AVX512VL)
21800 gen = gen_avx512vl_vpermi2varv2di3;
21801 break;
21802 case V4DImode:
21803 if (TARGET_AVX512VL)
21804 gen = gen_avx512vl_vpermi2varv4di3;
21805 break;
21806 case V8DImode:
21807 if (TARGET_AVX512F)
21808 gen = gen_avx512f_vpermi2varv8di3;
21809 break;
21810 case V2DFmode:
21811 if (TARGET_AVX512VL)
21813 gen = gen_avx512vl_vpermi2varv2df3;
21814 maskmode = V2DImode;
21816 break;
21817 case V4DFmode:
21818 if (TARGET_AVX512VL)
21820 gen = gen_avx512vl_vpermi2varv4df3;
21821 maskmode = V4DImode;
21823 break;
21824 case V8DFmode:
21825 if (TARGET_AVX512F)
21827 gen = gen_avx512f_vpermi2varv8df3;
21828 maskmode = V8DImode;
21830 break;
21831 default:
21832 break;
21835 if (gen == NULL)
21836 return false;
21838 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21839 expander, so args are either in d, or in op0, op1 etc. */
21840 if (d)
21842 rtx vec[64];
21843 target = d->target;
21844 op0 = d->op0;
21845 op1 = d->op1;
21846 for (int i = 0; i < d->nelt; ++i)
21847 vec[i] = GEN_INT (d->perm[i]);
21848 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21851 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21852 return true;
21855 /* Expand a variable vector permutation. */
21857 void
21858 ix86_expand_vec_perm (rtx operands[])
21860 rtx target = operands[0];
21861 rtx op0 = operands[1];
21862 rtx op1 = operands[2];
21863 rtx mask = operands[3];
21864 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21865 machine_mode mode = GET_MODE (op0);
21866 machine_mode maskmode = GET_MODE (mask);
21867 int w, e, i;
21868 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21870 /* Number of elements in the vector. */
21871 w = GET_MODE_NUNITS (mode);
21872 e = GET_MODE_UNIT_SIZE (mode);
21873 gcc_assert (w <= 64);
21875 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21876 return;
21878 if (TARGET_AVX2)
21880 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21882 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21883 a constant shuffle operand. With a tiny bit of effort we can
21884 use VPERMD instead. A re-interpretation stall for V4DFmode is
21885 unfortunate but there's no avoiding it.
21886 Similarly for V16HImode we don't have instructions for variable
21887 shuffling, while for V32QImode we can, after preparing suitable
21888 masks, use vpshufb; vpshufb; vpermq; vpor. */
21890 if (mode == V16HImode)
21892 maskmode = mode = V32QImode;
21893 w = 32;
21894 e = 1;
21896 else
21898 maskmode = mode = V8SImode;
21899 w = 8;
21900 e = 4;
21902 t1 = gen_reg_rtx (maskmode);
21904 /* Replicate the low bits of the V4DImode mask into V8SImode:
21905 mask = { A B C D }
21906 t1 = { A A B B C C D D }. */
21907 for (i = 0; i < w / 2; ++i)
21908 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21909 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21910 vt = force_reg (maskmode, vt);
21911 mask = gen_lowpart (maskmode, mask);
21912 if (maskmode == V8SImode)
21913 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21914 else
21915 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21917 /* Multiply the shuffle indices by two. */
21918 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21919 OPTAB_DIRECT);
21921 /* Add one to the odd shuffle indices:
21922 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21923 for (i = 0; i < w / 2; ++i)
21925 vec[i * 2] = const0_rtx;
21926 vec[i * 2 + 1] = const1_rtx;
21928 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21929 vt = validize_mem (force_const_mem (maskmode, vt));
21930 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21931 OPTAB_DIRECT);
21933 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21934 operands[3] = mask = t1;
21935 target = gen_reg_rtx (mode);
21936 op0 = gen_lowpart (mode, op0);
21937 op1 = gen_lowpart (mode, op1);
21940 switch (mode)
21942 case V8SImode:
21943 /* The VPERMD and VPERMPS instructions already properly ignore
21944 the high bits of the shuffle elements. No need for us to
21945 perform an AND ourselves. */
21946 if (one_operand_shuffle)
21948 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21949 if (target != operands[0])
21950 emit_move_insn (operands[0],
21951 gen_lowpart (GET_MODE (operands[0]), target));
21953 else
21955 t1 = gen_reg_rtx (V8SImode);
21956 t2 = gen_reg_rtx (V8SImode);
21957 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21958 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21959 goto merge_two;
21961 return;
21963 case V8SFmode:
21964 mask = gen_lowpart (V8SImode, mask);
21965 if (one_operand_shuffle)
21966 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21967 else
21969 t1 = gen_reg_rtx (V8SFmode);
21970 t2 = gen_reg_rtx (V8SFmode);
21971 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21972 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21973 goto merge_two;
21975 return;
21977 case V4SImode:
21978 /* By combining the two 128-bit input vectors into one 256-bit
21979 input vector, we can use VPERMD and VPERMPS for the full
21980 two-operand shuffle. */
21981 t1 = gen_reg_rtx (V8SImode);
21982 t2 = gen_reg_rtx (V8SImode);
21983 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21984 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21985 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21986 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21987 return;
21989 case V4SFmode:
21990 t1 = gen_reg_rtx (V8SFmode);
21991 t2 = gen_reg_rtx (V8SImode);
21992 mask = gen_lowpart (V4SImode, mask);
21993 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21994 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21995 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21996 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21997 return;
21999 case V32QImode:
22000 t1 = gen_reg_rtx (V32QImode);
22001 t2 = gen_reg_rtx (V32QImode);
22002 t3 = gen_reg_rtx (V32QImode);
22003 vt2 = GEN_INT (-128);
22004 for (i = 0; i < 32; i++)
22005 vec[i] = vt2;
22006 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22007 vt = force_reg (V32QImode, vt);
22008 for (i = 0; i < 32; i++)
22009 vec[i] = i < 16 ? vt2 : const0_rtx;
22010 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22011 vt2 = force_reg (V32QImode, vt2);
22012 /* From mask create two adjusted masks, which contain the same
22013 bits as mask in the low 7 bits of each vector element.
22014 The first mask will have the most significant bit clear
22015 if it requests element from the same 128-bit lane
22016 and MSB set if it requests element from the other 128-bit lane.
22017 The second mask will have the opposite values of the MSB,
22018 and additionally will have its 128-bit lanes swapped.
22019 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22020 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22021 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22022 stands for other 12 bytes. */
22023 /* The bit that tells whether an element comes from the same lane or from
22024 the other lane is bit 4, so shift it up by 3 to the MSB position. */
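/* pshufb/vpshufb writes a zero byte whenever bit 7 of the corresponding
   control byte is set, so once the lane-select bit sits in the MSB it also
   acts as a "take nothing from this source" marker.  */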
22025 t5 = gen_reg_rtx (V4DImode);
22026 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22027 GEN_INT (3)));
22028 /* Clear MSB bits from the mask just in case it had them set. */
22029 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22030 /* After this t1 will have MSB set for elements from other lane. */
22031 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22032 /* Clear bits other than MSB. */
22033 emit_insn (gen_andv32qi3 (t1, t1, vt));
22034 /* Or in the lower bits from mask into t3. */
22035 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22036 /* And invert MSB bits in t1, so MSB is set for elements from the same
22037 lane. */
22038 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22039 /* Swap 128-bit lanes in t3. */
22040 t6 = gen_reg_rtx (V4DImode);
22041 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22042 const2_rtx, GEN_INT (3),
22043 const0_rtx, const1_rtx));
22044 /* And or in the lower bits from mask into t1. */
22045 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22046 if (one_operand_shuffle)
22048 /* Each of these shuffles will put 0s in places where
22049 element from the other 128-bit lane is needed, otherwise
22050 will shuffle in the requested value. */
22051 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22052 gen_lowpart (V32QImode, t6)));
22053 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22054 /* For t3 the 128-bit lanes are swapped again. */
22055 t7 = gen_reg_rtx (V4DImode);
22056 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22057 const2_rtx, GEN_INT (3),
22058 const0_rtx, const1_rtx));
22059 /* And oring both together leads to the result. */
22060 emit_insn (gen_iorv32qi3 (target, t1,
22061 gen_lowpart (V32QImode, t7)));
22062 if (target != operands[0])
22063 emit_move_insn (operands[0],
22064 gen_lowpart (GET_MODE (operands[0]), target));
22065 return;
22068 t4 = gen_reg_rtx (V32QImode);
22069 /* Similar to the one_operand_shuffle code above, just repeated
22070 twice, once for each operand. The code at merge_two: will
22071 combine the two results together. */
22072 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22073 gen_lowpart (V32QImode, t6)));
22074 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22075 gen_lowpart (V32QImode, t6)));
22076 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22077 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22078 t7 = gen_reg_rtx (V4DImode);
22079 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22080 const2_rtx, GEN_INT (3),
22081 const0_rtx, const1_rtx));
22082 t8 = gen_reg_rtx (V4DImode);
22083 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22084 const2_rtx, GEN_INT (3),
22085 const0_rtx, const1_rtx));
22086 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22087 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22088 t1 = t4;
22089 t2 = t3;
22090 goto merge_two;
22092 default:
22093 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22094 break;
22098 if (TARGET_XOP)
22100 /* The XOP VPPERM insn supports three inputs. By ignoring the
22101 one_operand_shuffle special case, we avoid creating another
22102 set of constant vectors in memory. */
22103 one_operand_shuffle = false;
22105 /* mask = mask & {2*w-1, ...} */
22106 vt = GEN_INT (2*w - 1);
22108 else
22110 /* mask = mask & {w-1, ...} */
22111 vt = GEN_INT (w - 1);
22114 for (i = 0; i < w; i++)
22115 vec[i] = vt;
22116 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22117 mask = expand_simple_binop (maskmode, AND, mask, vt,
22118 NULL_RTX, 0, OPTAB_DIRECT);
22120 /* For non-QImode operations, convert the word permutation control
22121 into a byte permutation control. */
22122 if (mode != V16QImode)
22124 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22125 GEN_INT (exact_log2 (e)),
22126 NULL_RTX, 0, OPTAB_DIRECT);
22128 /* Convert mask to vector of chars. */
22129 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22131 /* Replicate each of the input bytes into byte positions:
22132 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22133 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22134 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22135 for (i = 0; i < 16; ++i)
22136 vec[i] = GEN_INT (i/e * e);
22137 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22138 vt = validize_mem (force_const_mem (V16QImode, vt));
22139 if (TARGET_XOP)
22140 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22141 else
22142 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22144 /* Convert it into the byte positions by doing
22145 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22146 for (i = 0; i < 16; ++i)
22147 vec[i] = GEN_INT (i % e);
22148 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22149 vt = validize_mem (force_const_mem (V16QImode, vt));
22150 emit_insn (gen_addv16qi3 (mask, mask, vt));
22153 /* The actual shuffle operations all operate on V16QImode. */
22154 op0 = gen_lowpart (V16QImode, op0);
22155 op1 = gen_lowpart (V16QImode, op1);
22157 if (TARGET_XOP)
22159 if (GET_MODE (target) != V16QImode)
22160 target = gen_reg_rtx (V16QImode);
22161 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22162 if (target != operands[0])
22163 emit_move_insn (operands[0],
22164 gen_lowpart (GET_MODE (operands[0]), target));
22166 else if (one_operand_shuffle)
22168 if (GET_MODE (target) != V16QImode)
22169 target = gen_reg_rtx (V16QImode);
22170 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22171 if (target != operands[0])
22172 emit_move_insn (operands[0],
22173 gen_lowpart (GET_MODE (operands[0]), target));
22175 else
22177 rtx xops[6];
22178 bool ok;
22180 /* Shuffle the two input vectors independently. */
22181 t1 = gen_reg_rtx (V16QImode);
22182 t2 = gen_reg_rtx (V16QImode);
22183 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22184 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22186 merge_two:
22187 /* Then merge them together. The key is whether any given control
22188 element contained a bit set that indicates the second word. */
22189 mask = operands[3];
22190 vt = GEN_INT (w);
22191 if (maskmode == V2DImode && !TARGET_SSE4_1)
22193 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22194 more shuffle to convert the V2DI input mask into a V4SI
22195 input mask. At which point the masking that expand_int_vcond
22196 will work as desired. */
22197 rtx t3 = gen_reg_rtx (V4SImode);
22198 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22199 const0_rtx, const0_rtx,
22200 const2_rtx, const2_rtx));
22201 mask = t3;
22202 maskmode = V4SImode;
22203 e = w = 4;
22206 for (i = 0; i < w; i++)
22207 vec[i] = vt;
22208 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22209 vt = force_reg (maskmode, vt);
22210 mask = expand_simple_binop (maskmode, AND, mask, vt,
22211 NULL_RTX, 0, OPTAB_DIRECT);
22213 if (GET_MODE (target) != mode)
22214 target = gen_reg_rtx (mode);
22215 xops[0] = target;
22216 xops[1] = gen_lowpart (mode, t2);
22217 xops[2] = gen_lowpart (mode, t1);
22218 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22219 xops[4] = mask;
22220 xops[5] = vt;
22221 ok = ix86_expand_int_vcond (xops);
22222 gcc_assert (ok);
22223 if (target != operands[0])
22224 emit_move_insn (operands[0],
22225 gen_lowpart (GET_MODE (operands[0]), target));
22229 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22230 true if we should do zero extension, else sign extension. HIGH_P is
22231 true if we want the N/2 high elements, else the low elements. */
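/* With SSE4.1 or wider vector extensions the pmovsx/pmovzx family extends
   the selected half of SRC directly; without SSE4.1 the extension is
   synthesized by interleaving SRC with zeros (zero extension) or with a
   copy of its sign bits obtained from a GT compare of 0 against SRC
   (sign extension).  */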
22233 void
22234 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22236 machine_mode imode = GET_MODE (src);
22237 rtx tmp;
22239 if (TARGET_SSE4_1)
22241 rtx (*unpack)(rtx, rtx);
22242 rtx (*extract)(rtx, rtx) = NULL;
22243 machine_mode halfmode = BLKmode;
22245 switch (imode)
22247 case V64QImode:
22248 if (unsigned_p)
22249 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22250 else
22251 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22252 halfmode = V32QImode;
22253 extract
22254 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22255 break;
22256 case V32QImode:
22257 if (unsigned_p)
22258 unpack = gen_avx2_zero_extendv16qiv16hi2;
22259 else
22260 unpack = gen_avx2_sign_extendv16qiv16hi2;
22261 halfmode = V16QImode;
22262 extract
22263 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22264 break;
22265 case V32HImode:
22266 if (unsigned_p)
22267 unpack = gen_avx512f_zero_extendv16hiv16si2;
22268 else
22269 unpack = gen_avx512f_sign_extendv16hiv16si2;
22270 halfmode = V16HImode;
22271 extract
22272 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22273 break;
22274 case V16HImode:
22275 if (unsigned_p)
22276 unpack = gen_avx2_zero_extendv8hiv8si2;
22277 else
22278 unpack = gen_avx2_sign_extendv8hiv8si2;
22279 halfmode = V8HImode;
22280 extract
22281 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22282 break;
22283 case V16SImode:
22284 if (unsigned_p)
22285 unpack = gen_avx512f_zero_extendv8siv8di2;
22286 else
22287 unpack = gen_avx512f_sign_extendv8siv8di2;
22288 halfmode = V8SImode;
22289 extract
22290 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22291 break;
22292 case V8SImode:
22293 if (unsigned_p)
22294 unpack = gen_avx2_zero_extendv4siv4di2;
22295 else
22296 unpack = gen_avx2_sign_extendv4siv4di2;
22297 halfmode = V4SImode;
22298 extract
22299 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22300 break;
22301 case V16QImode:
22302 if (unsigned_p)
22303 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22304 else
22305 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22306 break;
22307 case V8HImode:
22308 if (unsigned_p)
22309 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22310 else
22311 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22312 break;
22313 case V4SImode:
22314 if (unsigned_p)
22315 unpack = gen_sse4_1_zero_extendv2siv2di2;
22316 else
22317 unpack = gen_sse4_1_sign_extendv2siv2di2;
22318 break;
22319 default:
22320 gcc_unreachable ();
22323 if (GET_MODE_SIZE (imode) >= 32)
22325 tmp = gen_reg_rtx (halfmode);
22326 emit_insn (extract (tmp, src));
22328 else if (high_p)
22330 /* Shift higher 8 bytes to lower 8 bytes. */
22331 tmp = gen_reg_rtx (V1TImode);
22332 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22333 GEN_INT (64)));
22334 tmp = gen_lowpart (imode, tmp);
22336 else
22337 tmp = src;
22339 emit_insn (unpack (dest, tmp));
22341 else
22343 rtx (*unpack)(rtx, rtx, rtx);
22345 switch (imode)
22347 case V16QImode:
22348 if (high_p)
22349 unpack = gen_vec_interleave_highv16qi;
22350 else
22351 unpack = gen_vec_interleave_lowv16qi;
22352 break;
22353 case V8HImode:
22354 if (high_p)
22355 unpack = gen_vec_interleave_highv8hi;
22356 else
22357 unpack = gen_vec_interleave_lowv8hi;
22358 break;
22359 case V4SImode:
22360 if (high_p)
22361 unpack = gen_vec_interleave_highv4si;
22362 else
22363 unpack = gen_vec_interleave_lowv4si;
22364 break;
22365 default:
22366 gcc_unreachable ();
22369 if (unsigned_p)
22370 tmp = force_reg (imode, CONST0_RTX (imode));
22371 else
22372 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22373 src, pc_rtx, pc_rtx);
22375 rtx tmp2 = gen_reg_rtx (imode);
22376 emit_insn (unpack (tmp2, src, tmp));
22377 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22381 /* Expand conditional increment or decrement using adc/sbb instructions.
22382 The default case using setcc followed by the conditional move can be
22383 done by generic code. */
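/* Once the condition is reduced to a carry-flag test, "adc DEST, 0" adds
   exactly the carry (a conditional increment) and "sbb DEST, 0" subtracts
   it; with a -1 immediate the increment or decrement instead happens when
   the carry is clear, which is why the code below chooses between the
   add*3_carry and sub*3_carry patterns based on both the comparison code
   and operands[3].  */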
22384 bool
22385 ix86_expand_int_addcc (rtx operands[])
22387 enum rtx_code code = GET_CODE (operands[1]);
22388 rtx flags;
22389 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22390 rtx compare_op;
22391 rtx val = const0_rtx;
22392 bool fpcmp = false;
22393 machine_mode mode;
22394 rtx op0 = XEXP (operands[1], 0);
22395 rtx op1 = XEXP (operands[1], 1);
22397 if (operands[3] != const1_rtx
22398 && operands[3] != constm1_rtx)
22399 return false;
22400 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22401 return false;
22402 code = GET_CODE (compare_op);
22404 flags = XEXP (compare_op, 0);
22406 if (GET_MODE (flags) == CCFPmode
22407 || GET_MODE (flags) == CCFPUmode)
22409 fpcmp = true;
22410 code = ix86_fp_compare_code_to_integer (code);
22413 if (code != LTU)
22415 val = constm1_rtx;
22416 if (fpcmp)
22417 PUT_CODE (compare_op,
22418 reverse_condition_maybe_unordered
22419 (GET_CODE (compare_op)));
22420 else
22421 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22424 mode = GET_MODE (operands[0]);
22426 /* Construct either adc or sbb insn. */
22427 if ((code == LTU) == (operands[3] == constm1_rtx))
22429 switch (mode)
22431 case QImode:
22432 insn = gen_subqi3_carry;
22433 break;
22434 case HImode:
22435 insn = gen_subhi3_carry;
22436 break;
22437 case SImode:
22438 insn = gen_subsi3_carry;
22439 break;
22440 case DImode:
22441 insn = gen_subdi3_carry;
22442 break;
22443 default:
22444 gcc_unreachable ();
22447 else
22449 switch (mode)
22451 case QImode:
22452 insn = gen_addqi3_carry;
22453 break;
22454 case HImode:
22455 insn = gen_addhi3_carry;
22456 break;
22457 case SImode:
22458 insn = gen_addsi3_carry;
22459 break;
22460 case DImode:
22461 insn = gen_adddi3_carry;
22462 break;
22463 default:
22464 gcc_unreachable ();
22467 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22469 return true;
22473 /* Split OPERAND into several parts stored in PARTS. Similar to
22474 split_double_mode, but works for floating point parameters and
22475 non-offsettable memories. For pushes, it returns just stack offsets;
22476 the values will be saved in the right order. At most four parts are generated. */
22478 static int
22479 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22481 int size;
22483 if (!TARGET_64BIT)
22484 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22485 else
22486 size = (GET_MODE_SIZE (mode) + 4) / 8;
22488 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22489 gcc_assert (size >= 2 && size <= 4);
22491 /* Optimize constant pool reference to immediates. This is used by fp
22492 moves, that force all constants to memory to allow combining. */
22493 if (MEM_P (operand) && MEM_READONLY_P (operand))
22495 rtx tmp = maybe_get_pool_constant (operand);
22496 if (tmp)
22497 operand = tmp;
22500 if (MEM_P (operand) && !offsettable_memref_p (operand))
22502 /* The only non-offsetable memories we handle are pushes. */
22503 int ok = push_operand (operand, VOIDmode);
22505 gcc_assert (ok);
22507 operand = copy_rtx (operand);
22508 PUT_MODE (operand, word_mode);
22509 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22510 return size;
22513 if (GET_CODE (operand) == CONST_VECTOR)
22515 machine_mode imode = int_mode_for_mode (mode);
22516 /* Caution: if we looked through a constant pool memory above,
22517 the operand may actually have a different mode now. That's
22518 ok, since we want to pun this all the way back to an integer. */
22519 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22520 gcc_assert (operand != NULL);
22521 mode = imode;
22524 if (!TARGET_64BIT)
22526 if (mode == DImode)
22527 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22528 else
22530 int i;
22532 if (REG_P (operand))
22534 gcc_assert (reload_completed);
22535 for (i = 0; i < size; i++)
22536 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22538 else if (offsettable_memref_p (operand))
22540 operand = adjust_address (operand, SImode, 0);
22541 parts[0] = operand;
22542 for (i = 1; i < size; i++)
22543 parts[i] = adjust_address (operand, SImode, 4 * i);
22545 else if (GET_CODE (operand) == CONST_DOUBLE)
22547 REAL_VALUE_TYPE r;
22548 long l[4];
22550 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22551 switch (mode)
22553 case TFmode:
22554 real_to_target (l, &r, mode);
22555 parts[3] = gen_int_mode (l[3], SImode);
22556 parts[2] = gen_int_mode (l[2], SImode);
22557 break;
22558 case XFmode:
22559 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22560 long double may not be 80-bit. */
22561 real_to_target (l, &r, mode);
22562 parts[2] = gen_int_mode (l[2], SImode);
22563 break;
22564 case DFmode:
22565 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22566 break;
22567 default:
22568 gcc_unreachable ();
22570 parts[1] = gen_int_mode (l[1], SImode);
22571 parts[0] = gen_int_mode (l[0], SImode);
22573 else
22574 gcc_unreachable ();
22577 else
22579 if (mode == TImode)
22580 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22581 if (mode == XFmode || mode == TFmode)
22583 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22584 if (REG_P (operand))
22586 gcc_assert (reload_completed);
22587 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22588 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22590 else if (offsettable_memref_p (operand))
22592 operand = adjust_address (operand, DImode, 0);
22593 parts[0] = operand;
22594 parts[1] = adjust_address (operand, upper_mode, 8);
22596 else if (GET_CODE (operand) == CONST_DOUBLE)
22598 REAL_VALUE_TYPE r;
22599 long l[4];
22601 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22602 real_to_target (l, &r, mode);
22604 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22605 if (HOST_BITS_PER_WIDE_INT >= 64)
22606 parts[0]
22607 = gen_int_mode
22608 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22609 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22610 DImode);
22611 else
22612 parts[0] = immed_double_const (l[0], l[1], DImode);
22614 if (upper_mode == SImode)
22615 parts[1] = gen_int_mode (l[2], SImode);
22616 else if (HOST_BITS_PER_WIDE_INT >= 64)
22617 parts[1]
22618 = gen_int_mode
22619 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22620 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22621 DImode);
22622 else
22623 parts[1] = immed_double_const (l[2], l[3], DImode);
22625 else
22626 gcc_unreachable ();
22630 return size;
22633 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22634 The parts of the destination are placed in operands 2-5 and the
22635 matching source parts in operands 6-9, in the correct order, before
22636 the moves are emitted. */
22638 void
22639 ix86_split_long_move (rtx operands[])
22641 rtx part[2][4];
22642 int nparts, i, j;
22643 int push = 0;
22644 int collisions = 0;
22645 machine_mode mode = GET_MODE (operands[0]);
22646 bool collisionparts[4];
22648 /* The DFmode expanders may ask us to move a double.
22649 For a 64-bit target this is a single move. By hiding that fact
22650 here we simplify the i386.md splitters. */
22651 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22653 /* Optimize constant pool reference to immediates. This is used by
22654 fp moves, that force all constants to memory to allow combining. */
22656 if (MEM_P (operands[1])
22657 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22658 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22659 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22660 if (push_operand (operands[0], VOIDmode))
22662 operands[0] = copy_rtx (operands[0]);
22663 PUT_MODE (operands[0], word_mode);
22665 else
22666 operands[0] = gen_lowpart (DImode, operands[0]);
22667 operands[1] = gen_lowpart (DImode, operands[1]);
22668 emit_move_insn (operands[0], operands[1]);
22669 return;
22672 /* The only non-offsettable memory we handle is push. */
22673 if (push_operand (operands[0], VOIDmode))
22674 push = 1;
22675 else
22676 gcc_assert (!MEM_P (operands[0])
22677 || offsettable_memref_p (operands[0]));
22679 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22680 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22682 /* When emitting push, take care for source operands on the stack. */
22683 if (push && MEM_P (operands[1])
22684 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22686 rtx src_base = XEXP (part[1][nparts - 1], 0);
22688 /* Compensate for the stack decrement by 4. */
22689 if (!TARGET_64BIT && nparts == 3
22690 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22691 src_base = plus_constant (Pmode, src_base, 4);
22693 /* src_base refers to the stack pointer and is
22694 automatically decreased by emitted push. */
22695 for (i = 0; i < nparts; i++)
22696 part[1][i] = change_address (part[1][i],
22697 GET_MODE (part[1][i]), src_base);
22700 /* We need to do copy in the right order in case an address register
22701 of the source overlaps the destination. */
22702 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22704 rtx tmp;
22706 for (i = 0; i < nparts; i++)
22708 collisionparts[i]
22709 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22710 if (collisionparts[i])
22711 collisions++;
22714 /* Collision in the middle part can be handled by reordering. */
22715 if (collisions == 1 && nparts == 3 && collisionparts [1])
22717 std::swap (part[0][1], part[0][2]);
22718 std::swap (part[1][1], part[1][2]);
22720 else if (collisions == 1
22721 && nparts == 4
22722 && (collisionparts [1] || collisionparts [2]))
22724 if (collisionparts [1])
22726 std::swap (part[0][1], part[0][2]);
22727 std::swap (part[1][1], part[1][2]);
22729 else
22731 std::swap (part[0][2], part[0][3]);
22732 std::swap (part[1][2], part[1][3]);
22736 /* If there are more collisions, we can't handle it by reordering.
22737 Do an lea to the last part and use only one colliding move. */
22738 else if (collisions > 1)
22740 rtx base;
22742 collisions = 1;
22744 base = part[0][nparts - 1];
22746 /* Handle the case when the last part isn't valid for lea.
22747 Happens in 64-bit mode storing the 12-byte XFmode. */
22748 if (GET_MODE (base) != Pmode)
22749 base = gen_rtx_REG (Pmode, REGNO (base));
22751 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22752 part[1][0] = replace_equiv_address (part[1][0], base);
22753 for (i = 1; i < nparts; i++)
22755 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22756 part[1][i] = replace_equiv_address (part[1][i], tmp);
22761 if (push)
22763 if (!TARGET_64BIT)
22765 if (nparts == 3)
22767 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22768 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22769 stack_pointer_rtx, GEN_INT (-4)));
22770 emit_move_insn (part[0][2], part[1][2]);
22772 else if (nparts == 4)
22774 emit_move_insn (part[0][3], part[1][3]);
22775 emit_move_insn (part[0][2], part[1][2]);
22778 else
22780 /* In 64-bit mode we don't have a 32-bit push available. If the operand
22781 is a register, that is OK - we just use the larger counterpart. We also
22782 retype memory operands - this comes from the attempt to avoid a REX prefix
22783 when moving the second half of a TFmode value. */
22784 if (GET_MODE (part[1][1]) == SImode)
22786 switch (GET_CODE (part[1][1]))
22788 case MEM:
22789 part[1][1] = adjust_address (part[1][1], DImode, 0);
22790 break;
22792 case REG:
22793 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22794 break;
22796 default:
22797 gcc_unreachable ();
22800 if (GET_MODE (part[1][0]) == SImode)
22801 part[1][0] = part[1][1];
22804 emit_move_insn (part[0][1], part[1][1]);
22805 emit_move_insn (part[0][0], part[1][0]);
22806 return;
22809 /* Choose correct order to not overwrite the source before it is copied. */
22810 if ((REG_P (part[0][0])
22811 && REG_P (part[1][1])
22812 && (REGNO (part[0][0]) == REGNO (part[1][1])
22813 || (nparts == 3
22814 && REGNO (part[0][0]) == REGNO (part[1][2]))
22815 || (nparts == 4
22816 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22817 || (collisions > 0
22818 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22820 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22822 operands[2 + i] = part[0][j];
22823 operands[6 + i] = part[1][j];
22826 else
22828 for (i = 0; i < nparts; i++)
22830 operands[2 + i] = part[0][i];
22831 operands[6 + i] = part[1][i];
22835 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22836 if (optimize_insn_for_size_p ())
22838 for (j = 0; j < nparts - 1; j++)
22839 if (CONST_INT_P (operands[6 + j])
22840 && operands[6 + j] != const0_rtx
22841 && REG_P (operands[2 + j]))
22842 for (i = j; i < nparts - 1; i++)
22843 if (CONST_INT_P (operands[7 + i])
22844 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22845 operands[7 + i] = operands[2 + j];
22848 for (i = 0; i < nparts; i++)
22849 emit_move_insn (operands[2 + i], operands[6 + i]);
22851 return;
22854 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22855 left shift by a constant, either using a single shift or
22856 a sequence of add instructions. */
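/* Rough illustration (a sketch of the two expansions, not literal generator
   output): with cheap adds and COUNT == 2 this emits
       add reg, reg
       add reg, reg
   whereas otherwise a single  shl reg, 2  is emitted instead.  */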
22858 static void
22859 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22861 rtx (*insn)(rtx, rtx, rtx);
22863 if (count == 1
22864 || (count * ix86_cost->add <= ix86_cost->shift_const
22865 && !optimize_insn_for_size_p ()))
22867 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22868 while (count-- > 0)
22869 emit_insn (insn (operand, operand, operand));
22871 else
22873 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22874 emit_insn (insn (operand, operand, GEN_INT (count)));
22878 void
22879 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22881 rtx (*gen_ashl3)(rtx, rtx, rtx);
22882 rtx (*gen_shld)(rtx, rtx, rtx);
22883 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22885 rtx low[2], high[2];
22886 int count;
22888 if (CONST_INT_P (operands[2]))
22890 split_double_mode (mode, operands, 2, low, high);
22891 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22893 if (count >= half_width)
22895 emit_move_insn (high[0], low[1]);
22896 emit_move_insn (low[0], const0_rtx);
22898 if (count > half_width)
22899 ix86_expand_ashl_const (high[0], count - half_width, mode);
22901 else
22903 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22905 if (!rtx_equal_p (operands[0], operands[1]))
22906 emit_move_insn (operands[0], operands[1]);
22908 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22909 ix86_expand_ashl_const (low[0], count, mode);
22911 return;
22914 split_double_mode (mode, operands, 1, low, high);
22916 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22918 if (operands[1] == const1_rtx)
22920 /* Assuming we've chosen QImode-capable registers, then 1 << N
22921 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22922 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22924 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22926 ix86_expand_clear (low[0]);
22927 ix86_expand_clear (high[0]);
22928 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22930 d = gen_lowpart (QImode, low[0]);
22931 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22932 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22933 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22935 d = gen_lowpart (QImode, high[0]);
22936 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22937 s = gen_rtx_NE (QImode, flags, const0_rtx);
22938 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22941 /* Otherwise, we can get the same results by manually performing
22942 a bit extract operation on bit 5/6, and then performing the two
22943 shifts. The two methods of getting 0/1 into low/high are exactly
22944 the same size. Avoiding the shift in the bit extract case helps
22945 pentium4 a bit; no one else seems to care much either way. */
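/* Illustrative sketch (scalar equivalent of the RTL emitted below, shown
   for DImode on a 32-bit target, where bits == 5):
       high = (count >> 5) & 1;
       low  = high ^ 1;
       low  <<= count;   high <<= count;
   so exactly one of the two halves ends up holding the single bit.  */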
22946 else
22948 machine_mode half_mode;
22949 rtx (*gen_lshr3)(rtx, rtx, rtx);
22950 rtx (*gen_and3)(rtx, rtx, rtx);
22951 rtx (*gen_xor3)(rtx, rtx, rtx);
22952 HOST_WIDE_INT bits;
22953 rtx x;
22955 if (mode == DImode)
22957 half_mode = SImode;
22958 gen_lshr3 = gen_lshrsi3;
22959 gen_and3 = gen_andsi3;
22960 gen_xor3 = gen_xorsi3;
22961 bits = 5;
22963 else
22965 half_mode = DImode;
22966 gen_lshr3 = gen_lshrdi3;
22967 gen_and3 = gen_anddi3;
22968 gen_xor3 = gen_xordi3;
22969 bits = 6;
22972 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22973 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22974 else
22975 x = gen_lowpart (half_mode, operands[2]);
22976 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22978 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22979 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22980 emit_move_insn (low[0], high[0]);
22981 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22984 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22985 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22986 return;
22989 if (operands[1] == constm1_rtx)
22991 /* For -1 << N, we can avoid the shld instruction, because we
22992 know that we're shifting 0...31/63 ones into a -1. */
22993 emit_move_insn (low[0], constm1_rtx);
22994 if (optimize_insn_for_size_p ())
22995 emit_move_insn (high[0], low[0]);
22996 else
22997 emit_move_insn (high[0], constm1_rtx);
22999 else
23001 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23003 if (!rtx_equal_p (operands[0], operands[1]))
23004 emit_move_insn (operands[0], operands[1]);
23006 split_double_mode (mode, operands, 1, low, high);
23007 emit_insn (gen_shld (high[0], low[0], operands[2]));
23010 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23012 if (TARGET_CMOVE && scratch)
23014 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23015 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23017 ix86_expand_clear (scratch);
23018 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23020 else
23022 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23023 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23025 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23029 void
23030 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23032 rtx (*gen_ashr3)(rtx, rtx, rtx)
23033 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23034 rtx (*gen_shrd)(rtx, rtx, rtx);
23035 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23037 rtx low[2], high[2];
23038 int count;
23040 if (CONST_INT_P (operands[2]))
23042 split_double_mode (mode, operands, 2, low, high);
23043 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23045 if (count == GET_MODE_BITSIZE (mode) - 1)
23047 emit_move_insn (high[0], high[1]);
23048 emit_insn (gen_ashr3 (high[0], high[0],
23049 GEN_INT (half_width - 1)));
23050 emit_move_insn (low[0], high[0]);
23053 else if (count >= half_width)
23055 emit_move_insn (low[0], high[1]);
23056 emit_move_insn (high[0], low[0]);
23057 emit_insn (gen_ashr3 (high[0], high[0],
23058 GEN_INT (half_width - 1)));
23060 if (count > half_width)
23061 emit_insn (gen_ashr3 (low[0], low[0],
23062 GEN_INT (count - half_width)));
23064 else
23066 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23068 if (!rtx_equal_p (operands[0], operands[1]))
23069 emit_move_insn (operands[0], operands[1]);
23071 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23072 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23075 else
23077 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23079 if (!rtx_equal_p (operands[0], operands[1]))
23080 emit_move_insn (operands[0], operands[1]);
23082 split_double_mode (mode, operands, 1, low, high);
23084 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23085 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23087 if (TARGET_CMOVE && scratch)
23089 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23090 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23092 emit_move_insn (scratch, high[0]);
23093 emit_insn (gen_ashr3 (scratch, scratch,
23094 GEN_INT (half_width - 1)));
23095 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23096 scratch));
23098 else
23100 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23101 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23103 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23108 void
23109 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23111 rtx (*gen_lshr3)(rtx, rtx, rtx)
23112 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23113 rtx (*gen_shrd)(rtx, rtx, rtx);
23114 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23116 rtx low[2], high[2];
23117 int count;
23119 if (CONST_INT_P (operands[2]))
23121 split_double_mode (mode, operands, 2, low, high);
23122 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23124 if (count >= half_width)
23126 emit_move_insn (low[0], high[1]);
23127 ix86_expand_clear (high[0]);
23129 if (count > half_width)
23130 emit_insn (gen_lshr3 (low[0], low[0],
23131 GEN_INT (count - half_width)));
23133 else
23135 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23137 if (!rtx_equal_p (operands[0], operands[1]))
23138 emit_move_insn (operands[0], operands[1]);
23140 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23141 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23144 else
23146 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23148 if (!rtx_equal_p (operands[0], operands[1]))
23149 emit_move_insn (operands[0], operands[1]);
23151 split_double_mode (mode, operands, 1, low, high);
23153 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23154 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23156 if (TARGET_CMOVE && scratch)
23158 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23159 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23161 ix86_expand_clear (scratch);
23162 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23163 scratch));
23165 else
23167 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23168 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23170 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23175 /* Predict just emitted jump instruction to be taken with probability PROB. */
23176 static void
23177 predict_jump (int prob)
23179 rtx insn = get_last_insn ();
23180 gcc_assert (JUMP_P (insn));
23181 add_int_reg_note (insn, REG_BR_PROB, prob);
23184 /* Helper function for the string operations below. Test whether VARIABLE
23185 is aligned to VALUE bytes; if so, jump to the returned label. */
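/* A sketch of what this expands to for ix86_expand_aligntest (count, 4, ...):
     tmp = count & 4;  if (tmp == 0) goto label;
   i.e. the code the caller places between this call and emit_label (label)
   is executed only when the tested bit of COUNT is set.  */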
23186 static rtx_code_label *
23187 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23189 rtx_code_label *label = gen_label_rtx ();
23190 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23191 if (GET_MODE (variable) == DImode)
23192 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23193 else
23194 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23195 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23196 1, label);
23197 if (epilogue)
23198 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23199 else
23200 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23201 return label;
23204 /* Decrease COUNTREG by VALUE. */
23205 static void
23206 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23208 rtx (*gen_add)(rtx, rtx, rtx)
23209 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23211 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23214 /* Zero extend possibly SImode EXP to Pmode register. */
23216 ix86_zero_extend_to_Pmode (rtx exp)
23218 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23221 /* Divide COUNTREG by SCALE. */
23222 static rtx
23223 scale_counter (rtx countreg, int scale)
23225 rtx sc;
23227 if (scale == 1)
23228 return countreg;
23229 if (CONST_INT_P (countreg))
23230 return GEN_INT (INTVAL (countreg) / scale);
23231 gcc_assert (REG_P (countreg));
23233 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23234 GEN_INT (exact_log2 (scale)),
23235 NULL, 1, OPTAB_DIRECT);
23236 return sc;
23239 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23240 DImode for constant loop counts. */
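/* For example (derived from the checks below), a constant count that fits
   in 32 bits is iterated in SImode even on 64-bit targets; only counts with
   bits set above bit 31 force DImode.  */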
23242 static machine_mode
23243 counter_mode (rtx count_exp)
23245 if (GET_MODE (count_exp) != VOIDmode)
23246 return GET_MODE (count_exp);
23247 if (!CONST_INT_P (count_exp))
23248 return Pmode;
23249 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23250 return DImode;
23251 return SImode;
23254 /* Copy the address to a Pmode register. This is used for x32 to
23255 truncate DImode TLS address to a SImode register. */
23257 static rtx
23258 ix86_copy_addr_to_reg (rtx addr)
23260 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23261 return copy_addr_to_reg (addr);
23262 else
23264 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23265 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23269 /* When ISSETMEM is FALSE, output a simple loop to move memory from SRCPTR
23270 to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is
23271 COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
23272 loop to set memory to VALUE (assumed to be in MODE).
23274 The size is rounded down to a whole number of chunks moved at once.
23275 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
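/* Rough shape of the emitted loop (a sketch for issetmem == false and
   unroll == 1; labels and mode conversions omitted):
       size = count & -piece_size;
       if (size == 0) goto out;   (this guard is emitted only for byte pieces)
       iter = 0;
       do { dest[iter] = src[iter]; iter += piece_size; } while (iter < size);
       destptr += iter;  srcptr += iter;
     out:;  */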
23278 static void
23279 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23280 rtx destptr, rtx srcptr, rtx value,
23281 rtx count, machine_mode mode, int unroll,
23282 int expected_size, bool issetmem)
23284 rtx_code_label *out_label, *top_label;
23285 rtx iter, tmp;
23286 machine_mode iter_mode = counter_mode (count);
23287 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23288 rtx piece_size = GEN_INT (piece_size_n);
23289 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23290 rtx size;
23291 int i;
23293 top_label = gen_label_rtx ();
23294 out_label = gen_label_rtx ();
23295 iter = gen_reg_rtx (iter_mode);
23297 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23298 NULL, 1, OPTAB_DIRECT);
23299 /* Those two should combine. */
23300 if (piece_size == const1_rtx)
23302 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23303 true, out_label);
23304 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23306 emit_move_insn (iter, const0_rtx);
23308 emit_label (top_label);
23310 tmp = convert_modes (Pmode, iter_mode, iter, true);
23312 /* This assert could be relaxed - in that case we'll need to compute
23313 the smallest power of two containing PIECE_SIZE_N and pass it to
23314 offset_address. */
23315 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23316 destmem = offset_address (destmem, tmp, piece_size_n);
23317 destmem = adjust_address (destmem, mode, 0);
23319 if (!issetmem)
23321 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23322 srcmem = adjust_address (srcmem, mode, 0);
23324 /* When unrolling for chips that reorder memory reads and writes,
23325 we can save registers by using a single temporary.
23326 Using 4 temporaries is also overkill in 32-bit mode. */
23327 if (!TARGET_64BIT && 0)
23329 for (i = 0; i < unroll; i++)
23331 if (i)
23333 destmem =
23334 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23335 srcmem =
23336 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23338 emit_move_insn (destmem, srcmem);
23341 else
23343 rtx tmpreg[4];
23344 gcc_assert (unroll <= 4);
23345 for (i = 0; i < unroll; i++)
23347 tmpreg[i] = gen_reg_rtx (mode);
23348 if (i)
23350 srcmem =
23351 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23353 emit_move_insn (tmpreg[i], srcmem);
23355 for (i = 0; i < unroll; i++)
23357 if (i)
23359 destmem =
23360 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23362 emit_move_insn (destmem, tmpreg[i]);
23366 else
23367 for (i = 0; i < unroll; i++)
23369 if (i)
23370 destmem =
23371 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23372 emit_move_insn (destmem, value);
23375 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23376 true, OPTAB_LIB_WIDEN);
23377 if (tmp != iter)
23378 emit_move_insn (iter, tmp);
23380 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23381 true, top_label);
23382 if (expected_size != -1)
23384 expected_size /= GET_MODE_SIZE (mode) * unroll;
23385 if (expected_size == 0)
23386 predict_jump (0);
23387 else if (expected_size > REG_BR_PROB_BASE)
23388 predict_jump (REG_BR_PROB_BASE - 1);
23389 else
23390 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23392 else
23393 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23394 iter = ix86_zero_extend_to_Pmode (iter);
23395 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23396 true, OPTAB_LIB_WIDEN);
23397 if (tmp != destptr)
23398 emit_move_insn (destptr, tmp);
23399 if (!issetmem)
23401 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23402 true, OPTAB_LIB_WIDEN);
23403 if (tmp != srcptr)
23404 emit_move_insn (srcptr, tmp);
23406 emit_label (out_label);
23409 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23410 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23411 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23412 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23413 ORIG_VALUE is the original value passed to memset to fill the memory with.
23414 Other arguments have the same meaning as for the previous function. */
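/* As an example of the QImode-to-SImode promotion below (a sketch, not a
   guaranteed encoding): clearing 64 bytes with a known zero value switches
   MODE to SImode, so the count register becomes 64 / 4 = 16 and a single
   "rep stosd"-style insn is emitted instead of "rep stosb".  */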
23416 static void
23417 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23418 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23419 rtx count,
23420 machine_mode mode, bool issetmem)
23422 rtx destexp;
23423 rtx srcexp;
23424 rtx countreg;
23425 HOST_WIDE_INT rounded_count;
23427 /* If possible, it is shorter to use rep movs.
23428 TODO: Maybe it is better to move this logic to decide_alg. */
23429 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23430 && (!issetmem || orig_value == const0_rtx))
23431 mode = SImode;
23433 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23434 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23436 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23437 GET_MODE_SIZE (mode)));
23438 if (mode != QImode)
23440 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23441 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23442 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23444 else
23445 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23446 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23448 rounded_count = (INTVAL (count)
23449 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23450 destmem = shallow_copy_rtx (destmem);
23451 set_mem_size (destmem, rounded_count);
23453 else if (MEM_SIZE_KNOWN_P (destmem))
23454 clear_mem_size (destmem);
23456 if (issetmem)
23458 value = force_reg (mode, gen_lowpart (mode, value));
23459 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23461 else
23463 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23464 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23465 if (mode != QImode)
23467 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23468 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23469 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23471 else
23472 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23473 if (CONST_INT_P (count))
23475 rounded_count = (INTVAL (count)
23476 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23477 srcmem = shallow_copy_rtx (srcmem);
23478 set_mem_size (srcmem, rounded_count);
23480 else
23482 if (MEM_SIZE_KNOWN_P (srcmem))
23483 clear_mem_size (srcmem);
23485 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23486 destexp, srcexp));
23490 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23491 DESTMEM.
23492 SRCMEM is passed by pointer so that it can be updated on return.
23493 The return value is the updated DESTMEM. */
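/* For instance (a sketch of the mode selection below, assuming a 64-bit
   target with SSE): a 16-byte SIZE_TO_MOVE picks TImode first, which is then
   mapped to the V2DImode vector mode, so the copy becomes one vector load
   and one vector store through a temporary register.  */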
23494 static rtx
23495 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23496 HOST_WIDE_INT size_to_move)
23498 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23499 enum insn_code code;
23500 machine_mode move_mode;
23501 int piece_size, i;
23503 /* Find the widest mode in which we could perform moves.
23504 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23505 it until a move of that size is supported. */
23506 piece_size = 1 << floor_log2 (size_to_move);
23507 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23508 code = optab_handler (mov_optab, move_mode);
23509 while (code == CODE_FOR_nothing && piece_size > 1)
23511 piece_size >>= 1;
23512 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23513 code = optab_handler (mov_optab, move_mode);
23516 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23517 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23518 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23520 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23521 move_mode = mode_for_vector (word_mode, nunits);
23522 code = optab_handler (mov_optab, move_mode);
23523 if (code == CODE_FOR_nothing)
23525 move_mode = word_mode;
23526 piece_size = GET_MODE_SIZE (move_mode);
23527 code = optab_handler (mov_optab, move_mode);
23530 gcc_assert (code != CODE_FOR_nothing);
23532 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23533 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23535 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23536 gcc_assert (size_to_move % piece_size == 0);
23537 adjust = GEN_INT (piece_size);
23538 for (i = 0; i < size_to_move; i += piece_size)
23540 /* We move from memory to memory, so we'll need to do it via
23541 a temporary register. */
23542 tempreg = gen_reg_rtx (move_mode);
23543 emit_insn (GEN_FCN (code) (tempreg, src));
23544 emit_insn (GEN_FCN (code) (dst, tempreg));
23546 emit_move_insn (destptr,
23547 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23548 emit_move_insn (srcptr,
23549 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23551 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23552 piece_size);
23553 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23554 piece_size);
23557 /* Update DST and SRC rtx. */
23558 *srcmem = src;
23559 return dst;
23562 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23563 static void
23564 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23565 rtx destptr, rtx srcptr, rtx count, int max_size)
23567 rtx src, dest;
23568 if (CONST_INT_P (count))
23570 HOST_WIDE_INT countval = INTVAL (count);
23571 HOST_WIDE_INT epilogue_size = countval % max_size;
23572 int i;
23574 /* For now MAX_SIZE should be a power of 2. This assert could be
23575 relaxed, but it'll require a bit more complicated epilogue
23576 expanding. */
23577 gcc_assert ((max_size & (max_size - 1)) == 0);
23578 for (i = max_size; i >= 1; i >>= 1)
23580 if (epilogue_size & i)
23581 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23583 return;
23585 if (max_size > 8)
23587 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23588 count, 1, OPTAB_DIRECT);
23589 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23590 count, QImode, 1, 4, false);
23591 return;
23594 /* When single-instruction stringops are usable, we can cheaply increase
23595 the dest and src pointers. Otherwise we save code size by maintaining an
23596 offset (zero is readily available from the preceding rep operation) and
23597 using x86 addressing modes. */
23598 if (TARGET_SINGLE_STRINGOP)
23600 if (max_size > 4)
23602 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23603 src = change_address (srcmem, SImode, srcptr);
23604 dest = change_address (destmem, SImode, destptr);
23605 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23606 emit_label (label);
23607 LABEL_NUSES (label) = 1;
23609 if (max_size > 2)
23611 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23612 src = change_address (srcmem, HImode, srcptr);
23613 dest = change_address (destmem, HImode, destptr);
23614 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23615 emit_label (label);
23616 LABEL_NUSES (label) = 1;
23618 if (max_size > 1)
23620 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23621 src = change_address (srcmem, QImode, srcptr);
23622 dest = change_address (destmem, QImode, destptr);
23623 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23624 emit_label (label);
23625 LABEL_NUSES (label) = 1;
23628 else
23630 rtx offset = force_reg (Pmode, const0_rtx);
23631 rtx tmp;
23633 if (max_size > 4)
23635 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23636 src = change_address (srcmem, SImode, srcptr);
23637 dest = change_address (destmem, SImode, destptr);
23638 emit_move_insn (dest, src);
23639 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23640 true, OPTAB_LIB_WIDEN);
23641 if (tmp != offset)
23642 emit_move_insn (offset, tmp);
23643 emit_label (label);
23644 LABEL_NUSES (label) = 1;
23646 if (max_size > 2)
23648 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23649 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23650 src = change_address (srcmem, HImode, tmp);
23651 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23652 dest = change_address (destmem, HImode, tmp);
23653 emit_move_insn (dest, src);
23654 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23655 true, OPTAB_LIB_WIDEN);
23656 if (tmp != offset)
23657 emit_move_insn (offset, tmp);
23658 emit_label (label);
23659 LABEL_NUSES (label) = 1;
23661 if (max_size > 1)
23663 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23664 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23665 src = change_address (srcmem, QImode, tmp);
23666 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23667 dest = change_address (destmem, QImode, tmp);
23668 emit_move_insn (dest, src);
23669 emit_label (label);
23670 LABEL_NUSES (label) = 1;
23675 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23676 with value PROMOTED_VAL.
23677 DESTPTR is advanced past the bytes that have been stored.
23678 The return value is the updated DESTMEM. */
23679 static rtx
23680 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23681 HOST_WIDE_INT size_to_move)
23683 rtx dst = destmem, adjust;
23684 enum insn_code code;
23685 machine_mode move_mode;
23686 int piece_size, i;
23688 /* Find the widest mode in which we can perform the stores: start from
23689 the mode of PROMOTED_VAL and narrow it when SIZE_TO_MOVE is smaller
23690 than that mode's size. */
23691 move_mode = GET_MODE (promoted_val);
23692 if (move_mode == VOIDmode)
23693 move_mode = QImode;
23694 if (size_to_move < GET_MODE_SIZE (move_mode))
23696 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23697 promoted_val = gen_lowpart (move_mode, promoted_val);
23699 piece_size = GET_MODE_SIZE (move_mode);
23700 code = optab_handler (mov_optab, move_mode);
23701 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23703 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23705 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23706 gcc_assert (size_to_move % piece_size == 0);
23707 adjust = GEN_INT (piece_size);
23708 for (i = 0; i < size_to_move; i += piece_size)
23710 if (piece_size <= GET_MODE_SIZE (word_mode))
23712 emit_insn (gen_strset (destptr, dst, promoted_val));
23713 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23714 piece_size);
23715 continue;
23718 emit_insn (GEN_FCN (code) (dst, promoted_val));
23720 emit_move_insn (destptr,
23721 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23723 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23724 piece_size);
23727 /* Update DST rtx. */
23728 return dst;
23730 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23731 static void
23732 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23733 rtx count, int max_size)
23735 count =
23736 expand_simple_binop (counter_mode (count), AND, count,
23737 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23738 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23739 gen_lowpart (QImode, value), count, QImode,
23740 1, max_size / 2, true);
23743 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23744 static void
23745 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23746 rtx count, int max_size)
23748 rtx dest;
23750 if (CONST_INT_P (count))
23752 HOST_WIDE_INT countval = INTVAL (count);
23753 HOST_WIDE_INT epilogue_size = countval % max_size;
23754 int i;
23756 /* For now MAX_SIZE should be a power of 2. This assert could be
23757 relaxed, but it'll require a bit more complicated epilogue
23758 expanding. */
23759 gcc_assert ((max_size & (max_size - 1)) == 0);
23760 for (i = max_size; i >= 1; i >>= 1)
23762 if (epilogue_size & i)
23764 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23765 destmem = emit_memset (destmem, destptr, vec_value, i);
23766 else
23767 destmem = emit_memset (destmem, destptr, value, i);
23770 return;
23772 if (max_size > 32)
23774 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23775 return;
23777 if (max_size > 16)
23779 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23780 if (TARGET_64BIT)
23782 dest = change_address (destmem, DImode, destptr);
23783 emit_insn (gen_strset (destptr, dest, value));
23784 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23785 emit_insn (gen_strset (destptr, dest, value));
23787 else
23789 dest = change_address (destmem, SImode, destptr);
23790 emit_insn (gen_strset (destptr, dest, value));
23791 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23792 emit_insn (gen_strset (destptr, dest, value));
23793 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23794 emit_insn (gen_strset (destptr, dest, value));
23795 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23796 emit_insn (gen_strset (destptr, dest, value));
23798 emit_label (label);
23799 LABEL_NUSES (label) = 1;
23801 if (max_size > 8)
23803 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23804 if (TARGET_64BIT)
23806 dest = change_address (destmem, DImode, destptr);
23807 emit_insn (gen_strset (destptr, dest, value));
23809 else
23811 dest = change_address (destmem, SImode, destptr);
23812 emit_insn (gen_strset (destptr, dest, value));
23813 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23814 emit_insn (gen_strset (destptr, dest, value));
23816 emit_label (label);
23817 LABEL_NUSES (label) = 1;
23819 if (max_size > 4)
23821 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23822 dest = change_address (destmem, SImode, destptr);
23823 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23824 emit_label (label);
23825 LABEL_NUSES (label) = 1;
23827 if (max_size > 2)
23829 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23830 dest = change_address (destmem, HImode, destptr);
23831 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23832 emit_label (label);
23833 LABEL_NUSES (label) = 1;
23835 if (max_size > 1)
23837 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23838 dest = change_address (destmem, QImode, destptr);
23839 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23840 emit_label (label);
23841 LABEL_NUSES (label) = 1;
23845 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23846 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23847 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23848 ignored.
23849 Return value is updated DESTMEM. */
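/* Sketch of the expansion (illustrative only): with ALIGN == 1 and
   DESIRED_ALIGNMENT == 16 the loop below emits, for i = 1, 2, 4 and 8,
   "test bit i of DESTPTR; if set, copy/store i bytes and subtract i from
   COUNT", after which DESTPTR is aligned to 16 bytes.  */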
23850 static rtx
23851 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23852 rtx destptr, rtx srcptr, rtx value,
23853 rtx vec_value, rtx count, int align,
23854 int desired_alignment, bool issetmem)
23856 int i;
23857 for (i = 1; i < desired_alignment; i <<= 1)
23859 if (align <= i)
23861 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23862 if (issetmem)
23864 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23865 destmem = emit_memset (destmem, destptr, vec_value, i);
23866 else
23867 destmem = emit_memset (destmem, destptr, value, i);
23869 else
23870 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23871 ix86_adjust_counter (count, i);
23872 emit_label (label);
23873 LABEL_NUSES (label) = 1;
23874 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23877 return destmem;
23880 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23881 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23882 and jump to DONE_LABEL. */
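/* For example (a sketch of the behaviour, not literal output): with
   SIZE == 4 and COUNT in [4, 7], the first four and the last four bytes
   are copied (or stored) with two possibly overlapping SImode accesses
   and control then jumps to DONE_LABEL.  */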
23883 static void
23884 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23885 rtx destptr, rtx srcptr,
23886 rtx value, rtx vec_value,
23887 rtx count, int size,
23888 rtx done_label, bool issetmem)
23890 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23891 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23892 rtx modesize;
23893 int n;
23895 /* If we do not have vector value to copy, we must reduce size. */
23896 if (issetmem)
23898 if (!vec_value)
23900 if (GET_MODE (value) == VOIDmode && size > 8)
23901 mode = Pmode;
23902 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23903 mode = GET_MODE (value);
23905 else
23906 mode = GET_MODE (vec_value), value = vec_value;
23908 else
23910 /* Choose appropriate vector mode. */
23911 if (size >= 32)
23912 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23913 else if (size >= 16)
23914 mode = TARGET_SSE ? V16QImode : DImode;
23915 srcmem = change_address (srcmem, mode, srcptr);
23917 destmem = change_address (destmem, mode, destptr);
23918 modesize = GEN_INT (GET_MODE_SIZE (mode));
23919 gcc_assert (GET_MODE_SIZE (mode) <= size);
23920 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23922 if (issetmem)
23923 emit_move_insn (destmem, gen_lowpart (mode, value));
23924 else
23926 emit_move_insn (destmem, srcmem);
23927 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23929 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23932 destmem = offset_address (destmem, count, 1);
23933 destmem = offset_address (destmem, GEN_INT (-2 * size),
23934 GET_MODE_SIZE (mode));
23935 if (!issetmem)
23937 srcmem = offset_address (srcmem, count, 1);
23938 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23939 GET_MODE_SIZE (mode));
23941 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23943 if (issetmem)
23944 emit_move_insn (destmem, gen_lowpart (mode, value));
23945 else
23947 emit_move_insn (destmem, srcmem);
23948 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23950 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23952 emit_jump_insn (gen_jump (done_label));
23953 emit_barrier ();
23955 emit_label (label);
23956 LABEL_NUSES (label) = 1;
23959 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power
23960 of 2) and get ready for the main copy loop by copying the initial
23961 DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
23962 DESTPTR/SRCPTR/COUNT so we can proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23963 DONE_LABEL is a label after the whole copying sequence. The label is created
23964 on demand if *DONE_LABEL is NULL.
23965 MIN_SIZE is the minimal size of the block copied. It gets adjusted to the
23966 new bounds after the initial copies.
23968 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23969 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23970 we will dispatch to a library call for large blocks.
23972 In pseudocode we do:
23974 if (COUNT < SIZE)
23976 Assume that SIZE is 4. Bigger sizes are handled analogously
23977 if (COUNT & 4)
23979 copy 4 bytes from SRCPTR to DESTPTR
23980 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23981 goto done_label
23983 if (!COUNT)
23984 goto done_label;
23985 copy 1 byte from SRCPTR to DESTPTR
23986 if (COUNT & 2)
23988 copy 2 bytes from SRCPTR to DESTPTR
23989 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23992 else
23994 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23995 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23997 OLD_DESTPTR = DESTPTR;
23998 Align DESTPTR up to DESIRED_ALIGN
23999 SRCPTR += DESTPTR - OLD_DESTPTR
24000 COUNT -= DESTPTR - OLD_DESTPTR
24001 if (DYNAMIC_CHECK)
24002 Round COUNT down to multiple of SIZE
24003 << optional caller supplied zero size guard is here >>
24004 << optional caller supplied dynamic check is here >>
24005 << caller supplied main copy loop is here >>
24007 done_label:
24009 static void
24010 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24011 rtx *destptr, rtx *srcptr,
24012 machine_mode mode,
24013 rtx value, rtx vec_value,
24014 rtx *count,
24015 rtx_code_label **done_label,
24016 int size,
24017 int desired_align,
24018 int align,
24019 unsigned HOST_WIDE_INT *min_size,
24020 bool dynamic_check,
24021 bool issetmem)
24023 rtx_code_label *loop_label = NULL, *label;
24024 int n;
24025 rtx modesize;
24026 int prolog_size = 0;
24027 rtx mode_value;
24029 /* Choose the proper value to copy. */
24030 if (issetmem && VECTOR_MODE_P (mode))
24031 mode_value = vec_value;
24032 else
24033 mode_value = value;
24034 gcc_assert (GET_MODE_SIZE (mode) <= size);
24036 /* See if block is big or small, handle small blocks. */
24037 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24039 int size2 = size;
24040 loop_label = gen_label_rtx ();
24042 if (!*done_label)
24043 *done_label = gen_label_rtx ();
24045 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24046 1, loop_label);
24047 size2 >>= 1;
24049 /* Handle sizes > 3. */
24050 for (;size2 > 2; size2 >>= 1)
24051 expand_small_movmem_or_setmem (destmem, srcmem,
24052 *destptr, *srcptr,
24053 value, vec_value,
24054 *count,
24055 size2, *done_label, issetmem);
24056 /* Nothing to copy? Jump to DONE_LABEL if so */
24057 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24058 1, *done_label);
24060 /* Do a byte copy. */
24061 destmem = change_address (destmem, QImode, *destptr);
24062 if (issetmem)
24063 emit_move_insn (destmem, gen_lowpart (QImode, value));
24064 else
24066 srcmem = change_address (srcmem, QImode, *srcptr);
24067 emit_move_insn (destmem, srcmem);
24070 /* Handle sizes 2 and 3. */
24071 label = ix86_expand_aligntest (*count, 2, false);
24072 destmem = change_address (destmem, HImode, *destptr);
24073 destmem = offset_address (destmem, *count, 1);
24074 destmem = offset_address (destmem, GEN_INT (-2), 2);
24075 if (issetmem)
24076 emit_move_insn (destmem, gen_lowpart (HImode, value));
24077 else
24079 srcmem = change_address (srcmem, HImode, *srcptr);
24080 srcmem = offset_address (srcmem, *count, 1);
24081 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24082 emit_move_insn (destmem, srcmem);
24085 emit_label (label);
24086 LABEL_NUSES (label) = 1;
24087 emit_jump_insn (gen_jump (*done_label));
24088 emit_barrier ();
24090 else
24091 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24092 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24094 /* Start memcpy for COUNT >= SIZE. */
24095 if (loop_label)
24097 emit_label (loop_label);
24098 LABEL_NUSES (loop_label) = 1;
24101 /* Copy first desired_align bytes. */
24102 if (!issetmem)
24103 srcmem = change_address (srcmem, mode, *srcptr);
24104 destmem = change_address (destmem, mode, *destptr);
24105 modesize = GEN_INT (GET_MODE_SIZE (mode));
24106 for (n = 0; prolog_size < desired_align - align; n++)
24108 if (issetmem)
24109 emit_move_insn (destmem, mode_value);
24110 else
24112 emit_move_insn (destmem, srcmem);
24113 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24115 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24116 prolog_size += GET_MODE_SIZE (mode);
24120 /* Copy last SIZE bytes. */
24121 destmem = offset_address (destmem, *count, 1);
24122 destmem = offset_address (destmem,
24123 GEN_INT (-size - prolog_size),
24125 if (issetmem)
24126 emit_move_insn (destmem, mode_value);
24127 else
24129 srcmem = offset_address (srcmem, *count, 1);
24130 srcmem = offset_address (srcmem,
24131 GEN_INT (-size - prolog_size),
24133 emit_move_insn (destmem, srcmem);
24135 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24137 destmem = offset_address (destmem, modesize, 1);
24138 if (issetmem)
24139 emit_move_insn (destmem, mode_value);
24140 else
24142 srcmem = offset_address (srcmem, modesize, 1);
24143 emit_move_insn (destmem, srcmem);
24147 /* Align destination. */
24148 if (desired_align > 1 && desired_align > align)
24150 rtx saveddest = *destptr;
24152 gcc_assert (desired_align <= size);
24153 /* Align destptr up, place it to new register. */
24154 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24155 GEN_INT (prolog_size),
24156 NULL_RTX, 1, OPTAB_DIRECT);
24157 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24158 GEN_INT (-desired_align),
24159 *destptr, 1, OPTAB_DIRECT);
24160 /* See how many bytes we skipped. */
24161 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24162 *destptr,
24163 saveddest, 1, OPTAB_DIRECT);
24164 /* Adjust srcptr and count. */
24165 if (!issetmem)
24166 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24167 *srcptr, 1, OPTAB_DIRECT);
24168 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24169 saveddest, *count, 1, OPTAB_DIRECT);
24170 /* We copied at most size + prolog_size. */
24171 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24172 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24173 else
24174 *min_size = 0;
24176 /* Our loops always round down the block size, but for dispatch to a
24177 library call we need the precise value. */
24178 if (dynamic_check)
24179 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24180 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24182 else
24184 gcc_assert (prolog_size == 0);
24185 /* Decrease count, so we won't end up copying last word twice. */
24186 if (!CONST_INT_P (*count))
24187 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24188 constm1_rtx, *count, 1, OPTAB_DIRECT);
24189 else
24190 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24191 if (*min_size)
24192 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24197 /* This function is like the previous one, except here we know how many bytes
24198 need to be copied. That allows us to update alignment not only of DST, which
24199 is returned, but also of SRC, which is passed as a pointer for that
24200 reason. */
24201 static rtx
24202 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24203 rtx srcreg, rtx value, rtx vec_value,
24204 int desired_align, int align_bytes,
24205 bool issetmem)
24207 rtx src = NULL;
24208 rtx orig_dst = dst;
24209 rtx orig_src = NULL;
24210 int piece_size = 1;
24211 int copied_bytes = 0;
24213 if (!issetmem)
24215 gcc_assert (srcp != NULL);
24216 src = *srcp;
24217 orig_src = src;
24220 for (piece_size = 1;
24221 piece_size <= desired_align && copied_bytes < align_bytes;
24222 piece_size <<= 1)
24224 if (align_bytes & piece_size)
24226 if (issetmem)
24228 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24229 dst = emit_memset (dst, destreg, vec_value, piece_size);
24230 else
24231 dst = emit_memset (dst, destreg, value, piece_size);
24233 else
24234 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24235 copied_bytes += piece_size;
24238 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24239 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24240 if (MEM_SIZE_KNOWN_P (orig_dst))
24241 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24243 if (!issetmem)
24245 int src_align_bytes = get_mem_align_offset (src, desired_align
24246 * BITS_PER_UNIT);
24247 if (src_align_bytes >= 0)
24248 src_align_bytes = desired_align - src_align_bytes;
24249 if (src_align_bytes >= 0)
24251 unsigned int src_align;
24252 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24254 if ((src_align_bytes & (src_align - 1))
24255 == (align_bytes & (src_align - 1)))
24256 break;
24258 if (src_align > (unsigned int) desired_align)
24259 src_align = desired_align;
24260 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24261 set_mem_align (src, src_align * BITS_PER_UNIT);
24263 if (MEM_SIZE_KNOWN_P (orig_src))
24264 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24265 *srcp = src;
24268 return dst;
24271 /* Return true if ALG can be used in current context.
24272 Assume we expand memset if MEMSET is true. */
24273 static bool
24274 alg_usable_p (enum stringop_alg alg, bool memset)
24276 if (alg == no_stringop)
24277 return false;
24278 if (alg == vector_loop)
24279 return TARGET_SSE || TARGET_AVX;
24280 /* Algorithms using the rep prefix want at least edi and ecx;
24281 additionally, memset wants eax and memcpy wants esi. Don't
24282 consider such algorithms if the user has appropriated those
24283 registers for their own purposes. */
24284 if (alg == rep_prefix_1_byte
24285 || alg == rep_prefix_4_byte
24286 || alg == rep_prefix_8_byte)
24287 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24288 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24289 return true;
24292 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24293 static enum stringop_alg
24294 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24295 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24296 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24298 const struct stringop_algs * algs;
24299 bool optimize_for_speed;
24300 int max = 0;
24301 const struct processor_costs *cost;
24302 int i;
24303 bool any_alg_usable_p = false;
24305 *noalign = false;
24306 *dynamic_check = -1;
24308 /* Even if the string operation call is cold, we still might spend a lot
24309 of time processing large blocks. */
24310 if (optimize_function_for_size_p (cfun)
24311 || (optimize_insn_for_size_p ()
24312 && (max_size < 256
24313 || (expected_size != -1 && expected_size < 256))))
24314 optimize_for_speed = false;
24315 else
24316 optimize_for_speed = true;
24318 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24319 if (memset)
24320 algs = &cost->memset[TARGET_64BIT != 0];
24321 else
24322 algs = &cost->memcpy[TARGET_64BIT != 0];
24324 /* See maximal size for user defined algorithm. */
24325 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24327 enum stringop_alg candidate = algs->size[i].alg;
24328 bool usable = alg_usable_p (candidate, memset);
24329 any_alg_usable_p |= usable;
24331 if (candidate != libcall && candidate && usable)
24332 max = algs->size[i].max;
24335 /* If the expected size is not known but the maximal size is small enough
24336 that the inline version is a win, set the expected size into
24337 that range. */
24338 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24339 && expected_size == -1)
24340 expected_size = min_size / 2 + max_size / 2;
24342 /* If the user specified the algorithm, honor it if possible. */
24343 if (ix86_stringop_alg != no_stringop
24344 && alg_usable_p (ix86_stringop_alg, memset))
24345 return ix86_stringop_alg;
24346 /* rep; movq or rep; movl is the smallest variant. */
24347 else if (!optimize_for_speed)
24349 *noalign = true;
24350 if (!count || (count & 3) || (memset && !zero_memset))
24351 return alg_usable_p (rep_prefix_1_byte, memset)
24352 ? rep_prefix_1_byte : loop_1_byte;
24353 else
24354 return alg_usable_p (rep_prefix_4_byte, memset)
24355 ? rep_prefix_4_byte : loop;
24357 /* Very tiny blocks are best handled via the loop; REP is expensive to
24358 set up. */
24359 else if (expected_size != -1 && expected_size < 4)
24360 return loop_1_byte;
24361 else if (expected_size != -1)
24363 enum stringop_alg alg = libcall;
24364 bool alg_noalign = false;
24365 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24367 /* We get here if the algorithms that were not libcall-based
24368 were rep-prefix based and we are unable to use rep prefixes
24369 based on global register usage. Break out of the loop and
24370 use the heuristic below. */
24371 if (algs->size[i].max == 0)
24372 break;
24373 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24375 enum stringop_alg candidate = algs->size[i].alg;
24377 if (candidate != libcall && alg_usable_p (candidate, memset))
24379 alg = candidate;
24380 alg_noalign = algs->size[i].noalign;
24382 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24383 last non-libcall inline algorithm. */
24384 if (TARGET_INLINE_ALL_STRINGOPS)
24386 /* When the current size is best to be copied by a libcall,
24387 but we are still forced to inline, run the heuristic below
24388 that will pick code for medium sized blocks. */
24389 if (alg != libcall)
24391 *noalign = alg_noalign;
24392 return alg;
24394 break;
24396 else if (alg_usable_p (candidate, memset))
24398 *noalign = algs->size[i].noalign;
24399 return candidate;
24404 /* When asked to inline the call anyway, try to pick a meaningful choice.
24405 We look for the maximal size of block that is faster to copy by hand and
24406 take blocks of at most that size, guessing that the average size will
24407 be roughly half of the block.
24409 If this turns out to be bad, we might simply specify the preferred
24410 choice in ix86_costs. */
24411 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24412 && (algs->unknown_size == libcall
24413 || !alg_usable_p (algs->unknown_size, memset)))
24415 enum stringop_alg alg;
24417 /* If there aren't any usable algorithms, then recursing on
24418 smaller sizes isn't going to find anything. Just return the
24419 simple byte-at-a-time copy loop. */
24420 if (!any_alg_usable_p)
24422 /* Pick something reasonable. */
24423 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24424 *dynamic_check = 128;
24425 return loop_1_byte;
24427 if (max <= 0)
24428 max = 4096;
24429 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24430 zero_memset, dynamic_check, noalign);
24431 gcc_assert (*dynamic_check == -1);
24432 gcc_assert (alg != libcall);
24433 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24434 *dynamic_check = max;
24435 return alg;
24437 return (alg_usable_p (algs->unknown_size, memset)
24438 ? algs->unknown_size : libcall);
24441 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24442 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24443 static int
24444 decide_alignment (int align,
24445 enum stringop_alg alg,
24446 int expected_size,
24447 machine_mode move_mode)
24449 int desired_align = 0;
24451 gcc_assert (alg != no_stringop);
24453 if (alg == libcall)
24454 return 0;
24455 if (move_mode == VOIDmode)
24456 return 0;
24458 desired_align = GET_MODE_SIZE (move_mode);
24459 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24460 copying a whole cache line at once. */
24461 if (TARGET_PENTIUMPRO
24462 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24463 desired_align = 8;
24465 if (optimize_size)
24466 desired_align = 1;
24467 if (desired_align < align)
24468 desired_align = align;
24469 if (expected_size != -1 && expected_size < 4)
24470 desired_align = align;
24472 return desired_align;
24476 /* Helper function for memset expansion. For a QImode value 0xXY produce
24477 0xXYXYXYXY of the width specified by MODE. This is essentially
24478 a * 0x10101010, but we can do slightly better than
24479 synth_mult by unwinding the sequence by hand on CPUs with
24480 slow multiply. */
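/* Illustrative sketch of the non-multiply path below, assuming SImode and a
   register holding 0xAB:
       reg |= reg << 8;    -> 0x0000ABAB  (or a byte-insert insn)
       reg |= reg << 16;   -> 0xABABABAB
   with one more 32-bit shift/or step for DImode; constant inputs are folded
   directly to the replicated immediate instead.  */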
24481 static rtx
24482 promote_duplicated_reg (machine_mode mode, rtx val)
24484 machine_mode valmode = GET_MODE (val);
24485 rtx tmp;
24486 int nops = mode == DImode ? 3 : 2;
24488 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24489 if (val == const0_rtx)
24490 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24491 if (CONST_INT_P (val))
24493 HOST_WIDE_INT v = INTVAL (val) & 255;
24495 v |= v << 8;
24496 v |= v << 16;
24497 if (mode == DImode)
24498 v |= (v << 16) << 16;
24499 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24502 if (valmode == VOIDmode)
24503 valmode = QImode;
24504 if (valmode != QImode)
24505 val = gen_lowpart (QImode, val);
24506 if (mode == QImode)
24507 return val;
24508 if (!TARGET_PARTIAL_REG_STALL)
24509 nops--;
24510 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24511 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24512 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24513 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24515 rtx reg = convert_modes (mode, QImode, val, true);
24516 tmp = promote_duplicated_reg (mode, const1_rtx);
24517 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24518 OPTAB_DIRECT);
24520 else
24522 rtx reg = convert_modes (mode, QImode, val, true);
24524 if (!TARGET_PARTIAL_REG_STALL)
24525 if (mode == SImode)
24526 emit_insn (gen_movsi_insv_1 (reg, reg));
24527 else
24528 emit_insn (gen_movdi_insv_1 (reg, reg));
24529 else
24531 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24532 NULL, 1, OPTAB_DIRECT);
24533 reg =
24534 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24536 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24537 NULL, 1, OPTAB_DIRECT);
24538 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24539 if (mode == SImode)
24540 return reg;
24541 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24542 NULL, 1, OPTAB_DIRECT);
24543 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24544 return reg;
24548 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24549 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24550 getting alignment from ALIGN to DESIRED_ALIGN. */
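/* Added note (illustrative, not in the original comment): e.g. on a 64-bit
   target a main loop with SIZE_NEEDED == 16 promotes VAL to DImode, while
   SIZE_NEEDED == 2 with matching alignment only needs an HImode copy, as
   the cascade of tests below shows.  */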
24551 static rtx
24552 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24553 int align)
24555 rtx promoted_val;
24557 if (TARGET_64BIT
24558 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24559 promoted_val = promote_duplicated_reg (DImode, val);
24560 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24561 promoted_val = promote_duplicated_reg (SImode, val);
24562 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24563 promoted_val = promote_duplicated_reg (HImode, val);
24564 else
24565 promoted_val = val;
24567 return promoted_val;
24570 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24571 operations when profitable. The code depends upon architecture, block size
24572 and alignment, but always has one of the following overall structures:
24574 Aligned move sequence:
24576 1) Prologue guard: Conditional that jumps up to epilogues for small
24577 blocks that can be handled by the epilogue alone. This is faster
24578 but also needed for correctness, since the prologue assumes the block
24579 is larger than the desired alignment.
24581 Optional dynamic check for size and libcall for large
24582 blocks is emitted here too, with -minline-stringops-dynamically.
24584 2) Prologue: copy first few bytes in order to get destination
24585 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24586 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24587 copied. We emit either a jump tree on power of two sized
24588 blocks, or a byte loop.
24590 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24591 with specified algorithm.
24593 4) Epilogue: code copying tail of the block that is too small to be
24594 handled by main body (or up to size guarded by prologue guard).
24596 Misaligned move sequence:
24598 1) Misaligned move prologue/epilogue containing:
24599 a) Prologue handling small memory blocks and jumping to done_label
24600 (skipped if blocks are known to be large enough)
24601 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24602 needed, by a single possibly misaligned move
24603 (skipped if alignment is not needed)
24604 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24606 2) Zero size guard dispatching to done_label, if needed
24608 3) Dispatch to library call, if needed,
24610 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24611 with specified algorithm. */
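/* Worked example (added illustration, numbers are hypothetical): copying
   100 bytes with ALIGN == 1, DESIRED_ALIGN == 8 and SIZE_NEEDED == 16
   under the aligned sequence copies at most 7 bytes in the prologue to
   reach 8-byte alignment, moves the following 16-byte chunks in the main
   body, and leaves the remaining tail of fewer than 16 bytes to the
   epilogue.  */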
24612 bool
24613 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24614 rtx align_exp, rtx expected_align_exp,
24615 rtx expected_size_exp, rtx min_size_exp,
24616 rtx max_size_exp, rtx probable_max_size_exp,
24617 bool issetmem)
24619 rtx destreg;
24620 rtx srcreg = NULL;
24621 rtx_code_label *label = NULL;
24622 rtx tmp;
24623 rtx_code_label *jump_around_label = NULL;
24624 HOST_WIDE_INT align = 1;
24625 unsigned HOST_WIDE_INT count = 0;
24626 HOST_WIDE_INT expected_size = -1;
24627 int size_needed = 0, epilogue_size_needed;
24628 int desired_align = 0, align_bytes = 0;
24629 enum stringop_alg alg;
24630 rtx promoted_val = NULL;
24631 rtx vec_promoted_val = NULL;
24632 bool force_loopy_epilogue = false;
24633 int dynamic_check;
24634 bool need_zero_guard = false;
24635 bool noalign;
24636 machine_mode move_mode = VOIDmode;
24637 int unroll_factor = 1;
24638 /* TODO: Once value ranges are available, fill in proper data. */
24639 unsigned HOST_WIDE_INT min_size = 0;
24640 unsigned HOST_WIDE_INT max_size = -1;
24641 unsigned HOST_WIDE_INT probable_max_size = -1;
24642 bool misaligned_prologue_used = false;
24644 if (CONST_INT_P (align_exp))
24645 align = INTVAL (align_exp);
24646 /* i386 can do misaligned access at a reasonably increased cost. */
24647 if (CONST_INT_P (expected_align_exp)
24648 && INTVAL (expected_align_exp) > align)
24649 align = INTVAL (expected_align_exp);
24650 /* ALIGN is the minimum of destination and source alignment, but we care here
24651 just about destination alignment. */
24652 else if (!issetmem
24653 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24654 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24656 if (CONST_INT_P (count_exp))
24658 min_size = max_size = probable_max_size = count = expected_size
24659 = INTVAL (count_exp);
24660 /* When COUNT is 0, there is nothing to do. */
24661 if (!count)
24662 return true;
24664 else
24666 if (min_size_exp)
24667 min_size = INTVAL (min_size_exp);
24668 if (max_size_exp)
24669 max_size = INTVAL (max_size_exp);
24670 if (probable_max_size_exp)
24671 probable_max_size = INTVAL (probable_max_size_exp);
24672 if (CONST_INT_P (expected_size_exp))
24673 expected_size = INTVAL (expected_size_exp);
24676 /* Make sure we don't need to care about overflow later on. */
24677 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24678 return false;
24680 /* Step 0: Decide on preferred algorithm, desired alignment and
24681 size of chunks to be copied by main loop. */
24682 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24683 issetmem,
24684 issetmem && val_exp == const0_rtx,
24685 &dynamic_check, &noalign);
24686 if (alg == libcall)
24687 return false;
24688 gcc_assert (alg != no_stringop);
24690 /* For now the vector version of memset is generated only for memory zeroing, as
24691 creating the promoted vector value is very cheap in this case. */
24692 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24693 alg = unrolled_loop;
24695 if (!count)
24696 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24697 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24698 if (!issetmem)
24699 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24701 unroll_factor = 1;
24702 move_mode = word_mode;
24703 switch (alg)
24705 case libcall:
24706 case no_stringop:
24707 case last_alg:
24708 gcc_unreachable ();
24709 case loop_1_byte:
24710 need_zero_guard = true;
24711 move_mode = QImode;
24712 break;
24713 case loop:
24714 need_zero_guard = true;
24715 break;
24716 case unrolled_loop:
24717 need_zero_guard = true;
24718 unroll_factor = (TARGET_64BIT ? 4 : 2);
24719 break;
24720 case vector_loop:
24721 need_zero_guard = true;
24722 unroll_factor = 4;
24723 /* Find the widest supported mode. */
24724 move_mode = word_mode;
24725 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24726 != CODE_FOR_nothing)
24727 move_mode = GET_MODE_WIDER_MODE (move_mode);
24729 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24730 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24731 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24733 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24734 move_mode = mode_for_vector (word_mode, nunits);
24735 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24736 move_mode = word_mode;
24738 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24739 break;
24740 case rep_prefix_8_byte:
24741 move_mode = DImode;
24742 break;
24743 case rep_prefix_4_byte:
24744 move_mode = SImode;
24745 break;
24746 case rep_prefix_1_byte:
24747 move_mode = QImode;
24748 break;
24750 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24751 epilogue_size_needed = size_needed;
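/* Added note (illustrative): e.g. for alg == unrolled_loop on a 64-bit
   target, move_mode is word_mode == DImode and unroll_factor == 4, so
   size_needed == 32 and the main loop below processes 32 bytes per
   iteration.  */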
24753 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24754 if (!TARGET_ALIGN_STRINGOPS || noalign)
24755 align = desired_align;
24757 /* Step 1: Prologue guard. */
24759 /* Alignment code needs count to be in a register. */
24760 if (CONST_INT_P (count_exp) && desired_align > align)
24762 if (INTVAL (count_exp) > desired_align
24763 && INTVAL (count_exp) > size_needed)
24765 align_bytes
24766 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24767 if (align_bytes <= 0)
24768 align_bytes = 0;
24769 else
24770 align_bytes = desired_align - align_bytes;
24772 if (align_bytes == 0)
24773 count_exp = force_reg (counter_mode (count_exp), count_exp);
24775 gcc_assert (desired_align >= 1 && align >= 1);
24777 /* Misaligned move sequences handle both prologue and epilogue at once.
24778 Default code generation results in smaller code for large alignments
24779 and also avoids redundant work when sizes are known precisely. */
24780 misaligned_prologue_used
24781 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24782 && MAX (desired_align, epilogue_size_needed) <= 32
24783 && desired_align <= epilogue_size_needed
24784 && ((desired_align > align && !align_bytes)
24785 || (!count && epilogue_size_needed > 1)));
24787 /* Do the cheap promotion to allow better CSE across the
24788 main loop and epilogue (i.e. one load of the big constant in
24789 front of all the code).
24790 For now the misaligned move sequences do not have a fast path
24791 without broadcasting. */
24792 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24794 if (alg == vector_loop)
24796 gcc_assert (val_exp == const0_rtx);
24797 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24798 promoted_val = promote_duplicated_reg_to_size (val_exp,
24799 GET_MODE_SIZE (word_mode),
24800 desired_align, align);
24802 else
24804 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24805 desired_align, align);
24808 /* Misaligned move sequences handle both prologues and epilogues at once.
24809 Default code generation results in smaller code for large alignments and
24810 also avoids redundant work when sizes are known precisely. */
24811 if (misaligned_prologue_used)
24813 /* Misaligned move prologue handles small blocks by itself. */
24814 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24815 (dst, src, &destreg, &srcreg,
24816 move_mode, promoted_val, vec_promoted_val,
24817 &count_exp,
24818 &jump_around_label,
24819 desired_align < align
24820 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24821 desired_align, align, &min_size, dynamic_check, issetmem);
24822 if (!issetmem)
24823 src = change_address (src, BLKmode, srcreg);
24824 dst = change_address (dst, BLKmode, destreg);
24825 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24826 epilogue_size_needed = 0;
24827 if (need_zero_guard && !min_size)
24829 /* It is possible that we copied enough so the main loop will not
24830 execute. */
24831 gcc_assert (size_needed > 1);
24832 if (jump_around_label == NULL_RTX)
24833 jump_around_label = gen_label_rtx ();
24834 emit_cmp_and_jump_insns (count_exp,
24835 GEN_INT (size_needed),
24836 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24837 if (expected_size == -1
24838 || expected_size < (desired_align - align) / 2 + size_needed)
24839 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24840 else
24841 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24844 /* Ensure that alignment prologue won't copy past end of block. */
24845 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24847 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24848 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24849 Make sure it is a power of 2. */
24850 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
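/* Added note: the shift above rounds EPILOGUE_SIZE_NEEDED up to a power
   of two, e.g. 15 -> 16 and 31 -> 32 (an exact power of two such as 16
   becomes 32).  */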
24852 /* To improve performance of small blocks, we jump around the VAL
24853 promoting code. This means that if the promoted VAL is not constant,
24854 we might not use it in the epilogue and have to use the byte
24855 loop variant. */
24856 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24857 force_loopy_epilogue = true;
24858 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24859 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24861 /* If the main algorithm works on QImode, no epilogue is needed.
24862 For small sizes just don't align anything. */
24863 if (size_needed == 1)
24864 desired_align = align;
24865 else
24866 goto epilogue;
24868 else if (!count
24869 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24871 label = gen_label_rtx ();
24872 emit_cmp_and_jump_insns (count_exp,
24873 GEN_INT (epilogue_size_needed),
24874 LTU, 0, counter_mode (count_exp), 1, label);
24875 if (expected_size == -1 || expected_size < epilogue_size_needed)
24876 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24877 else
24878 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24882 /* Emit code to decide at runtime whether a library call or inline code
24883 should be used. */
24884 if (dynamic_check != -1)
24886 if (!issetmem && CONST_INT_P (count_exp))
24888 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24890 emit_block_move_via_libcall (dst, src, count_exp, false);
24891 count_exp = const0_rtx;
24892 goto epilogue;
24895 else
24897 rtx_code_label *hot_label = gen_label_rtx ();
24898 if (jump_around_label == NULL_RTX)
24899 jump_around_label = gen_label_rtx ();
24900 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24901 LEU, 0, counter_mode (count_exp),
24902 1, hot_label);
24903 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24904 if (issetmem)
24905 set_storage_via_libcall (dst, count_exp, val_exp, false);
24906 else
24907 emit_block_move_via_libcall (dst, src, count_exp, false);
24908 emit_jump (jump_around_label);
24909 emit_label (hot_label);
24913 /* Step 2: Alignment prologue. */
24914 /* Do the expensive promotion once we branched off the small blocks. */
24915 if (issetmem && !promoted_val)
24916 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24917 desired_align, align);
24919 if (desired_align > align && !misaligned_prologue_used)
24921 if (align_bytes == 0)
24923 /* Except for the first move in the prologue, we no longer know
24924 the constant offset in aliasing info. It doesn't seem worth
24925 the pain to maintain it for the first move, so throw away
24926 the info early. */
24927 dst = change_address (dst, BLKmode, destreg);
24928 if (!issetmem)
24929 src = change_address (src, BLKmode, srcreg);
24930 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24931 promoted_val, vec_promoted_val,
24932 count_exp, align, desired_align,
24933 issetmem);
24934 /* At most desired_align - align bytes are copied. */
24935 if (min_size < (unsigned)(desired_align - align))
24936 min_size = 0;
24937 else
24938 min_size -= desired_align - align;
24940 else
24942 /* If we know how many bytes need to be stored before dst is
24943 sufficiently aligned, maintain aliasing info accurately. */
24944 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24945 srcreg,
24946 promoted_val,
24947 vec_promoted_val,
24948 desired_align,
24949 align_bytes,
24950 issetmem);
24952 count_exp = plus_constant (counter_mode (count_exp),
24953 count_exp, -align_bytes);
24954 count -= align_bytes;
24955 min_size -= align_bytes;
24956 max_size -= align_bytes;
24958 if (need_zero_guard
24959 && !min_size
24960 && (count < (unsigned HOST_WIDE_INT) size_needed
24961 || (align_bytes == 0
24962 && count < ((unsigned HOST_WIDE_INT) size_needed
24963 + desired_align - align))))
24965 /* It is possible that we copied enough so the main loop will not
24966 execute. */
24967 gcc_assert (size_needed > 1);
24968 if (label == NULL_RTX)
24969 label = gen_label_rtx ();
24970 emit_cmp_and_jump_insns (count_exp,
24971 GEN_INT (size_needed),
24972 LTU, 0, counter_mode (count_exp), 1, label);
24973 if (expected_size == -1
24974 || expected_size < (desired_align - align) / 2 + size_needed)
24975 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24976 else
24977 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24980 if (label && size_needed == 1)
24982 emit_label (label);
24983 LABEL_NUSES (label) = 1;
24984 label = NULL;
24985 epilogue_size_needed = 1;
24986 if (issetmem)
24987 promoted_val = val_exp;
24989 else if (label == NULL_RTX && !misaligned_prologue_used)
24990 epilogue_size_needed = size_needed;
24992 /* Step 3: Main loop. */
24994 switch (alg)
24996 case libcall:
24997 case no_stringop:
24998 case last_alg:
24999 gcc_unreachable ();
25000 case loop_1_byte:
25001 case loop:
25002 case unrolled_loop:
25003 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25004 count_exp, move_mode, unroll_factor,
25005 expected_size, issetmem);
25006 break;
25007 case vector_loop:
25008 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25009 vec_promoted_val, count_exp, move_mode,
25010 unroll_factor, expected_size, issetmem);
25011 break;
25012 case rep_prefix_8_byte:
25013 case rep_prefix_4_byte:
25014 case rep_prefix_1_byte:
25015 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25016 val_exp, count_exp, move_mode, issetmem);
25017 break;
25019 /* Properly adjust the offset of src and dest memory for aliasing. */
25020 if (CONST_INT_P (count_exp))
25022 if (!issetmem)
25023 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25024 (count / size_needed) * size_needed);
25025 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25026 (count / size_needed) * size_needed);
25028 else
25030 if (!issetmem)
25031 src = change_address (src, BLKmode, srcreg);
25032 dst = change_address (dst, BLKmode, destreg);
25035 /* Step 4: Epilogue to copy the remaining bytes. */
25036 epilogue:
25037 if (label)
25039 /* When the main loop is done, COUNT_EXP might hold original count,
25040 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25041 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25042 bytes. Compensate if needed. */
25044 if (size_needed < epilogue_size_needed)
25046 tmp =
25047 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25048 GEN_INT (size_needed - 1), count_exp, 1,
25049 OPTAB_DIRECT);
25050 if (tmp != count_exp)
25051 emit_move_insn (count_exp, tmp);
25053 emit_label (label);
25054 LABEL_NUSES (label) = 1;
25057 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25059 if (force_loopy_epilogue)
25060 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25061 epilogue_size_needed);
25062 else
25064 if (issetmem)
25065 expand_setmem_epilogue (dst, destreg, promoted_val,
25066 vec_promoted_val, count_exp,
25067 epilogue_size_needed);
25068 else
25069 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25070 epilogue_size_needed);
25073 if (jump_around_label)
25074 emit_label (jump_around_label);
25075 return true;
25079 /* Expand the appropriate insns for doing strlen if not just doing
25080 repnz; scasb
25082 out = result, initialized with the start address
25083 align_rtx = alignment of the address.
25084 scratch = scratch register, initialized with the start address when
25085 not aligned, otherwise undefined
25087 This is just the body. It needs the initializations mentioned above and
25088 some address computation at the end. These things are done in i386.md. */
25090 static void
25091 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25093 int align;
25094 rtx tmp;
25095 rtx_code_label *align_2_label = NULL;
25096 rtx_code_label *align_3_label = NULL;
25097 rtx_code_label *align_4_label = gen_label_rtx ();
25098 rtx_code_label *end_0_label = gen_label_rtx ();
25099 rtx mem;
25100 rtx tmpreg = gen_reg_rtx (SImode);
25101 rtx scratch = gen_reg_rtx (SImode);
25102 rtx cmp;
25104 align = 0;
25105 if (CONST_INT_P (align_rtx))
25106 align = INTVAL (align_rtx);
25108 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25110 /* Is there a known alignment and is it less than 4? */
25111 if (align < 4)
25113 rtx scratch1 = gen_reg_rtx (Pmode);
25114 emit_move_insn (scratch1, out);
25115 /* Is there a known alignment and is it not 2? */
25116 if (align != 2)
25118 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25119 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25121 /* Leave just the 3 lower bits. */
25122 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25123 NULL_RTX, 0, OPTAB_WIDEN);
25125 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25126 Pmode, 1, align_4_label);
25127 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25128 Pmode, 1, align_2_label);
25129 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25130 Pmode, 1, align_3_label);
25132 else
25134 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25135 check if it is aligned to 4 bytes. */
25137 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25138 NULL_RTX, 0, OPTAB_WIDEN);
25140 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25141 Pmode, 1, align_4_label);
25144 mem = change_address (src, QImode, out);
25146 /* Now compare the bytes. */
25148 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25149 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25150 QImode, 1, end_0_label);
25152 /* Increment the address. */
25153 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25155 /* Not needed with an alignment of 2 */
25156 if (align != 2)
25158 emit_label (align_2_label);
25160 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25161 end_0_label);
25163 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25165 emit_label (align_3_label);
25168 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25169 end_0_label);
25171 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25174 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25175 align this loop; it only makes programs larger and does not help to
25176 speed them up. */
25177 emit_label (align_4_label);
25179 mem = change_address (src, SImode, out);
25180 emit_move_insn (scratch, mem);
25181 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25183 /* This formula yields a nonzero result iff one of the bytes is zero.
25184 This saves three branches inside the loop and many cycles. */
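/* Added illustration of the trick above (not in the original comment): the
   insns below compute the classic zero-byte test
       haszero (x) = ((x - 0x01010101) & ~x) & 0x80808080
   e.g. x == 0x41004242 yields 0x00800000 (its second byte is zero), while
   x == 0x41424344, which has no zero byte, yields 0.  */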
25186 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25187 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25188 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25189 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25190 gen_int_mode (0x80808080, SImode)));
25191 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25192 align_4_label);
25194 if (TARGET_CMOVE)
25196 rtx reg = gen_reg_rtx (SImode);
25197 rtx reg2 = gen_reg_rtx (Pmode);
25198 emit_move_insn (reg, tmpreg);
25199 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25201 /* If zero is not in the first two bytes, move two bytes forward. */
25202 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25203 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25204 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25205 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25206 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25207 reg,
25208 tmpreg)));
25209 /* Emit lea manually to avoid clobbering of flags. */
25210 emit_insn (gen_rtx_SET (SImode, reg2,
25211 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25213 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25214 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25215 emit_insn (gen_rtx_SET (VOIDmode, out,
25216 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25217 reg2,
25218 out)));
25220 else
25222 rtx_code_label *end_2_label = gen_label_rtx ();
25223 /* Is zero in the first two bytes? */
25225 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25226 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25227 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25228 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25229 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25230 pc_rtx);
25231 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25232 JUMP_LABEL (tmp) = end_2_label;
25234 /* Not in the first two. Move two bytes forward. */
25235 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25236 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25238 emit_label (end_2_label);
25242 /* Avoid branch in fixing the byte. */
25243 tmpreg = gen_lowpart (QImode, tmpreg);
25244 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25245 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25246 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25247 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25249 emit_label (end_0_label);
25252 /* Expand strlen. */
25254 bool
25255 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25257 rtx addr, scratch1, scratch2, scratch3, scratch4;
25259 /* The generic case of the strlen expander is long. Avoid expanding
25260 it unless TARGET_INLINE_ALL_STRINGOPS. */
25262 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25263 && !TARGET_INLINE_ALL_STRINGOPS
25264 && !optimize_insn_for_size_p ()
25265 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25266 return false;
25268 addr = force_reg (Pmode, XEXP (src, 0));
25269 scratch1 = gen_reg_rtx (Pmode);
25271 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25272 && !optimize_insn_for_size_p ())
25274 /* Well it seems that some optimizer does not combine a call like
25275 foo(strlen(bar), strlen(bar));
25276 when the move and the subtraction are done here. It does calculate
25277 the length just once when these instructions are done inside
25278 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25279 often used and one fewer register is live for the lifetime of
25280 output_strlen_unroll(), this is better. */
25282 emit_move_insn (out, addr);
25284 ix86_expand_strlensi_unroll_1 (out, src, align);
25286 /* strlensi_unroll_1 returns the address of the zero at the end of
25287 the string, like memchr(), so compute the length by subtracting
25288 the start address. */
25289 emit_insn (ix86_gen_sub3 (out, out, addr));
25291 else
25293 rtx unspec;
25295 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25296 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25297 return false;
25299 scratch2 = gen_reg_rtx (Pmode);
25300 scratch3 = gen_reg_rtx (Pmode);
25301 scratch4 = force_reg (Pmode, constm1_rtx);
25303 emit_move_insn (scratch3, addr);
25304 eoschar = force_reg (QImode, eoschar);
25306 src = replace_equiv_address_nv (src, scratch3);
25308 /* If .md starts supporting :P, this can be done in .md. */
25309 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25310 scratch4), UNSPEC_SCAS);
25311 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25312 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25313 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25315 return true;
25318 /* For a given symbol (function) construct code to compute the address of its
25319 PLT entry in the large x86-64 PIC model. */
25320 static rtx
25321 construct_plt_address (rtx symbol)
25323 rtx tmp, unspec;
25325 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25326 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25327 gcc_assert (Pmode == DImode);
25329 tmp = gen_reg_rtx (Pmode);
25330 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25332 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25333 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25334 return tmp;
25338 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25339 rtx callarg2,
25340 rtx pop, bool sibcall)
25342 rtx vec[3];
25343 rtx use = NULL, call;
25344 unsigned int vec_len = 0;
25346 if (pop == const0_rtx)
25347 pop = NULL;
25348 gcc_assert (!TARGET_64BIT || !pop);
25350 if (TARGET_MACHO && !TARGET_64BIT)
25352 #if TARGET_MACHO
25353 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25354 fnaddr = machopic_indirect_call_target (fnaddr);
25355 #endif
25357 else
25359 /* Static functions and indirect calls don't need the pic register. */
25360 if (flag_pic
25361 && (!TARGET_64BIT
25362 || (ix86_cmodel == CM_LARGE_PIC
25363 && DEFAULT_ABI != MS_ABI))
25364 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25365 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25367 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25368 if (ix86_use_pseudo_pic_reg ())
25369 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25370 pic_offset_table_rtx);
25374 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25376 rtx al = gen_rtx_REG (QImode, AX_REG);
25377 emit_move_insn (al, callarg2);
25378 use_reg (&use, al);
25381 if (ix86_cmodel == CM_LARGE_PIC
25382 && !TARGET_PECOFF
25383 && MEM_P (fnaddr)
25384 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25385 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25386 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25387 else if (sibcall
25388 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25389 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25391 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25392 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25395 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25397 if (retval)
25399 /* We should add bounds as a destination register in case
25400 a pointer with bounds may be returned. */
25401 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25403 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25404 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25405 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25406 chkp_put_regs_to_expr_list (retval);
25409 call = gen_rtx_SET (VOIDmode, retval, call);
25411 vec[vec_len++] = call;
25413 if (pop)
25415 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25416 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25417 vec[vec_len++] = pop;
25420 if (TARGET_64BIT_MS_ABI
25421 && (!callarg2 || INTVAL (callarg2) != -2))
25423 int const cregs_size
25424 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25425 int i;
25427 for (i = 0; i < cregs_size; i++)
25429 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25430 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25432 clobber_reg (&use, gen_rtx_REG (mode, regno));
25436 if (vec_len > 1)
25437 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25438 call = emit_call_insn (call);
25439 if (use)
25440 CALL_INSN_FUNCTION_USAGE (call) = use;
25442 return call;
25445 /* Output the assembly for a call instruction. */
25447 const char *
25448 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25450 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25451 bool seh_nop_p = false;
25452 const char *xasm;
25454 if (SIBLING_CALL_P (insn))
25456 if (direct_p)
25457 xasm = "%!jmp\t%P0";
25458 /* SEH epilogue detection requires the indirect branch case
25459 to include REX.W. */
25460 else if (TARGET_SEH)
25461 xasm = "%!rex.W jmp %A0";
25462 else
25463 xasm = "%!jmp\t%A0";
25465 output_asm_insn (xasm, &call_op);
25466 return "";
25469 /* SEH unwinding can require an extra nop to be emitted in several
25470 circumstances. Determine if we have one of those. */
25471 if (TARGET_SEH)
25473 rtx_insn *i;
25475 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25477 /* If we get to another real insn, we don't need the nop. */
25478 if (INSN_P (i))
25479 break;
25481 /* If we get to the epilogue note, prevent a catch region from
25482 being adjacent to the standard epilogue sequence. If non-
25483 call-exceptions, we'll have done this during epilogue emission. */
25484 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25485 && !flag_non_call_exceptions
25486 && !can_throw_internal (insn))
25488 seh_nop_p = true;
25489 break;
25493 /* If we didn't find a real insn following the call, prevent the
25494 unwinder from looking into the next function. */
25495 if (i == NULL)
25496 seh_nop_p = true;
25499 if (direct_p)
25500 xasm = "%!call\t%P0";
25501 else
25502 xasm = "%!call\t%A0";
25504 output_asm_insn (xasm, &call_op);
25506 if (seh_nop_p)
25507 return "nop";
25509 return "";
25512 /* Clear stack slot assignments remembered from previous functions.
25513 This is called from INIT_EXPANDERS once before RTL is emitted for each
25514 function. */
25516 static struct machine_function *
25517 ix86_init_machine_status (void)
25519 struct machine_function *f;
25521 f = ggc_cleared_alloc<machine_function> ();
25522 f->use_fast_prologue_epilogue_nregs = -1;
25523 f->call_abi = ix86_abi;
25525 return f;
25528 /* Return a MEM corresponding to a stack slot with mode MODE.
25529 Allocate a new slot if necessary.
25531 The RTL for a function can have several slots available: N is
25532 which slot to use. */
25535 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25537 struct stack_local_entry *s;
25539 gcc_assert (n < MAX_386_STACK_LOCALS);
25541 for (s = ix86_stack_locals; s; s = s->next)
25542 if (s->mode == mode && s->n == n)
25543 return validize_mem (copy_rtx (s->rtl));
25545 s = ggc_alloc<stack_local_entry> ();
25546 s->n = n;
25547 s->mode = mode;
25548 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25550 s->next = ix86_stack_locals;
25551 ix86_stack_locals = s;
25552 return validize_mem (copy_rtx (s->rtl));
25555 static void
25556 ix86_instantiate_decls (void)
25558 struct stack_local_entry *s;
25560 for (s = ix86_stack_locals; s; s = s->next)
25561 if (s->rtl != NULL_RTX)
25562 instantiate_decl_rtl (s->rtl);
25565 /* Check whether x86 address PARTS is a pc-relative address. */
25567 static bool
25568 rip_relative_addr_p (struct ix86_address *parts)
25570 rtx base, index, disp;
25572 base = parts->base;
25573 index = parts->index;
25574 disp = parts->disp;
25576 if (disp && !base && !index)
25578 if (TARGET_64BIT)
25580 rtx symbol = disp;
25582 if (GET_CODE (disp) == CONST)
25583 symbol = XEXP (disp, 0);
25584 if (GET_CODE (symbol) == PLUS
25585 && CONST_INT_P (XEXP (symbol, 1)))
25586 symbol = XEXP (symbol, 0);
25588 if (GET_CODE (symbol) == LABEL_REF
25589 || (GET_CODE (symbol) == SYMBOL_REF
25590 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25591 || (GET_CODE (symbol) == UNSPEC
25592 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25593 || XINT (symbol, 1) == UNSPEC_PCREL
25594 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25595 return true;
25598 return false;
25601 /* Calculate the length of the memory address in the instruction encoding.
25602 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25603 or other prefixes. We never generate addr32 prefix for LEA insn. */
25606 memory_address_length (rtx addr, bool lea)
25608 struct ix86_address parts;
25609 rtx base, index, disp;
25610 int len;
25611 int ok;
25613 if (GET_CODE (addr) == PRE_DEC
25614 || GET_CODE (addr) == POST_INC
25615 || GET_CODE (addr) == PRE_MODIFY
25616 || GET_CODE (addr) == POST_MODIFY)
25617 return 0;
25619 ok = ix86_decompose_address (addr, &parts);
25620 gcc_assert (ok);
25622 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25624 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25625 if (TARGET_64BIT && !lea
25626 && (SImode_address_operand (addr, VOIDmode)
25627 || (parts.base && GET_MODE (parts.base) == SImode)
25628 || (parts.index && GET_MODE (parts.index) == SImode)))
25629 len++;
25631 base = parts.base;
25632 index = parts.index;
25633 disp = parts.disp;
25635 if (base && GET_CODE (base) == SUBREG)
25636 base = SUBREG_REG (base);
25637 if (index && GET_CODE (index) == SUBREG)
25638 index = SUBREG_REG (index);
25640 gcc_assert (base == NULL_RTX || REG_P (base));
25641 gcc_assert (index == NULL_RTX || REG_P (index));
25643 /* Rule of thumb:
25644 - esp as the base always wants an index,
25645 - ebp as the base always wants a displacement,
25646 - r12 as the base always wants an index,
25647 - r13 as the base always wants a displacement. */
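/* Added illustration (assumed encodings, shown for clarity): with a bare
   base register,
       mov (%esp),%eax -> 8b 04 24 (ModRM + SIB byte)
       mov (%ebp),%eax -> 8b 45 00 (ModRM + disp8)
   so %esp/%r12 cost an extra SIB byte and %ebp/%r13 cost an extra
   displacement byte, which is what the length computation below counts.  */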
25649 /* Register Indirect. */
25650 if (base && !index && !disp)
25652 /* esp (for its index) and ebp (for its displacement) need
25653 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25654 code. */
25655 if (base == arg_pointer_rtx
25656 || base == frame_pointer_rtx
25657 || REGNO (base) == SP_REG
25658 || REGNO (base) == BP_REG
25659 || REGNO (base) == R12_REG
25660 || REGNO (base) == R13_REG)
25661 len++;
25664 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25665 is not disp32, but disp32(%rip), so for disp32 a
25666 SIB byte is needed, unless print_operand_address
25667 optimizes it into disp32(%rip) or (%rip) is implied
25668 by UNSPEC. */
25669 else if (disp && !base && !index)
25671 len += 4;
25672 if (rip_relative_addr_p (&parts))
25673 len++;
25675 else
25677 /* Find the length of the displacement constant. */
25678 if (disp)
25680 if (base && satisfies_constraint_K (disp))
25681 len += 1;
25682 else
25683 len += 4;
25685 /* ebp always wants a displacement. Similarly r13. */
25686 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25687 len++;
25689 /* An index requires the two-byte modrm form.... */
25690 if (index
25691 /* ...like esp (or r12), which always wants an index. */
25692 || base == arg_pointer_rtx
25693 || base == frame_pointer_rtx
25694 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25695 len++;
25698 return len;
25701 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25702 is set, expect that the insn has an 8-bit immediate alternative. */
25704 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25706 int len = 0;
25707 int i;
25708 extract_insn_cached (insn);
25709 for (i = recog_data.n_operands - 1; i >= 0; --i)
25710 if (CONSTANT_P (recog_data.operand[i]))
25712 enum attr_mode mode = get_attr_mode (insn);
25714 gcc_assert (!len);
25715 if (shortform && CONST_INT_P (recog_data.operand[i]))
25717 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25718 switch (mode)
25720 case MODE_QI:
25721 len = 1;
25722 continue;
25723 case MODE_HI:
25724 ival = trunc_int_for_mode (ival, HImode);
25725 break;
25726 case MODE_SI:
25727 ival = trunc_int_for_mode (ival, SImode);
25728 break;
25729 default:
25730 break;
25732 if (IN_RANGE (ival, -128, 127))
25734 len = 1;
25735 continue;
25738 switch (mode)
25740 case MODE_QI:
25741 len = 1;
25742 break;
25743 case MODE_HI:
25744 len = 2;
25745 break;
25746 case MODE_SI:
25747 len = 4;
25748 break;
25749 /* Immediates for DImode instructions are encoded
25750 as 32bit sign extended values. */
25751 case MODE_DI:
25752 len = 4;
25753 break;
25754 default:
25755 fatal_insn ("unknown insn mode", insn);
25758 return len;
25761 /* Compute default value for "length_address" attribute. */
25763 ix86_attr_length_address_default (rtx_insn *insn)
25765 int i;
25767 if (get_attr_type (insn) == TYPE_LEA)
25769 rtx set = PATTERN (insn), addr;
25771 if (GET_CODE (set) == PARALLEL)
25772 set = XVECEXP (set, 0, 0);
25774 gcc_assert (GET_CODE (set) == SET);
25776 addr = SET_SRC (set);
25778 return memory_address_length (addr, true);
25781 extract_insn_cached (insn);
25782 for (i = recog_data.n_operands - 1; i >= 0; --i)
25783 if (MEM_P (recog_data.operand[i]))
25785 constrain_operands_cached (insn, reload_completed);
25786 if (which_alternative != -1)
25788 const char *constraints = recog_data.constraints[i];
25789 int alt = which_alternative;
25791 while (*constraints == '=' || *constraints == '+')
25792 constraints++;
25793 while (alt-- > 0)
25794 while (*constraints++ != ',')
25796 /* Skip ignored operands. */
25797 if (*constraints == 'X')
25798 continue;
25800 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25802 return 0;
25805 /* Compute default value for "length_vex" attribute. It includes
25806 2 or 3 byte VEX prefix and 1 opcode byte. */
25809 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25810 bool has_vex_w)
25812 int i;
25814 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
25815 requires the 3-byte VEX prefix. */
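/* Added note (illustrative): e.g. vaddps %xmm1,%xmm2,%xmm3 fits the 2-byte
   (C5) VEX prefix, while a 64-bit GPR operand (VEX.W) or an extended
   register used as a memory base/index (REX.X/REX.B) forces the 3-byte
   (C4) form, matching the checks below.  */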
25816 if (!has_0f_opcode || has_vex_w)
25817 return 3 + 1;
25819 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25820 if (!TARGET_64BIT)
25821 return 2 + 1;
25823 extract_insn_cached (insn);
25825 for (i = recog_data.n_operands - 1; i >= 0; --i)
25826 if (REG_P (recog_data.operand[i]))
25828 /* REX.W bit uses 3 byte VEX prefix. */
25829 if (GET_MODE (recog_data.operand[i]) == DImode
25830 && GENERAL_REG_P (recog_data.operand[i]))
25831 return 3 + 1;
25833 else
25835 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25836 if (MEM_P (recog_data.operand[i])
25837 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25838 return 3 + 1;
25841 return 2 + 1;
25844 /* Return the maximum number of instructions a cpu can issue. */
25846 static int
25847 ix86_issue_rate (void)
25849 switch (ix86_tune)
25851 case PROCESSOR_PENTIUM:
25852 case PROCESSOR_BONNELL:
25853 case PROCESSOR_SILVERMONT:
25854 case PROCESSOR_INTEL:
25855 case PROCESSOR_K6:
25856 case PROCESSOR_BTVER2:
25857 case PROCESSOR_PENTIUM4:
25858 case PROCESSOR_NOCONA:
25859 return 2;
25861 case PROCESSOR_PENTIUMPRO:
25862 case PROCESSOR_ATHLON:
25863 case PROCESSOR_K8:
25864 case PROCESSOR_AMDFAM10:
25865 case PROCESSOR_GENERIC:
25866 case PROCESSOR_BTVER1:
25867 return 3;
25869 case PROCESSOR_BDVER1:
25870 case PROCESSOR_BDVER2:
25871 case PROCESSOR_BDVER3:
25872 case PROCESSOR_BDVER4:
25873 case PROCESSOR_CORE2:
25874 case PROCESSOR_NEHALEM:
25875 case PROCESSOR_SANDYBRIDGE:
25876 case PROCESSOR_HASWELL:
25877 return 4;
25879 default:
25880 return 1;
25884 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25885 by DEP_INSN and nothing else set by DEP_INSN. */
25887 static bool
25888 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25890 rtx set, set2;
25892 /* Simplify the test for uninteresting insns. */
25893 if (insn_type != TYPE_SETCC
25894 && insn_type != TYPE_ICMOV
25895 && insn_type != TYPE_FCMOV
25896 && insn_type != TYPE_IBR)
25897 return false;
25899 if ((set = single_set (dep_insn)) != 0)
25901 set = SET_DEST (set);
25902 set2 = NULL_RTX;
25904 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25905 && XVECLEN (PATTERN (dep_insn), 0) == 2
25906 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25907 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25909 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25910 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25912 else
25913 return false;
25915 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25916 return false;
25918 /* This test is true if the dependent insn reads the flags but
25919 not any other potentially set register. */
25920 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25921 return false;
25923 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25924 return false;
25926 return true;
25929 /* Return true iff USE_INSN has a memory address with operands set by
25930 SET_INSN. */
25932 bool
25933 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25935 int i;
25936 extract_insn_cached (use_insn);
25937 for (i = recog_data.n_operands - 1; i >= 0; --i)
25938 if (MEM_P (recog_data.operand[i]))
25940 rtx addr = XEXP (recog_data.operand[i], 0);
25941 return modified_in_p (addr, set_insn) != 0;
25943 return false;
25946 /* Helper function for exact_store_load_dependency.
25947 Return true if addr is found in insn. */
25948 static bool
25949 exact_dependency_1 (rtx addr, rtx insn)
25951 enum rtx_code code;
25952 const char *format_ptr;
25953 int i, j;
25955 code = GET_CODE (insn);
25956 switch (code)
25958 case MEM:
25959 if (rtx_equal_p (addr, insn))
25960 return true;
25961 break;
25962 case REG:
25963 CASE_CONST_ANY:
25964 case SYMBOL_REF:
25965 case CODE_LABEL:
25966 case PC:
25967 case CC0:
25968 case EXPR_LIST:
25969 return false;
25970 default:
25971 break;
25974 format_ptr = GET_RTX_FORMAT (code);
25975 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25977 switch (*format_ptr++)
25979 case 'e':
25980 if (exact_dependency_1 (addr, XEXP (insn, i)))
25981 return true;
25982 break;
25983 case 'E':
25984 for (j = 0; j < XVECLEN (insn, i); j++)
25985 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25986 return true;
25987 break;
25990 return false;
25993 /* Return true if there exists an exact dependency between store & load, i.e.
25994 the same memory address is used in both. */
25995 static bool
25996 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25998 rtx set1, set2;
26000 set1 = single_set (store);
26001 if (!set1)
26002 return false;
26003 if (!MEM_P (SET_DEST (set1)))
26004 return false;
26005 set2 = single_set (load);
26006 if (!set2)
26007 return false;
26008 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26009 return true;
26010 return false;
26013 static int
26014 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26016 enum attr_type insn_type, dep_insn_type;
26017 enum attr_memory memory;
26018 rtx set, set2;
26019 int dep_insn_code_number;
26021 /* Anti and output dependencies have zero cost on all CPUs. */
26022 if (REG_NOTE_KIND (link) != 0)
26023 return 0;
26025 dep_insn_code_number = recog_memoized (dep_insn);
26027 /* If we can't recognize the insns, we can't really do anything. */
26028 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26029 return cost;
26031 insn_type = get_attr_type (insn);
26032 dep_insn_type = get_attr_type (dep_insn);
26034 switch (ix86_tune)
26036 case PROCESSOR_PENTIUM:
26037 /* Address Generation Interlock adds a cycle of latency. */
26038 if (insn_type == TYPE_LEA)
26040 rtx addr = PATTERN (insn);
26042 if (GET_CODE (addr) == PARALLEL)
26043 addr = XVECEXP (addr, 0, 0);
26045 gcc_assert (GET_CODE (addr) == SET);
26047 addr = SET_SRC (addr);
26048 if (modified_in_p (addr, dep_insn))
26049 cost += 1;
26051 else if (ix86_agi_dependent (dep_insn, insn))
26052 cost += 1;
26054 /* ??? Compares pair with jump/setcc. */
26055 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26056 cost = 0;
26058 /* Floating point stores require value to be ready one cycle earlier. */
26059 if (insn_type == TYPE_FMOV
26060 && get_attr_memory (insn) == MEMORY_STORE
26061 && !ix86_agi_dependent (dep_insn, insn))
26062 cost += 1;
26063 break;
26065 case PROCESSOR_PENTIUMPRO:
26066 /* INT->FP conversion is expensive. */
26067 if (get_attr_fp_int_src (dep_insn))
26068 cost += 5;
26070 /* There is one cycle extra latency between an FP op and a store. */
26071 if (insn_type == TYPE_FMOV
26072 && (set = single_set (dep_insn)) != NULL_RTX
26073 && (set2 = single_set (insn)) != NULL_RTX
26074 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26075 && MEM_P (SET_DEST (set2)))
26076 cost += 1;
26078 memory = get_attr_memory (insn);
26080 /* Show ability of reorder buffer to hide latency of load by executing
26081 in parallel with previous instruction in case
26082 previous instruction is not needed to compute the address. */
26083 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26084 && !ix86_agi_dependent (dep_insn, insn))
26086 /* Claim moves to take one cycle, as the core can issue one load
26087 at a time and the next load can start a cycle later. */
26088 if (dep_insn_type == TYPE_IMOV
26089 || dep_insn_type == TYPE_FMOV)
26090 cost = 1;
26091 else if (cost > 1)
26092 cost--;
26094 break;
26096 case PROCESSOR_K6:
26097 /* The esp dependency is resolved before
26098 the instruction is really finished. */
26099 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26100 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26101 return 1;
26103 /* INT->FP conversion is expensive. */
26104 if (get_attr_fp_int_src (dep_insn))
26105 cost += 5;
26107 memory = get_attr_memory (insn);
26109 /* Show ability of reorder buffer to hide latency of load by executing
26110 in parallel with previous instruction in case
26111 previous instruction is not needed to compute the address. */
26112 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26113 && !ix86_agi_dependent (dep_insn, insn))
26115 /* Claim moves to take one cycle, as the core can issue one load
26116 at a time and the next load can start a cycle later. */
26117 if (dep_insn_type == TYPE_IMOV
26118 || dep_insn_type == TYPE_FMOV)
26119 cost = 1;
26120 else if (cost > 2)
26121 cost -= 2;
26122 else
26123 cost = 1;
26125 break;
26127 case PROCESSOR_AMDFAM10:
26128 case PROCESSOR_BDVER1:
26129 case PROCESSOR_BDVER2:
26130 case PROCESSOR_BDVER3:
26131 case PROCESSOR_BDVER4:
26132 case PROCESSOR_BTVER1:
26133 case PROCESSOR_BTVER2:
26134 case PROCESSOR_GENERIC:
26135 /* The stack engine allows push & pop instructions to execute in parallel. */
26136 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26137 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26138 return 0;
26139 /* FALLTHRU */
26141 case PROCESSOR_ATHLON:
26142 case PROCESSOR_K8:
26143 memory = get_attr_memory (insn);
26145 /* Show ability of reorder buffer to hide latency of load by executing
26146 in parallel with previous instruction in case
26147 previous instruction is not needed to compute the address. */
26148 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26149 && !ix86_agi_dependent (dep_insn, insn))
26151 enum attr_unit unit = get_attr_unit (insn);
26152 int loadcost = 3;
26154 /* Because of the difference between the length of integer and
26155 floating unit pipeline preparation stages, the memory operands
26156 for floating point are cheaper.
26158 ??? For Athlon the difference is most probably 2. */
26159 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26160 loadcost = 3;
26161 else
26162 loadcost = TARGET_ATHLON ? 2 : 0;
26164 if (cost >= loadcost)
26165 cost -= loadcost;
26166 else
26167 cost = 0;
26169 break;
26171 case PROCESSOR_CORE2:
26172 case PROCESSOR_NEHALEM:
26173 case PROCESSOR_SANDYBRIDGE:
26174 case PROCESSOR_HASWELL:
26175 /* The stack engine allows push & pop instructions to execute in parallel. */
26176 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26177 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26178 return 0;
26180 memory = get_attr_memory (insn);
26182 /* Show ability of reorder buffer to hide latency of load by executing
26183 in parallel with previous instruction in case
26184 previous instruction is not needed to compute the address. */
26185 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26186 && !ix86_agi_dependent (dep_insn, insn))
26188 if (cost >= 4)
26189 cost -= 4;
26190 else
26191 cost = 0;
26193 break;
26195 case PROCESSOR_SILVERMONT:
26196 case PROCESSOR_INTEL:
26197 if (!reload_completed)
26198 return cost;
26200 /* Increase cost of integer loads. */
26201 memory = get_attr_memory (dep_insn);
26202 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26204 enum attr_unit unit = get_attr_unit (dep_insn);
26205 if (unit == UNIT_INTEGER && cost == 1)
26207 if (memory == MEMORY_LOAD)
26208 cost = 3;
26209 else
26211 /* Increase cost of ld/st for short int types only
26212 because of store forwarding issue. */
26213 rtx set = single_set (dep_insn);
26214 if (set && (GET_MODE (SET_DEST (set)) == QImode
26215 || GET_MODE (SET_DEST (set)) == HImode))
26217 /* Increase cost of store/load insn if exact
26218 dependence exists and it is load insn. */
26219 enum attr_memory insn_memory = get_attr_memory (insn);
26220 if (insn_memory == MEMORY_LOAD
26221 && exact_store_load_dependency (dep_insn, insn))
26222 cost = 3;
26228 default:
26229 break;
26232 return cost;
26235 /* How many alternative schedules to try. This should be as wide as the
26236 scheduling freedom in the DFA, but no wider. Making this value too
26237 large results in extra work for the scheduler. */
26239 static int
26240 ia32_multipass_dfa_lookahead (void)
26242 switch (ix86_tune)
26244 case PROCESSOR_PENTIUM:
26245 return 2;
26247 case PROCESSOR_PENTIUMPRO:
26248 case PROCESSOR_K6:
26249 return 1;
26251 case PROCESSOR_BDVER1:
26252 case PROCESSOR_BDVER2:
26253 case PROCESSOR_BDVER3:
26254 case PROCESSOR_BDVER4:
26255 /* We use lookahead value 4 for BD both before and after reload
26256 schedules. Plan is to have value 8 included for O3. */
26257 return 4;
26259 case PROCESSOR_CORE2:
26260 case PROCESSOR_NEHALEM:
26261 case PROCESSOR_SANDYBRIDGE:
26262 case PROCESSOR_HASWELL:
26263 case PROCESSOR_BONNELL:
26264 case PROCESSOR_SILVERMONT:
26265 case PROCESSOR_INTEL:
26266 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26267 as many instructions can be executed on a cycle, i.e.,
26268 issue_rate. I wonder why tuning for many CPUs does not do this. */
26269 if (reload_completed)
26270 return ix86_issue_rate ();
26271 /* Don't use lookahead for pre-reload schedule to save compile time. */
26272 return 0;
26274 default:
26275 return 0;
26279 /* Return true if target platform supports macro-fusion. */
26281 static bool
26282 ix86_macro_fusion_p ()
26284 return TARGET_FUSE_CMP_AND_BRANCH;
26287 /* Check whether the current microarchitecture supports macro fusion
26288 for the insn pair "CONDGEN + CONDJMP". Refer to
26289 "Intel Architectures Optimization Reference Manual". */
26291 static bool
26292 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26294 rtx src, dest;
26295 enum rtx_code ccode;
26296 rtx compare_set = NULL_RTX, test_if, cond;
26297 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26299 if (!any_condjump_p (condjmp))
26300 return false;
26302 if (get_attr_type (condgen) != TYPE_TEST
26303 && get_attr_type (condgen) != TYPE_ICMP
26304 && get_attr_type (condgen) != TYPE_INCDEC
26305 && get_attr_type (condgen) != TYPE_ALU)
26306 return false;
26308 compare_set = single_set (condgen);
26309 if (compare_set == NULL_RTX
26310 && !TARGET_FUSE_ALU_AND_BRANCH)
26311 return false;
26313 if (compare_set == NULL_RTX)
26315 int i;
26316 rtx pat = PATTERN (condgen);
26317 for (i = 0; i < XVECLEN (pat, 0); i++)
26318 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26320 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26321 if (GET_CODE (set_src) == COMPARE)
26322 compare_set = XVECEXP (pat, 0, i);
26323 else
26324 alu_set = XVECEXP (pat, 0, i);
26327 if (compare_set == NULL_RTX)
26328 return false;
26329 src = SET_SRC (compare_set);
26330 if (GET_CODE (src) != COMPARE)
26331 return false;
26333 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26334 supported. */
26335 if ((MEM_P (XEXP (src, 0))
26336 && CONST_INT_P (XEXP (src, 1)))
26337 || (MEM_P (XEXP (src, 1))
26338 && CONST_INT_P (XEXP (src, 0))))
26339 return false;
26341 /* No fusion for RIP-relative address. */
26342 if (MEM_P (XEXP (src, 0)))
26343 addr = XEXP (XEXP (src, 0), 0);
26344 else if (MEM_P (XEXP (src, 1)))
26345 addr = XEXP (XEXP (src, 1), 0);
26347 if (addr) {
26348 ix86_address parts;
26349 int ok = ix86_decompose_address (addr, &parts);
26350 gcc_assert (ok);
26352 if (rip_relative_addr_p (&parts))
26353 return false;
26356 test_if = SET_SRC (pc_set (condjmp));
26357 cond = XEXP (test_if, 0);
26358 ccode = GET_CODE (cond);
26359 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26360 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26361 && (ccode == GE
26362 || ccode == GT
26363 || ccode == LE
26364 || ccode == LT))
26365 return false;
26367 /* Return true for TYPE_TEST and TYPE_ICMP. */
26368 if (get_attr_type (condgen) == TYPE_TEST
26369 || get_attr_type (condgen) == TYPE_ICMP)
26370 return true;
26372 /* The following handles the case of macro-fusion for alu + jmp. */
26373 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26374 return false;
26376 /* No fusion for alu op with memory destination operand. */
26377 dest = SET_DEST (alu_set);
26378 if (MEM_P (dest))
26379 return false;
26381 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26382 supported. */
26383 if (get_attr_type (condgen) == TYPE_INCDEC
26384 && (ccode == GEU
26385 || ccode == GTU
26386 || ccode == LEU
26387 || ccode == LTU))
26388 return false;
26390 return true;
26393 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26394 execution. It is applied if
26395 (1) an IMUL instruction is on the top of the list;
26396 (2) there exists exactly one producer of an independent IMUL instruction
26397 in the ready list.
26398 Return the index of the IMUL producer if it was found and -1 otherwise. */
26399 static int
26400 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26402 rtx_insn *insn;
26403 rtx set, insn1, insn2;
26404 sd_iterator_def sd_it;
26405 dep_t dep;
26406 int index = -1;
26407 int i;
26409 if (!TARGET_BONNELL)
26410 return index;
26412 /* Check that IMUL instruction is on the top of ready list. */
26413 insn = ready[n_ready - 1];
26414 set = single_set (insn);
26415 if (!set)
26416 return index;
26417 if (!(GET_CODE (SET_SRC (set)) == MULT
26418 && GET_MODE (SET_SRC (set)) == SImode))
26419 return index;
26421 /* Search for producer of independent IMUL instruction. */
26422 for (i = n_ready - 2; i >= 0; i--)
26424 insn = ready[i];
26425 if (!NONDEBUG_INSN_P (insn))
26426 continue;
26427 /* Skip IMUL instruction. */
26428 insn2 = PATTERN (insn);
26429 if (GET_CODE (insn2) == PARALLEL)
26430 insn2 = XVECEXP (insn2, 0, 0);
26431 if (GET_CODE (insn2) == SET
26432 && GET_CODE (SET_SRC (insn2)) == MULT
26433 && GET_MODE (SET_SRC (insn2)) == SImode)
26434 continue;
26436 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26438 rtx con;
26439 con = DEP_CON (dep);
26440 if (!NONDEBUG_INSN_P (con))
26441 continue;
26442 insn1 = PATTERN (con);
26443 if (GET_CODE (insn1) == PARALLEL)
26444 insn1 = XVECEXP (insn1, 0, 0);
26446 if (GET_CODE (insn1) == SET
26447 && GET_CODE (SET_SRC (insn1)) == MULT
26448 && GET_MODE (SET_SRC (insn1)) == SImode)
26450 sd_iterator_def sd_it1;
26451 dep_t dep1;
26452 /* Check if there is no other dependee for IMUL. */
26453 index = i;
26454 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26456 rtx pro;
26457 pro = DEP_PRO (dep1);
26458 if (!NONDEBUG_INSN_P (pro))
26459 continue;
26460 if (pro != insn)
26461 index = -1;
26463 if (index >= 0)
26464 break;
26467 if (index >= 0)
26468 break;
26470 return index;
26473 /* Try to find the best candidate at the top of the ready list when two insns
26474 have the same priority - the candidate is best if its dependees were
26475 scheduled earlier. Applied for Silvermont only.
26476 Return true if the top 2 insns must be interchanged. */
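/* Informal summary (an illustration of the checks below, not new policy): when
   both candidates' producers were scheduled on the same tick, the memory load
   is preferred on top so its latency starts earlier; otherwise the insn whose
   producers were scheduled earlier wins.  */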
26477 static bool
26478 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26480 rtx_insn *top = ready[n_ready - 1];
26481 rtx_insn *next = ready[n_ready - 2];
26482 rtx set;
26483 sd_iterator_def sd_it;
26484 dep_t dep;
26485 int clock1 = -1;
26486 int clock2 = -1;
26487 #define INSN_TICK(INSN) (HID (INSN)->tick)
26489 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26490 return false;
26492 if (!NONDEBUG_INSN_P (top))
26493 return false;
26494 if (!NONJUMP_INSN_P (top))
26495 return false;
26496 if (!NONDEBUG_INSN_P (next))
26497 return false;
26498 if (!NONJUMP_INSN_P (next))
26499 return false;
26500 set = single_set (top);
26501 if (!set)
26502 return false;
26503 set = single_set (next);
26504 if (!set)
26505 return false;
26507 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26509 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26510 return false;
26511 /* Determine the winner more precisely. */
26512 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26514 rtx pro;
26515 pro = DEP_PRO (dep);
26516 if (!NONDEBUG_INSN_P (pro))
26517 continue;
26518 if (INSN_TICK (pro) > clock1)
26519 clock1 = INSN_TICK (pro);
26521 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26523 rtx pro;
26524 pro = DEP_PRO (dep);
26525 if (!NONDEBUG_INSN_P (pro))
26526 continue;
26527 if (INSN_TICK (pro) > clock2)
26528 clock2 = INSN_TICK (pro);
26531 if (clock1 == clock2)
26533 /* Determine the winner - a load must win. */
26534 enum attr_memory memory1, memory2;
26535 memory1 = get_attr_memory (top);
26536 memory2 = get_attr_memory (next);
26537 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26538 return true;
26540 return (bool) (clock2 < clock1);
26542 return false;
26543 #undef INSN_TICK
26546 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26547 Return the issue rate. */
26548 static int
26549 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26550 int *pn_ready, int clock_var)
26552 int issue_rate = -1;
26553 int n_ready = *pn_ready;
26554 int i;
26555 rtx_insn *insn;
26556 int index = -1;
26558 /* Set up issue rate. */
26559 issue_rate = ix86_issue_rate ();
26561 /* Do reordering for BONNELL/SILVERMONT only. */
26562 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26563 return issue_rate;
26565 /* Nothing to do if ready list contains only 1 instruction. */
26566 if (n_ready <= 1)
26567 return issue_rate;
26569 /* Do reordering for the post-reload scheduler only. */
26570 if (!reload_completed)
26571 return issue_rate;
26573 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26575 if (sched_verbose > 1)
26576 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26577 INSN_UID (ready[index]));
26579 /* Put IMUL producer (ready[index]) at the top of ready list. */
26580 insn = ready[index];
26581 for (i = index; i < n_ready - 1; i++)
26582 ready[i] = ready[i + 1];
26583 ready[n_ready - 1] = insn;
26584 return issue_rate;
26586 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26588 if (sched_verbose > 1)
26589 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26590 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26591 /* Swap 2 top elements of ready list. */
26592 insn = ready[n_ready - 1];
26593 ready[n_ready - 1] = ready[n_ready - 2];
26594 ready[n_ready - 2] = insn;
26596 return issue_rate;
26599 static bool
26600 ix86_class_likely_spilled_p (reg_class_t);
26602 /* Return true if the lhs of INSN is a HW function argument register, and set
26603 *IS_SPILLED to true if it is a likely spilled HW register. */
26604 static bool
26605 insn_is_function_arg (rtx insn, bool* is_spilled)
26607 rtx dst;
26609 if (!NONDEBUG_INSN_P (insn))
26610 return false;
26611 /* A call instruction is not movable, so ignore it. */
26612 if (CALL_P (insn))
26613 return false;
26614 insn = PATTERN (insn);
26615 if (GET_CODE (insn) == PARALLEL)
26616 insn = XVECEXP (insn, 0, 0);
26617 if (GET_CODE (insn) != SET)
26618 return false;
26619 dst = SET_DEST (insn);
26620 if (REG_P (dst) && HARD_REGISTER_P (dst)
26621 && ix86_function_arg_regno_p (REGNO (dst)))
26623 /* Is it a likely spilled HW register? */
26624 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26625 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26626 *is_spilled = true;
26627 return true;
26629 return false;
26632 /* Add output dependencies for a chain of adjacent function arguments, but only
26633 if there is a move to a likely spilled HW register. Return the first argument
26634 if at least one dependence was added, or NULL otherwise. */
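/* Illustrative example (an assumption about typical 64-bit SysV code, not taken
   from the original comment): for a call like foo (a, b), the argument moves
   into %edi and %esi are chained with output dependencies so the pre-reload
   scheduler keeps the sequence together instead of stretching the live ranges
   of these likely spilled hard registers.  */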
26635 static rtx_insn *
26636 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26638 rtx_insn *insn;
26639 rtx_insn *last = call;
26640 rtx_insn *first_arg = NULL;
26641 bool is_spilled = false;
26643 head = PREV_INSN (head);
26645 /* Find the argument-passing instruction nearest to the call. */
26646 while (true)
26648 last = PREV_INSN (last);
26649 if (last == head)
26650 return NULL;
26651 if (!NONDEBUG_INSN_P (last))
26652 continue;
26653 if (insn_is_function_arg (last, &is_spilled))
26654 break;
26655 return NULL;
26658 first_arg = last;
26659 while (true)
26661 insn = PREV_INSN (last);
26662 if (!INSN_P (insn))
26663 break;
26664 if (insn == head)
26665 break;
26666 if (!NONDEBUG_INSN_P (insn))
26668 last = insn;
26669 continue;
26671 if (insn_is_function_arg (insn, &is_spilled))
26673 /* Add an output dependence between two function arguments if the chain
26674 of output arguments contains likely spilled HW registers. */
26675 if (is_spilled)
26676 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26677 first_arg = last = insn;
26679 else
26680 break;
26682 if (!is_spilled)
26683 return NULL;
26684 return first_arg;
26687 /* Add output or anti dependency from insn to first_arg to restrict its code
26688 motion. */
26689 static void
26690 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26692 rtx set;
26693 rtx tmp;
26695 set = single_set (insn);
26696 if (!set)
26697 return;
26698 tmp = SET_DEST (set);
26699 if (REG_P (tmp))
26701 /* Add output dependency to the first function argument. */
26702 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26703 return;
26705 /* Add anti dependency. */
26706 add_dependence (first_arg, insn, REG_DEP_ANTI);
26709 /* Avoid cross-block motion of a function argument by adding a dependency
26710 from the first non-jump instruction in bb. */
26711 static void
26712 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26714 rtx_insn *insn = BB_END (bb);
26716 while (insn)
26718 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26720 rtx set = single_set (insn);
26721 if (set)
26723 avoid_func_arg_motion (arg, insn);
26724 return;
26727 if (insn == BB_HEAD (bb))
26728 return;
26729 insn = PREV_INSN (insn);
26733 /* Hook for pre-reload schedule - avoid motion of function arguments
26734 passed in likely spilled HW registers. */
26735 static void
26736 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26738 rtx_insn *insn;
26739 rtx_insn *first_arg = NULL;
26740 if (reload_completed)
26741 return;
26742 while (head != tail && DEBUG_INSN_P (head))
26743 head = NEXT_INSN (head);
26744 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26745 if (INSN_P (insn) && CALL_P (insn))
26747 first_arg = add_parameter_dependencies (insn, head);
26748 if (first_arg)
26750 /* Add a dependee for the first argument to predecessors, but only if
26751 the region contains more than one block. */
26752 basic_block bb = BLOCK_FOR_INSN (insn);
26753 int rgn = CONTAINING_RGN (bb->index);
26754 int nr_blks = RGN_NR_BLOCKS (rgn);
26755 /* Skip trivial regions and region head blocks that can have
26756 predecessors outside of region. */
26757 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26759 edge e;
26760 edge_iterator ei;
26762 /* Regions are SCCs with the exception of selective
26763 scheduling with pipelining of outer blocks enabled.
26764 So also check that immediate predecessors of a non-head
26765 block are in the same region. */
26766 FOR_EACH_EDGE (e, ei, bb->preds)
26768 /* Avoid creating loop-carried dependencies by using
26769 the topological ordering in the region. */
26770 if (rgn == CONTAINING_RGN (e->src->index)
26771 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26772 add_dependee_for_func_arg (first_arg, e->src);
26775 insn = first_arg;
26776 if (insn == head)
26777 break;
26780 else if (first_arg)
26781 avoid_func_arg_motion (first_arg, insn);
26784 /* Hook for pre-reload schedule - set the priority of moves from likely spilled
26785 HW registers to the maximum, to schedule them as soon as possible. These are
26786 moves from function argument registers at the top of the function entry
26787 and moves from function return value registers after a call. */
26788 static int
26789 ix86_adjust_priority (rtx_insn *insn, int priority)
26791 rtx set;
26793 if (reload_completed)
26794 return priority;
26796 if (!NONDEBUG_INSN_P (insn))
26797 return priority;
26799 set = single_set (insn);
26800 if (set)
26802 rtx tmp = SET_SRC (set);
26803 if (REG_P (tmp)
26804 && HARD_REGISTER_P (tmp)
26805 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26806 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26807 return current_sched_info->sched_max_insns_priority;
26810 return priority;
26813 /* Model the decoder of Core 2/i7.
26814 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26815 track the instruction fetch block boundaries and make sure that long
26816 (9+ byte) instructions are assigned to decoder D0. */
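/* Worked example (illustration only): with a 16-byte ifetch block, a 6-insn
   decode limit and an 8-byte cap on the secondary decoders, three 5-byte insns
   fill 15 of the 16 bytes, so a following 2-byte insn must wait for the next
   fetch block even though only 3 of the 6 decode slots are used; a 9-byte insn
   can only go into the first slot of a cycle (decoder D0).  */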
26818 /* Maximum length of an insn that can be handled by
26819 a secondary decoder unit. '8' for Core 2/i7. */
26820 static int core2i7_secondary_decoder_max_insn_size;
26822 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26823 '16' for Core 2/i7. */
26824 static int core2i7_ifetch_block_size;
26826 /* Maximum number of instructions decoder can handle per cycle.
26827 '6' for Core 2/i7. */
26828 static int core2i7_ifetch_block_max_insns;
26830 typedef struct ix86_first_cycle_multipass_data_ *
26831 ix86_first_cycle_multipass_data_t;
26832 typedef const struct ix86_first_cycle_multipass_data_ *
26833 const_ix86_first_cycle_multipass_data_t;
26835 /* A variable to store target state across calls to max_issue within
26836 one cycle. */
26837 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26838 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26840 /* Initialize DATA. */
26841 static void
26842 core2i7_first_cycle_multipass_init (void *_data)
26844 ix86_first_cycle_multipass_data_t data
26845 = (ix86_first_cycle_multipass_data_t) _data;
26847 data->ifetch_block_len = 0;
26848 data->ifetch_block_n_insns = 0;
26849 data->ready_try_change = NULL;
26850 data->ready_try_change_size = 0;
26853 /* Advancing the cycle; reset ifetch block counts. */
26854 static void
26855 core2i7_dfa_post_advance_cycle (void)
26857 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26859 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26861 data->ifetch_block_len = 0;
26862 data->ifetch_block_n_insns = 0;
26865 static int min_insn_size (rtx_insn *);
26867 /* Filter out insns from ready_try that the core will not be able to issue
26868 on the current cycle due to decoder restrictions. */
26869 static void
26870 core2i7_first_cycle_multipass_filter_ready_try
26871 (const_ix86_first_cycle_multipass_data_t data,
26872 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26874 while (n_ready--)
26876 rtx_insn *insn;
26877 int insn_size;
26879 if (ready_try[n_ready])
26880 continue;
26882 insn = get_ready_element (n_ready);
26883 insn_size = min_insn_size (insn);
26885 if (/* If this insn is too long for a secondary decoder ... */
26886 (!first_cycle_insn_p
26887 && insn_size > core2i7_secondary_decoder_max_insn_size)
26888 /* ... or it would not fit into the ifetch block ... */
26889 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26890 /* ... or the decoder is full already ... */
26891 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26892 /* ... mask the insn out. */
26894 ready_try[n_ready] = 1;
26896 if (data->ready_try_change)
26897 bitmap_set_bit (data->ready_try_change, n_ready);
26902 /* Prepare for a new round of multipass lookahead scheduling. */
26903 static void
26904 core2i7_first_cycle_multipass_begin (void *_data,
26905 signed char *ready_try, int n_ready,
26906 bool first_cycle_insn_p)
26908 ix86_first_cycle_multipass_data_t data
26909 = (ix86_first_cycle_multipass_data_t) _data;
26910 const_ix86_first_cycle_multipass_data_t prev_data
26911 = ix86_first_cycle_multipass_data;
26913 /* Restore the state from the end of the previous round. */
26914 data->ifetch_block_len = prev_data->ifetch_block_len;
26915 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26917 /* Filter instructions that cannot be issued on current cycle due to
26918 decoder restrictions. */
26919 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26920 first_cycle_insn_p);
26923 /* INSN is being issued in current solution. Account for its impact on
26924 the decoder model. */
26925 static void
26926 core2i7_first_cycle_multipass_issue (void *_data,
26927 signed char *ready_try, int n_ready,
26928 rtx_insn *insn, const void *_prev_data)
26930 ix86_first_cycle_multipass_data_t data
26931 = (ix86_first_cycle_multipass_data_t) _data;
26932 const_ix86_first_cycle_multipass_data_t prev_data
26933 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26935 int insn_size = min_insn_size (insn);
26937 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26938 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26939 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26940 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26942 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26943 if (!data->ready_try_change)
26945 data->ready_try_change = sbitmap_alloc (n_ready);
26946 data->ready_try_change_size = n_ready;
26948 else if (data->ready_try_change_size < n_ready)
26950 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26951 n_ready, 0);
26952 data->ready_try_change_size = n_ready;
26954 bitmap_clear (data->ready_try_change);
26956 /* Filter out insns from ready_try that the core will not be able to issue
26957 on the current cycle due to decoder restrictions. */
26958 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26959 false);
26962 /* Revert the effect on ready_try. */
26963 static void
26964 core2i7_first_cycle_multipass_backtrack (const void *_data,
26965 signed char *ready_try,
26966 int n_ready ATTRIBUTE_UNUSED)
26968 const_ix86_first_cycle_multipass_data_t data
26969 = (const_ix86_first_cycle_multipass_data_t) _data;
26970 unsigned int i = 0;
26971 sbitmap_iterator sbi;
26973 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26974 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26976 ready_try[i] = 0;
26980 /* Save the result of multipass lookahead scheduling for the next round. */
26981 static void
26982 core2i7_first_cycle_multipass_end (const void *_data)
26984 const_ix86_first_cycle_multipass_data_t data
26985 = (const_ix86_first_cycle_multipass_data_t) _data;
26986 ix86_first_cycle_multipass_data_t next_data
26987 = ix86_first_cycle_multipass_data;
26989 if (data != NULL)
26991 next_data->ifetch_block_len = data->ifetch_block_len;
26992 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26996 /* Deallocate target data. */
26997 static void
26998 core2i7_first_cycle_multipass_fini (void *_data)
27000 ix86_first_cycle_multipass_data_t data
27001 = (ix86_first_cycle_multipass_data_t) _data;
27003 if (data->ready_try_change)
27005 sbitmap_free (data->ready_try_change);
27006 data->ready_try_change = NULL;
27007 data->ready_try_change_size = 0;
27011 /* Prepare for scheduling pass. */
27012 static void
27013 ix86_sched_init_global (FILE *, int, int)
27015 /* Install scheduling hooks for current CPU. Some of these hooks are used
27016 in time-critical parts of the scheduler, so we only set them up when
27017 they are actually used. */
27018 switch (ix86_tune)
27020 case PROCESSOR_CORE2:
27021 case PROCESSOR_NEHALEM:
27022 case PROCESSOR_SANDYBRIDGE:
27023 case PROCESSOR_HASWELL:
27024 /* Do not perform multipass scheduling for pre-reload schedule
27025 to save compile time. */
27026 if (reload_completed)
27028 targetm.sched.dfa_post_advance_cycle
27029 = core2i7_dfa_post_advance_cycle;
27030 targetm.sched.first_cycle_multipass_init
27031 = core2i7_first_cycle_multipass_init;
27032 targetm.sched.first_cycle_multipass_begin
27033 = core2i7_first_cycle_multipass_begin;
27034 targetm.sched.first_cycle_multipass_issue
27035 = core2i7_first_cycle_multipass_issue;
27036 targetm.sched.first_cycle_multipass_backtrack
27037 = core2i7_first_cycle_multipass_backtrack;
27038 targetm.sched.first_cycle_multipass_end
27039 = core2i7_first_cycle_multipass_end;
27040 targetm.sched.first_cycle_multipass_fini
27041 = core2i7_first_cycle_multipass_fini;
27043 /* Set decoder parameters. */
27044 core2i7_secondary_decoder_max_insn_size = 8;
27045 core2i7_ifetch_block_size = 16;
27046 core2i7_ifetch_block_max_insns = 6;
27047 break;
27049 /* ... Fall through ... */
27050 default:
27051 targetm.sched.dfa_post_advance_cycle = NULL;
27052 targetm.sched.first_cycle_multipass_init = NULL;
27053 targetm.sched.first_cycle_multipass_begin = NULL;
27054 targetm.sched.first_cycle_multipass_issue = NULL;
27055 targetm.sched.first_cycle_multipass_backtrack = NULL;
27056 targetm.sched.first_cycle_multipass_end = NULL;
27057 targetm.sched.first_cycle_multipass_fini = NULL;
27058 break;
27063 /* Compute the alignment given to a constant that is being placed in memory.
27064 EXP is the constant and ALIGN is the alignment that the object would
27065 ordinarily have.
27066 The value of this function is used instead of that alignment to align
27067 the object. */
27070 ix86_constant_alignment (tree exp, int align)
27072 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27073 || TREE_CODE (exp) == INTEGER_CST)
27075 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27076 return 64;
27077 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27078 return 128;
27080 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27081 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27082 return BITS_PER_WORD;
27084 return align;
27087 /* Compute the alignment for a static variable.
27088 TYPE is the data type, and ALIGN is the alignment that
27089 the object would ordinarily have. The value of this function is used
27090 instead of that alignment to align the object. */
27093 ix86_data_alignment (tree type, int align, bool opt)
27095 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27096 for symbols from other compilation units or symbols that don't need
27097 to bind locally. In order to preserve some ABI compatibility with
27098 those compilers, ensure we don't decrease alignment from what we
27099 used to assume. */
27101 int max_align_compat
27102 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
27104 /* A data structure equal to or greater than the size of a cache line
27105 (64 bytes in the Pentium 4 and other recent Intel processors, including
27106 processors based on the Intel Core microarchitecture) should be aligned
27107 so that its base address is a multiple of the cache line size. */
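/* For example (illustration only): with a 64-byte prefetch block this gives
   64 * 8 = 512 bits, clamped by MAX_OFILE_ALIGNMENT and never reduced below
   BITS_PER_WORD by the code below.  */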
27109 int max_align
27110 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27112 if (max_align < BITS_PER_WORD)
27113 max_align = BITS_PER_WORD;
27115 if (opt
27116 && AGGREGATE_TYPE_P (type)
27117 && TYPE_SIZE (type)
27118 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27120 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27121 && align < max_align_compat)
27122 align = max_align_compat;
27123 if (wi::geu_p (TYPE_SIZE (type), max_align)
27124 && align < max_align)
27125 align = max_align;
27128 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27129 to a 16-byte boundary. */
27130 if (TARGET_64BIT)
27132 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27133 && TYPE_SIZE (type)
27134 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27135 && wi::geu_p (TYPE_SIZE (type), 128)
27136 && align < 128)
27137 return 128;
27140 if (!opt)
27141 return align;
27143 if (TREE_CODE (type) == ARRAY_TYPE)
27145 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27146 return 64;
27147 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27148 return 128;
27150 else if (TREE_CODE (type) == COMPLEX_TYPE)
27153 if (TYPE_MODE (type) == DCmode && align < 64)
27154 return 64;
27155 if ((TYPE_MODE (type) == XCmode
27156 || TYPE_MODE (type) == TCmode) && align < 128)
27157 return 128;
27159 else if ((TREE_CODE (type) == RECORD_TYPE
27160 || TREE_CODE (type) == UNION_TYPE
27161 || TREE_CODE (type) == QUAL_UNION_TYPE)
27162 && TYPE_FIELDS (type))
27164 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27165 return 64;
27166 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27167 return 128;
27169 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27170 || TREE_CODE (type) == INTEGER_TYPE)
27172 if (TYPE_MODE (type) == DFmode && align < 64)
27173 return 64;
27174 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27175 return 128;
27178 return align;
27181 /* Compute the alignment for a local variable or a stack slot. EXP is
27182 the data type or decl itself, MODE is the widest mode available and
27183 ALIGN is the alignment that the object would ordinarily have. The
27184 value of this macro is used instead of that alignment to align the
27185 object. */
27187 unsigned int
27188 ix86_local_alignment (tree exp, machine_mode mode,
27189 unsigned int align)
27191 tree type, decl;
27193 if (exp && DECL_P (exp))
27195 type = TREE_TYPE (exp);
27196 decl = exp;
27198 else
27200 type = exp;
27201 decl = NULL;
27204 /* Don't do dynamic stack realignment for long long objects with
27205 -mpreferred-stack-boundary=2. */
27206 if (!TARGET_64BIT
27207 && align == 64
27208 && ix86_preferred_stack_boundary < 64
27209 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27210 && (!type || !TYPE_USER_ALIGN (type))
27211 && (!decl || !DECL_USER_ALIGN (decl)))
27212 align = 32;
27214 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27215 register in MODE. We will return the largest alignment of XF
27216 and DF. */
27217 if (!type)
27219 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27220 align = GET_MODE_ALIGNMENT (DFmode);
27221 return align;
27224 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27225 to a 16-byte boundary. The exact wording is:
27227 An array uses the same alignment as its elements, except that a local or
27228 global array variable of length at least 16 bytes or
27229 a C99 variable-length array variable always has alignment of at least 16 bytes.
27231 This was added to allow the use of aligned SSE instructions on arrays. The
27232 rule is meant for static storage (where the compiler cannot do the analysis
27233 by itself). We follow it for automatic variables only when convenient.
27234 We fully control everything in the compiled function, and functions from
27235 other units cannot rely on the alignment.
27237 Exclude the va_list type. It is the common case of a local array where
27238 we cannot benefit from the alignment.
27240 TODO: Probably one should optimize for size only when the variable does not escape. */
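/* Illustrative example (not part of the ABI text above): a local
   "double buf[4]" occupies 32 bytes, so the check below raises its alignment
   to 128 bits, which is what allows aligned SSE loads and stores on it.  */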
27241 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27242 && TARGET_SSE)
27244 if (AGGREGATE_TYPE_P (type)
27245 && (va_list_type_node == NULL_TREE
27246 || (TYPE_MAIN_VARIANT (type)
27247 != TYPE_MAIN_VARIANT (va_list_type_node)))
27248 && TYPE_SIZE (type)
27249 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27250 && wi::geu_p (TYPE_SIZE (type), 16)
27251 && align < 128)
27252 return 128;
27254 if (TREE_CODE (type) == ARRAY_TYPE)
27256 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27257 return 64;
27258 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27259 return 128;
27261 else if (TREE_CODE (type) == COMPLEX_TYPE)
27263 if (TYPE_MODE (type) == DCmode && align < 64)
27264 return 64;
27265 if ((TYPE_MODE (type) == XCmode
27266 || TYPE_MODE (type) == TCmode) && align < 128)
27267 return 128;
27269 else if ((TREE_CODE (type) == RECORD_TYPE
27270 || TREE_CODE (type) == UNION_TYPE
27271 || TREE_CODE (type) == QUAL_UNION_TYPE)
27272 && TYPE_FIELDS (type))
27274 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27275 return 64;
27276 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27277 return 128;
27279 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27280 || TREE_CODE (type) == INTEGER_TYPE)
27283 if (TYPE_MODE (type) == DFmode && align < 64)
27284 return 64;
27285 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27286 return 128;
27288 return align;
27291 /* Compute the minimum required alignment for dynamic stack realignment
27292 purposes for a local variable, parameter or a stack slot. EXP is
27293 the data type or decl itself, MODE is its mode and ALIGN is the
27294 alignment that the object would ordinarily have. */
27296 unsigned int
27297 ix86_minimum_alignment (tree exp, machine_mode mode,
27298 unsigned int align)
27300 tree type, decl;
27302 if (exp && DECL_P (exp))
27304 type = TREE_TYPE (exp);
27305 decl = exp;
27307 else
27309 type = exp;
27310 decl = NULL;
27313 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27314 return align;
27316 /* Don't do dynamic stack realignment for long long objects with
27317 -mpreferred-stack-boundary=2. */
27318 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27319 && (!type || !TYPE_USER_ALIGN (type))
27320 && (!decl || !DECL_USER_ALIGN (decl)))
27321 return 32;
27323 return align;
27326 /* Find a location for the static chain incoming to a nested function.
27327 This is a register, unless all free registers are used by arguments. */
27329 static rtx
27330 ix86_static_chain (const_tree fndecl, bool incoming_p)
27332 unsigned regno;
27334 if (!DECL_STATIC_CHAIN (fndecl))
27335 return NULL;
27337 if (TARGET_64BIT)
27339 /* We always use R10 in 64-bit mode. */
27340 regno = R10_REG;
27342 else
27344 tree fntype;
27345 unsigned int ccvt;
27347 /* By default in 32-bit mode we use ECX to pass the static chain. */
27348 regno = CX_REG;
27350 fntype = TREE_TYPE (fndecl);
27351 ccvt = ix86_get_callcvt (fntype);
27352 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27354 /* Fastcall functions use ecx/edx for arguments, which leaves
27355 us with EAX for the static chain.
27356 Thiscall functions use ecx for arguments, which also
27357 leaves us with EAX for the static chain. */
27358 regno = AX_REG;
27360 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27362 /* Thiscall functions use ecx for arguments, which leaves
27363 us with EAX and EDX for the static chain.
27364 We use EAX for ABI compatibility. */
27365 regno = AX_REG;
27367 else if (ix86_function_regparm (fntype, fndecl) == 3)
27369 /* For regparm 3, we have no free call-clobbered registers in
27370 which to store the static chain. In order to implement this,
27371 we have the trampoline push the static chain to the stack.
27372 However, we can't push a value below the return address when
27373 we call the nested function directly, so we have to use an
27374 alternate entry point. For this we use ESI, and have the
27375 alternate entry point push ESI, so that things appear the
27376 same once we're executing the nested function. */
27377 if (incoming_p)
27379 if (fndecl == current_function_decl)
27380 ix86_static_chain_on_stack = true;
27381 return gen_frame_mem (SImode,
27382 plus_constant (Pmode,
27383 arg_pointer_rtx, -8));
27385 regno = SI_REG;
27389 return gen_rtx_REG (Pmode, regno);
27392 /* Emit RTL insns to initialize the variable parts of a trampoline.
27393 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27394 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27395 to be passed to the target function. */
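/* Byte layout sketch (an informal illustration, not quoted from any manual):

     64-bit:  41 bb <imm32>   movl   $fnaddr, %r11d   (or 49 bb <imm64>, movabs)
              41 ba <imm32>   movl   $chain,  %r10d   (or 49 ba <imm64>, movabs)
              49 ff e3 90     jmp    *%r11; nop

     32-bit:  b8/b9 <imm32>   movl   $chain, %eax/%ecx   (or 68 <imm32>, pushl)
              e9 <rel32>      jmp    fnaddr

   The immediates and the rel32 displacement are the variable parts filled in
   below; everything else is fixed opcode bytes.  */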
27397 static void
27398 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27400 rtx mem, fnaddr;
27401 int opcode;
27402 int offset = 0;
27404 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27406 if (TARGET_64BIT)
27408 int size;
27410 /* Load the function address into r11. Try to load the address using
27411 the shorter movl instead of movabs. We may want to support
27412 movq for kernel mode, but the kernel does not use trampolines at
27413 the moment. FNADDR is a 32-bit address and may not be in
27414 DImode when ptr_mode == SImode. Always use movl in this
27415 case. */
27416 if (ptr_mode == SImode
27417 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27419 fnaddr = copy_addr_to_reg (fnaddr);
27421 mem = adjust_address (m_tramp, HImode, offset);
27422 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27424 mem = adjust_address (m_tramp, SImode, offset + 2);
27425 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27426 offset += 6;
27428 else
27430 mem = adjust_address (m_tramp, HImode, offset);
27431 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27433 mem = adjust_address (m_tramp, DImode, offset + 2);
27434 emit_move_insn (mem, fnaddr);
27435 offset += 10;
27438 /* Load the static chain into r10 using movabs. Use the shorter movl
27439 instead of movabs when ptr_mode == SImode. */
27440 if (ptr_mode == SImode)
27442 opcode = 0xba41;
27443 size = 6;
27445 else
27447 opcode = 0xba49;
27448 size = 10;
27451 mem = adjust_address (m_tramp, HImode, offset);
27452 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27454 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27455 emit_move_insn (mem, chain_value);
27456 offset += size;
27458 /* Jump to r11; the last (unused) byte is a nop, only there to
27459 pad the write out to a single 32-bit store. */
27460 mem = adjust_address (m_tramp, SImode, offset);
27461 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27462 offset += 4;
27464 else
27466 rtx disp, chain;
27468 /* Depending on the static chain location, either load a register
27469 with a constant, or push the constant to the stack. All of the
27470 instructions are the same size. */
27471 chain = ix86_static_chain (fndecl, true);
27472 if (REG_P (chain))
27474 switch (REGNO (chain))
27476 case AX_REG:
27477 opcode = 0xb8; break;
27478 case CX_REG:
27479 opcode = 0xb9; break;
27480 default:
27481 gcc_unreachable ();
27484 else
27485 opcode = 0x68;
27487 mem = adjust_address (m_tramp, QImode, offset);
27488 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27490 mem = adjust_address (m_tramp, SImode, offset + 1);
27491 emit_move_insn (mem, chain_value);
27492 offset += 5;
27494 mem = adjust_address (m_tramp, QImode, offset);
27495 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27497 mem = adjust_address (m_tramp, SImode, offset + 1);
27499 /* Compute the offset from the end of the jmp to the target function.
27500 When the trampoline stores the static chain on the stack, we need
27501 to skip the first insn, which pushes the (call-saved) register
27502 static chain; this push is 1 byte. */
27503 offset += 5;
27504 disp = expand_binop (SImode, sub_optab, fnaddr,
27505 plus_constant (Pmode, XEXP (m_tramp, 0),
27506 offset - (MEM_P (chain) ? 1 : 0)),
27507 NULL_RTX, 1, OPTAB_DIRECT);
27508 emit_move_insn (mem, disp);
27511 gcc_assert (offset <= TRAMPOLINE_SIZE);
27513 #ifdef HAVE_ENABLE_EXECUTE_STACK
27514 #ifdef CHECK_EXECUTE_STACK_ENABLED
27515 if (CHECK_EXECUTE_STACK_ENABLED)
27516 #endif
27517 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27518 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27519 #endif
27522 /* The following file contains several enumerations and data structures
27523 built from the definitions in i386-builtin-types.def. */
27525 #include "i386-builtin-types.inc"
27527 /* Table for the ix86 builtin non-function types. */
27528 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27530 /* Retrieve an element from the above table, building some of
27531 the types lazily. */
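/* For example (an illustrative note about the generated tables): a vector code
   such as IX86_BT_V4SF is built on demand from the element type and machine
   mode recorded in ix86_builtin_type_vect_base / ix86_builtin_type_vect_mode,
   and the result is memoized in ix86_builtin_type_tab so later lookups are
   cheap.  */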
27533 static tree
27534 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27536 unsigned int index;
27537 tree type, itype;
27539 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27541 type = ix86_builtin_type_tab[(int) tcode];
27542 if (type != NULL)
27543 return type;
27545 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27546 if (tcode <= IX86_BT_LAST_VECT)
27548 machine_mode mode;
27550 index = tcode - IX86_BT_LAST_PRIM - 1;
27551 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27552 mode = ix86_builtin_type_vect_mode[index];
27554 type = build_vector_type_for_mode (itype, mode);
27556 else
27558 int quals;
27560 index = tcode - IX86_BT_LAST_VECT - 1;
27561 if (tcode <= IX86_BT_LAST_PTR)
27562 quals = TYPE_UNQUALIFIED;
27563 else
27564 quals = TYPE_QUAL_CONST;
27566 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27567 if (quals != TYPE_UNQUALIFIED)
27568 itype = build_qualified_type (itype, quals);
27570 type = build_pointer_type (itype);
27573 ix86_builtin_type_tab[(int) tcode] = type;
27574 return type;
27577 /* Table for the ix86 builtin function types. */
27578 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27580 /* Retrieve an element from the above table, building some of
27581 the types lazily. */
27583 static tree
27584 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27586 tree type;
27588 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27590 type = ix86_builtin_func_type_tab[(int) tcode];
27591 if (type != NULL)
27592 return type;
27594 if (tcode <= IX86_BT_LAST_FUNC)
27596 unsigned start = ix86_builtin_func_start[(int) tcode];
27597 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27598 tree rtype, atype, args = void_list_node;
27599 unsigned i;
27601 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27602 for (i = after - 1; i > start; --i)
27604 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27605 args = tree_cons (NULL, atype, args);
27608 type = build_function_type (rtype, args);
27610 else
27612 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27613 enum ix86_builtin_func_type icode;
27615 icode = ix86_builtin_func_alias_base[index];
27616 type = ix86_get_builtin_func_type (icode);
27619 ix86_builtin_func_type_tab[(int) tcode] = type;
27620 return type;
27624 /* Codes for all the SSE/MMX builtins. */
27625 enum ix86_builtins
27627 IX86_BUILTIN_ADDPS,
27628 IX86_BUILTIN_ADDSS,
27629 IX86_BUILTIN_DIVPS,
27630 IX86_BUILTIN_DIVSS,
27631 IX86_BUILTIN_MULPS,
27632 IX86_BUILTIN_MULSS,
27633 IX86_BUILTIN_SUBPS,
27634 IX86_BUILTIN_SUBSS,
27636 IX86_BUILTIN_CMPEQPS,
27637 IX86_BUILTIN_CMPLTPS,
27638 IX86_BUILTIN_CMPLEPS,
27639 IX86_BUILTIN_CMPGTPS,
27640 IX86_BUILTIN_CMPGEPS,
27641 IX86_BUILTIN_CMPNEQPS,
27642 IX86_BUILTIN_CMPNLTPS,
27643 IX86_BUILTIN_CMPNLEPS,
27644 IX86_BUILTIN_CMPNGTPS,
27645 IX86_BUILTIN_CMPNGEPS,
27646 IX86_BUILTIN_CMPORDPS,
27647 IX86_BUILTIN_CMPUNORDPS,
27648 IX86_BUILTIN_CMPEQSS,
27649 IX86_BUILTIN_CMPLTSS,
27650 IX86_BUILTIN_CMPLESS,
27651 IX86_BUILTIN_CMPNEQSS,
27652 IX86_BUILTIN_CMPNLTSS,
27653 IX86_BUILTIN_CMPNLESS,
27654 IX86_BUILTIN_CMPORDSS,
27655 IX86_BUILTIN_CMPUNORDSS,
27657 IX86_BUILTIN_COMIEQSS,
27658 IX86_BUILTIN_COMILTSS,
27659 IX86_BUILTIN_COMILESS,
27660 IX86_BUILTIN_COMIGTSS,
27661 IX86_BUILTIN_COMIGESS,
27662 IX86_BUILTIN_COMINEQSS,
27663 IX86_BUILTIN_UCOMIEQSS,
27664 IX86_BUILTIN_UCOMILTSS,
27665 IX86_BUILTIN_UCOMILESS,
27666 IX86_BUILTIN_UCOMIGTSS,
27667 IX86_BUILTIN_UCOMIGESS,
27668 IX86_BUILTIN_UCOMINEQSS,
27670 IX86_BUILTIN_CVTPI2PS,
27671 IX86_BUILTIN_CVTPS2PI,
27672 IX86_BUILTIN_CVTSI2SS,
27673 IX86_BUILTIN_CVTSI642SS,
27674 IX86_BUILTIN_CVTSS2SI,
27675 IX86_BUILTIN_CVTSS2SI64,
27676 IX86_BUILTIN_CVTTPS2PI,
27677 IX86_BUILTIN_CVTTSS2SI,
27678 IX86_BUILTIN_CVTTSS2SI64,
27680 IX86_BUILTIN_MAXPS,
27681 IX86_BUILTIN_MAXSS,
27682 IX86_BUILTIN_MINPS,
27683 IX86_BUILTIN_MINSS,
27685 IX86_BUILTIN_LOADUPS,
27686 IX86_BUILTIN_STOREUPS,
27687 IX86_BUILTIN_MOVSS,
27689 IX86_BUILTIN_MOVHLPS,
27690 IX86_BUILTIN_MOVLHPS,
27691 IX86_BUILTIN_LOADHPS,
27692 IX86_BUILTIN_LOADLPS,
27693 IX86_BUILTIN_STOREHPS,
27694 IX86_BUILTIN_STORELPS,
27696 IX86_BUILTIN_MASKMOVQ,
27697 IX86_BUILTIN_MOVMSKPS,
27698 IX86_BUILTIN_PMOVMSKB,
27700 IX86_BUILTIN_MOVNTPS,
27701 IX86_BUILTIN_MOVNTQ,
27703 IX86_BUILTIN_LOADDQU,
27704 IX86_BUILTIN_STOREDQU,
27706 IX86_BUILTIN_PACKSSWB,
27707 IX86_BUILTIN_PACKSSDW,
27708 IX86_BUILTIN_PACKUSWB,
27710 IX86_BUILTIN_PADDB,
27711 IX86_BUILTIN_PADDW,
27712 IX86_BUILTIN_PADDD,
27713 IX86_BUILTIN_PADDQ,
27714 IX86_BUILTIN_PADDSB,
27715 IX86_BUILTIN_PADDSW,
27716 IX86_BUILTIN_PADDUSB,
27717 IX86_BUILTIN_PADDUSW,
27718 IX86_BUILTIN_PSUBB,
27719 IX86_BUILTIN_PSUBW,
27720 IX86_BUILTIN_PSUBD,
27721 IX86_BUILTIN_PSUBQ,
27722 IX86_BUILTIN_PSUBSB,
27723 IX86_BUILTIN_PSUBSW,
27724 IX86_BUILTIN_PSUBUSB,
27725 IX86_BUILTIN_PSUBUSW,
27727 IX86_BUILTIN_PAND,
27728 IX86_BUILTIN_PANDN,
27729 IX86_BUILTIN_POR,
27730 IX86_BUILTIN_PXOR,
27732 IX86_BUILTIN_PAVGB,
27733 IX86_BUILTIN_PAVGW,
27735 IX86_BUILTIN_PCMPEQB,
27736 IX86_BUILTIN_PCMPEQW,
27737 IX86_BUILTIN_PCMPEQD,
27738 IX86_BUILTIN_PCMPGTB,
27739 IX86_BUILTIN_PCMPGTW,
27740 IX86_BUILTIN_PCMPGTD,
27742 IX86_BUILTIN_PMADDWD,
27744 IX86_BUILTIN_PMAXSW,
27745 IX86_BUILTIN_PMAXUB,
27746 IX86_BUILTIN_PMINSW,
27747 IX86_BUILTIN_PMINUB,
27749 IX86_BUILTIN_PMULHUW,
27750 IX86_BUILTIN_PMULHW,
27751 IX86_BUILTIN_PMULLW,
27753 IX86_BUILTIN_PSADBW,
27754 IX86_BUILTIN_PSHUFW,
27756 IX86_BUILTIN_PSLLW,
27757 IX86_BUILTIN_PSLLD,
27758 IX86_BUILTIN_PSLLQ,
27759 IX86_BUILTIN_PSRAW,
27760 IX86_BUILTIN_PSRAD,
27761 IX86_BUILTIN_PSRLW,
27762 IX86_BUILTIN_PSRLD,
27763 IX86_BUILTIN_PSRLQ,
27764 IX86_BUILTIN_PSLLWI,
27765 IX86_BUILTIN_PSLLDI,
27766 IX86_BUILTIN_PSLLQI,
27767 IX86_BUILTIN_PSRAWI,
27768 IX86_BUILTIN_PSRADI,
27769 IX86_BUILTIN_PSRLWI,
27770 IX86_BUILTIN_PSRLDI,
27771 IX86_BUILTIN_PSRLQI,
27773 IX86_BUILTIN_PUNPCKHBW,
27774 IX86_BUILTIN_PUNPCKHWD,
27775 IX86_BUILTIN_PUNPCKHDQ,
27776 IX86_BUILTIN_PUNPCKLBW,
27777 IX86_BUILTIN_PUNPCKLWD,
27778 IX86_BUILTIN_PUNPCKLDQ,
27780 IX86_BUILTIN_SHUFPS,
27782 IX86_BUILTIN_RCPPS,
27783 IX86_BUILTIN_RCPSS,
27784 IX86_BUILTIN_RSQRTPS,
27785 IX86_BUILTIN_RSQRTPS_NR,
27786 IX86_BUILTIN_RSQRTSS,
27787 IX86_BUILTIN_RSQRTF,
27788 IX86_BUILTIN_SQRTPS,
27789 IX86_BUILTIN_SQRTPS_NR,
27790 IX86_BUILTIN_SQRTSS,
27792 IX86_BUILTIN_UNPCKHPS,
27793 IX86_BUILTIN_UNPCKLPS,
27795 IX86_BUILTIN_ANDPS,
27796 IX86_BUILTIN_ANDNPS,
27797 IX86_BUILTIN_ORPS,
27798 IX86_BUILTIN_XORPS,
27800 IX86_BUILTIN_EMMS,
27801 IX86_BUILTIN_LDMXCSR,
27802 IX86_BUILTIN_STMXCSR,
27803 IX86_BUILTIN_SFENCE,
27805 IX86_BUILTIN_FXSAVE,
27806 IX86_BUILTIN_FXRSTOR,
27807 IX86_BUILTIN_FXSAVE64,
27808 IX86_BUILTIN_FXRSTOR64,
27810 IX86_BUILTIN_XSAVE,
27811 IX86_BUILTIN_XRSTOR,
27812 IX86_BUILTIN_XSAVE64,
27813 IX86_BUILTIN_XRSTOR64,
27815 IX86_BUILTIN_XSAVEOPT,
27816 IX86_BUILTIN_XSAVEOPT64,
27818 IX86_BUILTIN_XSAVEC,
27819 IX86_BUILTIN_XSAVEC64,
27821 IX86_BUILTIN_XSAVES,
27822 IX86_BUILTIN_XRSTORS,
27823 IX86_BUILTIN_XSAVES64,
27824 IX86_BUILTIN_XRSTORS64,
27826 /* 3DNow! Original */
27827 IX86_BUILTIN_FEMMS,
27828 IX86_BUILTIN_PAVGUSB,
27829 IX86_BUILTIN_PF2ID,
27830 IX86_BUILTIN_PFACC,
27831 IX86_BUILTIN_PFADD,
27832 IX86_BUILTIN_PFCMPEQ,
27833 IX86_BUILTIN_PFCMPGE,
27834 IX86_BUILTIN_PFCMPGT,
27835 IX86_BUILTIN_PFMAX,
27836 IX86_BUILTIN_PFMIN,
27837 IX86_BUILTIN_PFMUL,
27838 IX86_BUILTIN_PFRCP,
27839 IX86_BUILTIN_PFRCPIT1,
27840 IX86_BUILTIN_PFRCPIT2,
27841 IX86_BUILTIN_PFRSQIT1,
27842 IX86_BUILTIN_PFRSQRT,
27843 IX86_BUILTIN_PFSUB,
27844 IX86_BUILTIN_PFSUBR,
27845 IX86_BUILTIN_PI2FD,
27846 IX86_BUILTIN_PMULHRW,
27848 /* 3DNow! Athlon Extensions */
27849 IX86_BUILTIN_PF2IW,
27850 IX86_BUILTIN_PFNACC,
27851 IX86_BUILTIN_PFPNACC,
27852 IX86_BUILTIN_PI2FW,
27853 IX86_BUILTIN_PSWAPDSI,
27854 IX86_BUILTIN_PSWAPDSF,
27856 /* SSE2 */
27857 IX86_BUILTIN_ADDPD,
27858 IX86_BUILTIN_ADDSD,
27859 IX86_BUILTIN_DIVPD,
27860 IX86_BUILTIN_DIVSD,
27861 IX86_BUILTIN_MULPD,
27862 IX86_BUILTIN_MULSD,
27863 IX86_BUILTIN_SUBPD,
27864 IX86_BUILTIN_SUBSD,
27866 IX86_BUILTIN_CMPEQPD,
27867 IX86_BUILTIN_CMPLTPD,
27868 IX86_BUILTIN_CMPLEPD,
27869 IX86_BUILTIN_CMPGTPD,
27870 IX86_BUILTIN_CMPGEPD,
27871 IX86_BUILTIN_CMPNEQPD,
27872 IX86_BUILTIN_CMPNLTPD,
27873 IX86_BUILTIN_CMPNLEPD,
27874 IX86_BUILTIN_CMPNGTPD,
27875 IX86_BUILTIN_CMPNGEPD,
27876 IX86_BUILTIN_CMPORDPD,
27877 IX86_BUILTIN_CMPUNORDPD,
27878 IX86_BUILTIN_CMPEQSD,
27879 IX86_BUILTIN_CMPLTSD,
27880 IX86_BUILTIN_CMPLESD,
27881 IX86_BUILTIN_CMPNEQSD,
27882 IX86_BUILTIN_CMPNLTSD,
27883 IX86_BUILTIN_CMPNLESD,
27884 IX86_BUILTIN_CMPORDSD,
27885 IX86_BUILTIN_CMPUNORDSD,
27887 IX86_BUILTIN_COMIEQSD,
27888 IX86_BUILTIN_COMILTSD,
27889 IX86_BUILTIN_COMILESD,
27890 IX86_BUILTIN_COMIGTSD,
27891 IX86_BUILTIN_COMIGESD,
27892 IX86_BUILTIN_COMINEQSD,
27893 IX86_BUILTIN_UCOMIEQSD,
27894 IX86_BUILTIN_UCOMILTSD,
27895 IX86_BUILTIN_UCOMILESD,
27896 IX86_BUILTIN_UCOMIGTSD,
27897 IX86_BUILTIN_UCOMIGESD,
27898 IX86_BUILTIN_UCOMINEQSD,
27900 IX86_BUILTIN_MAXPD,
27901 IX86_BUILTIN_MAXSD,
27902 IX86_BUILTIN_MINPD,
27903 IX86_BUILTIN_MINSD,
27905 IX86_BUILTIN_ANDPD,
27906 IX86_BUILTIN_ANDNPD,
27907 IX86_BUILTIN_ORPD,
27908 IX86_BUILTIN_XORPD,
27910 IX86_BUILTIN_SQRTPD,
27911 IX86_BUILTIN_SQRTSD,
27913 IX86_BUILTIN_UNPCKHPD,
27914 IX86_BUILTIN_UNPCKLPD,
27916 IX86_BUILTIN_SHUFPD,
27918 IX86_BUILTIN_LOADUPD,
27919 IX86_BUILTIN_STOREUPD,
27920 IX86_BUILTIN_MOVSD,
27922 IX86_BUILTIN_LOADHPD,
27923 IX86_BUILTIN_LOADLPD,
27925 IX86_BUILTIN_CVTDQ2PD,
27926 IX86_BUILTIN_CVTDQ2PS,
27928 IX86_BUILTIN_CVTPD2DQ,
27929 IX86_BUILTIN_CVTPD2PI,
27930 IX86_BUILTIN_CVTPD2PS,
27931 IX86_BUILTIN_CVTTPD2DQ,
27932 IX86_BUILTIN_CVTTPD2PI,
27934 IX86_BUILTIN_CVTPI2PD,
27935 IX86_BUILTIN_CVTSI2SD,
27936 IX86_BUILTIN_CVTSI642SD,
27938 IX86_BUILTIN_CVTSD2SI,
27939 IX86_BUILTIN_CVTSD2SI64,
27940 IX86_BUILTIN_CVTSD2SS,
27941 IX86_BUILTIN_CVTSS2SD,
27942 IX86_BUILTIN_CVTTSD2SI,
27943 IX86_BUILTIN_CVTTSD2SI64,
27945 IX86_BUILTIN_CVTPS2DQ,
27946 IX86_BUILTIN_CVTPS2PD,
27947 IX86_BUILTIN_CVTTPS2DQ,
27949 IX86_BUILTIN_MOVNTI,
27950 IX86_BUILTIN_MOVNTI64,
27951 IX86_BUILTIN_MOVNTPD,
27952 IX86_BUILTIN_MOVNTDQ,
27954 IX86_BUILTIN_MOVQ128,
27956 /* SSE2 MMX */
27957 IX86_BUILTIN_MASKMOVDQU,
27958 IX86_BUILTIN_MOVMSKPD,
27959 IX86_BUILTIN_PMOVMSKB128,
27961 IX86_BUILTIN_PACKSSWB128,
27962 IX86_BUILTIN_PACKSSDW128,
27963 IX86_BUILTIN_PACKUSWB128,
27965 IX86_BUILTIN_PADDB128,
27966 IX86_BUILTIN_PADDW128,
27967 IX86_BUILTIN_PADDD128,
27968 IX86_BUILTIN_PADDQ128,
27969 IX86_BUILTIN_PADDSB128,
27970 IX86_BUILTIN_PADDSW128,
27971 IX86_BUILTIN_PADDUSB128,
27972 IX86_BUILTIN_PADDUSW128,
27973 IX86_BUILTIN_PSUBB128,
27974 IX86_BUILTIN_PSUBW128,
27975 IX86_BUILTIN_PSUBD128,
27976 IX86_BUILTIN_PSUBQ128,
27977 IX86_BUILTIN_PSUBSB128,
27978 IX86_BUILTIN_PSUBSW128,
27979 IX86_BUILTIN_PSUBUSB128,
27980 IX86_BUILTIN_PSUBUSW128,
27982 IX86_BUILTIN_PAND128,
27983 IX86_BUILTIN_PANDN128,
27984 IX86_BUILTIN_POR128,
27985 IX86_BUILTIN_PXOR128,
27987 IX86_BUILTIN_PAVGB128,
27988 IX86_BUILTIN_PAVGW128,
27990 IX86_BUILTIN_PCMPEQB128,
27991 IX86_BUILTIN_PCMPEQW128,
27992 IX86_BUILTIN_PCMPEQD128,
27993 IX86_BUILTIN_PCMPGTB128,
27994 IX86_BUILTIN_PCMPGTW128,
27995 IX86_BUILTIN_PCMPGTD128,
27997 IX86_BUILTIN_PMADDWD128,
27999 IX86_BUILTIN_PMAXSW128,
28000 IX86_BUILTIN_PMAXUB128,
28001 IX86_BUILTIN_PMINSW128,
28002 IX86_BUILTIN_PMINUB128,
28004 IX86_BUILTIN_PMULUDQ,
28005 IX86_BUILTIN_PMULUDQ128,
28006 IX86_BUILTIN_PMULHUW128,
28007 IX86_BUILTIN_PMULHW128,
28008 IX86_BUILTIN_PMULLW128,
28010 IX86_BUILTIN_PSADBW128,
28011 IX86_BUILTIN_PSHUFHW,
28012 IX86_BUILTIN_PSHUFLW,
28013 IX86_BUILTIN_PSHUFD,
28015 IX86_BUILTIN_PSLLDQI128,
28016 IX86_BUILTIN_PSLLWI128,
28017 IX86_BUILTIN_PSLLDI128,
28018 IX86_BUILTIN_PSLLQI128,
28019 IX86_BUILTIN_PSRAWI128,
28020 IX86_BUILTIN_PSRADI128,
28021 IX86_BUILTIN_PSRLDQI128,
28022 IX86_BUILTIN_PSRLWI128,
28023 IX86_BUILTIN_PSRLDI128,
28024 IX86_BUILTIN_PSRLQI128,
28026 IX86_BUILTIN_PSLLDQ128,
28027 IX86_BUILTIN_PSLLW128,
28028 IX86_BUILTIN_PSLLD128,
28029 IX86_BUILTIN_PSLLQ128,
28030 IX86_BUILTIN_PSRAW128,
28031 IX86_BUILTIN_PSRAD128,
28032 IX86_BUILTIN_PSRLW128,
28033 IX86_BUILTIN_PSRLD128,
28034 IX86_BUILTIN_PSRLQ128,
28036 IX86_BUILTIN_PUNPCKHBW128,
28037 IX86_BUILTIN_PUNPCKHWD128,
28038 IX86_BUILTIN_PUNPCKHDQ128,
28039 IX86_BUILTIN_PUNPCKHQDQ128,
28040 IX86_BUILTIN_PUNPCKLBW128,
28041 IX86_BUILTIN_PUNPCKLWD128,
28042 IX86_BUILTIN_PUNPCKLDQ128,
28043 IX86_BUILTIN_PUNPCKLQDQ128,
28045 IX86_BUILTIN_CLFLUSH,
28046 IX86_BUILTIN_MFENCE,
28047 IX86_BUILTIN_LFENCE,
28048 IX86_BUILTIN_PAUSE,
28050 IX86_BUILTIN_FNSTENV,
28051 IX86_BUILTIN_FLDENV,
28052 IX86_BUILTIN_FNSTSW,
28053 IX86_BUILTIN_FNCLEX,
28055 IX86_BUILTIN_BSRSI,
28056 IX86_BUILTIN_BSRDI,
28057 IX86_BUILTIN_RDPMC,
28058 IX86_BUILTIN_RDTSC,
28059 IX86_BUILTIN_RDTSCP,
28060 IX86_BUILTIN_ROLQI,
28061 IX86_BUILTIN_ROLHI,
28062 IX86_BUILTIN_RORQI,
28063 IX86_BUILTIN_RORHI,
28065 /* SSE3. */
28066 IX86_BUILTIN_ADDSUBPS,
28067 IX86_BUILTIN_HADDPS,
28068 IX86_BUILTIN_HSUBPS,
28069 IX86_BUILTIN_MOVSHDUP,
28070 IX86_BUILTIN_MOVSLDUP,
28071 IX86_BUILTIN_ADDSUBPD,
28072 IX86_BUILTIN_HADDPD,
28073 IX86_BUILTIN_HSUBPD,
28074 IX86_BUILTIN_LDDQU,
28076 IX86_BUILTIN_MONITOR,
28077 IX86_BUILTIN_MWAIT,
28079 /* SSSE3. */
28080 IX86_BUILTIN_PHADDW,
28081 IX86_BUILTIN_PHADDD,
28082 IX86_BUILTIN_PHADDSW,
28083 IX86_BUILTIN_PHSUBW,
28084 IX86_BUILTIN_PHSUBD,
28085 IX86_BUILTIN_PHSUBSW,
28086 IX86_BUILTIN_PMADDUBSW,
28087 IX86_BUILTIN_PMULHRSW,
28088 IX86_BUILTIN_PSHUFB,
28089 IX86_BUILTIN_PSIGNB,
28090 IX86_BUILTIN_PSIGNW,
28091 IX86_BUILTIN_PSIGND,
28092 IX86_BUILTIN_PALIGNR,
28093 IX86_BUILTIN_PABSB,
28094 IX86_BUILTIN_PABSW,
28095 IX86_BUILTIN_PABSD,
28097 IX86_BUILTIN_PHADDW128,
28098 IX86_BUILTIN_PHADDD128,
28099 IX86_BUILTIN_PHADDSW128,
28100 IX86_BUILTIN_PHSUBW128,
28101 IX86_BUILTIN_PHSUBD128,
28102 IX86_BUILTIN_PHSUBSW128,
28103 IX86_BUILTIN_PMADDUBSW128,
28104 IX86_BUILTIN_PMULHRSW128,
28105 IX86_BUILTIN_PSHUFB128,
28106 IX86_BUILTIN_PSIGNB128,
28107 IX86_BUILTIN_PSIGNW128,
28108 IX86_BUILTIN_PSIGND128,
28109 IX86_BUILTIN_PALIGNR128,
28110 IX86_BUILTIN_PABSB128,
28111 IX86_BUILTIN_PABSW128,
28112 IX86_BUILTIN_PABSD128,
28114 /* AMDFAM10 - SSE4A New Instructions. */
28115 IX86_BUILTIN_MOVNTSD,
28116 IX86_BUILTIN_MOVNTSS,
28117 IX86_BUILTIN_EXTRQI,
28118 IX86_BUILTIN_EXTRQ,
28119 IX86_BUILTIN_INSERTQI,
28120 IX86_BUILTIN_INSERTQ,
28122 /* SSE4.1. */
28123 IX86_BUILTIN_BLENDPD,
28124 IX86_BUILTIN_BLENDPS,
28125 IX86_BUILTIN_BLENDVPD,
28126 IX86_BUILTIN_BLENDVPS,
28127 IX86_BUILTIN_PBLENDVB128,
28128 IX86_BUILTIN_PBLENDW128,
28130 IX86_BUILTIN_DPPD,
28131 IX86_BUILTIN_DPPS,
28133 IX86_BUILTIN_INSERTPS128,
28135 IX86_BUILTIN_MOVNTDQA,
28136 IX86_BUILTIN_MPSADBW128,
28137 IX86_BUILTIN_PACKUSDW128,
28138 IX86_BUILTIN_PCMPEQQ,
28139 IX86_BUILTIN_PHMINPOSUW128,
28141 IX86_BUILTIN_PMAXSB128,
28142 IX86_BUILTIN_PMAXSD128,
28143 IX86_BUILTIN_PMAXUD128,
28144 IX86_BUILTIN_PMAXUW128,
28146 IX86_BUILTIN_PMINSB128,
28147 IX86_BUILTIN_PMINSD128,
28148 IX86_BUILTIN_PMINUD128,
28149 IX86_BUILTIN_PMINUW128,
28151 IX86_BUILTIN_PMOVSXBW128,
28152 IX86_BUILTIN_PMOVSXBD128,
28153 IX86_BUILTIN_PMOVSXBQ128,
28154 IX86_BUILTIN_PMOVSXWD128,
28155 IX86_BUILTIN_PMOVSXWQ128,
28156 IX86_BUILTIN_PMOVSXDQ128,
28158 IX86_BUILTIN_PMOVZXBW128,
28159 IX86_BUILTIN_PMOVZXBD128,
28160 IX86_BUILTIN_PMOVZXBQ128,
28161 IX86_BUILTIN_PMOVZXWD128,
28162 IX86_BUILTIN_PMOVZXWQ128,
28163 IX86_BUILTIN_PMOVZXDQ128,
28165 IX86_BUILTIN_PMULDQ128,
28166 IX86_BUILTIN_PMULLD128,
28168 IX86_BUILTIN_ROUNDSD,
28169 IX86_BUILTIN_ROUNDSS,
28171 IX86_BUILTIN_ROUNDPD,
28172 IX86_BUILTIN_ROUNDPS,
28174 IX86_BUILTIN_FLOORPD,
28175 IX86_BUILTIN_CEILPD,
28176 IX86_BUILTIN_TRUNCPD,
28177 IX86_BUILTIN_RINTPD,
28178 IX86_BUILTIN_ROUNDPD_AZ,
28180 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28181 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28182 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28184 IX86_BUILTIN_FLOORPS,
28185 IX86_BUILTIN_CEILPS,
28186 IX86_BUILTIN_TRUNCPS,
28187 IX86_BUILTIN_RINTPS,
28188 IX86_BUILTIN_ROUNDPS_AZ,
28190 IX86_BUILTIN_FLOORPS_SFIX,
28191 IX86_BUILTIN_CEILPS_SFIX,
28192 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28194 IX86_BUILTIN_PTESTZ,
28195 IX86_BUILTIN_PTESTC,
28196 IX86_BUILTIN_PTESTNZC,
28198 IX86_BUILTIN_VEC_INIT_V2SI,
28199 IX86_BUILTIN_VEC_INIT_V4HI,
28200 IX86_BUILTIN_VEC_INIT_V8QI,
28201 IX86_BUILTIN_VEC_EXT_V2DF,
28202 IX86_BUILTIN_VEC_EXT_V2DI,
28203 IX86_BUILTIN_VEC_EXT_V4SF,
28204 IX86_BUILTIN_VEC_EXT_V4SI,
28205 IX86_BUILTIN_VEC_EXT_V8HI,
28206 IX86_BUILTIN_VEC_EXT_V2SI,
28207 IX86_BUILTIN_VEC_EXT_V4HI,
28208 IX86_BUILTIN_VEC_EXT_V16QI,
28209 IX86_BUILTIN_VEC_SET_V2DI,
28210 IX86_BUILTIN_VEC_SET_V4SF,
28211 IX86_BUILTIN_VEC_SET_V4SI,
28212 IX86_BUILTIN_VEC_SET_V8HI,
28213 IX86_BUILTIN_VEC_SET_V4HI,
28214 IX86_BUILTIN_VEC_SET_V16QI,
28216 IX86_BUILTIN_VEC_PACK_SFIX,
28217 IX86_BUILTIN_VEC_PACK_SFIX256,
28219 /* SSE4.2. */
28220 IX86_BUILTIN_CRC32QI,
28221 IX86_BUILTIN_CRC32HI,
28222 IX86_BUILTIN_CRC32SI,
28223 IX86_BUILTIN_CRC32DI,
28225 IX86_BUILTIN_PCMPESTRI128,
28226 IX86_BUILTIN_PCMPESTRM128,
28227 IX86_BUILTIN_PCMPESTRA128,
28228 IX86_BUILTIN_PCMPESTRC128,
28229 IX86_BUILTIN_PCMPESTRO128,
28230 IX86_BUILTIN_PCMPESTRS128,
28231 IX86_BUILTIN_PCMPESTRZ128,
28232 IX86_BUILTIN_PCMPISTRI128,
28233 IX86_BUILTIN_PCMPISTRM128,
28234 IX86_BUILTIN_PCMPISTRA128,
28235 IX86_BUILTIN_PCMPISTRC128,
28236 IX86_BUILTIN_PCMPISTRO128,
28237 IX86_BUILTIN_PCMPISTRS128,
28238 IX86_BUILTIN_PCMPISTRZ128,
28240 IX86_BUILTIN_PCMPGTQ,
28242 /* AES instructions */
28243 IX86_BUILTIN_AESENC128,
28244 IX86_BUILTIN_AESENCLAST128,
28245 IX86_BUILTIN_AESDEC128,
28246 IX86_BUILTIN_AESDECLAST128,
28247 IX86_BUILTIN_AESIMC128,
28248 IX86_BUILTIN_AESKEYGENASSIST128,
28250 /* PCLMUL instruction */
28251 IX86_BUILTIN_PCLMULQDQ128,
28253 /* AVX */
28254 IX86_BUILTIN_ADDPD256,
28255 IX86_BUILTIN_ADDPS256,
28256 IX86_BUILTIN_ADDSUBPD256,
28257 IX86_BUILTIN_ADDSUBPS256,
28258 IX86_BUILTIN_ANDPD256,
28259 IX86_BUILTIN_ANDPS256,
28260 IX86_BUILTIN_ANDNPD256,
28261 IX86_BUILTIN_ANDNPS256,
28262 IX86_BUILTIN_BLENDPD256,
28263 IX86_BUILTIN_BLENDPS256,
28264 IX86_BUILTIN_BLENDVPD256,
28265 IX86_BUILTIN_BLENDVPS256,
28266 IX86_BUILTIN_DIVPD256,
28267 IX86_BUILTIN_DIVPS256,
28268 IX86_BUILTIN_DPPS256,
28269 IX86_BUILTIN_HADDPD256,
28270 IX86_BUILTIN_HADDPS256,
28271 IX86_BUILTIN_HSUBPD256,
28272 IX86_BUILTIN_HSUBPS256,
28273 IX86_BUILTIN_MAXPD256,
28274 IX86_BUILTIN_MAXPS256,
28275 IX86_BUILTIN_MINPD256,
28276 IX86_BUILTIN_MINPS256,
28277 IX86_BUILTIN_MULPD256,
28278 IX86_BUILTIN_MULPS256,
28279 IX86_BUILTIN_ORPD256,
28280 IX86_BUILTIN_ORPS256,
28281 IX86_BUILTIN_SHUFPD256,
28282 IX86_BUILTIN_SHUFPS256,
28283 IX86_BUILTIN_SUBPD256,
28284 IX86_BUILTIN_SUBPS256,
28285 IX86_BUILTIN_XORPD256,
28286 IX86_BUILTIN_XORPS256,
28287 IX86_BUILTIN_CMPSD,
28288 IX86_BUILTIN_CMPSS,
28289 IX86_BUILTIN_CMPPD,
28290 IX86_BUILTIN_CMPPS,
28291 IX86_BUILTIN_CMPPD256,
28292 IX86_BUILTIN_CMPPS256,
28293 IX86_BUILTIN_CVTDQ2PD256,
28294 IX86_BUILTIN_CVTDQ2PS256,
28295 IX86_BUILTIN_CVTPD2PS256,
28296 IX86_BUILTIN_CVTPS2DQ256,
28297 IX86_BUILTIN_CVTPS2PD256,
28298 IX86_BUILTIN_CVTTPD2DQ256,
28299 IX86_BUILTIN_CVTPD2DQ256,
28300 IX86_BUILTIN_CVTTPS2DQ256,
28301 IX86_BUILTIN_EXTRACTF128PD256,
28302 IX86_BUILTIN_EXTRACTF128PS256,
28303 IX86_BUILTIN_EXTRACTF128SI256,
28304 IX86_BUILTIN_VZEROALL,
28305 IX86_BUILTIN_VZEROUPPER,
28306 IX86_BUILTIN_VPERMILVARPD,
28307 IX86_BUILTIN_VPERMILVARPS,
28308 IX86_BUILTIN_VPERMILVARPD256,
28309 IX86_BUILTIN_VPERMILVARPS256,
28310 IX86_BUILTIN_VPERMILPD,
28311 IX86_BUILTIN_VPERMILPS,
28312 IX86_BUILTIN_VPERMILPD256,
28313 IX86_BUILTIN_VPERMILPS256,
28314 IX86_BUILTIN_VPERMIL2PD,
28315 IX86_BUILTIN_VPERMIL2PS,
28316 IX86_BUILTIN_VPERMIL2PD256,
28317 IX86_BUILTIN_VPERMIL2PS256,
28318 IX86_BUILTIN_VPERM2F128PD256,
28319 IX86_BUILTIN_VPERM2F128PS256,
28320 IX86_BUILTIN_VPERM2F128SI256,
28321 IX86_BUILTIN_VBROADCASTSS,
28322 IX86_BUILTIN_VBROADCASTSD256,
28323 IX86_BUILTIN_VBROADCASTSS256,
28324 IX86_BUILTIN_VBROADCASTPD256,
28325 IX86_BUILTIN_VBROADCASTPS256,
28326 IX86_BUILTIN_VINSERTF128PD256,
28327 IX86_BUILTIN_VINSERTF128PS256,
28328 IX86_BUILTIN_VINSERTF128SI256,
28329 IX86_BUILTIN_LOADUPD256,
28330 IX86_BUILTIN_LOADUPS256,
28331 IX86_BUILTIN_STOREUPD256,
28332 IX86_BUILTIN_STOREUPS256,
28333 IX86_BUILTIN_LDDQU256,
28334 IX86_BUILTIN_MOVNTDQ256,
28335 IX86_BUILTIN_MOVNTPD256,
28336 IX86_BUILTIN_MOVNTPS256,
28337 IX86_BUILTIN_LOADDQU256,
28338 IX86_BUILTIN_STOREDQU256,
28339 IX86_BUILTIN_MASKLOADPD,
28340 IX86_BUILTIN_MASKLOADPS,
28341 IX86_BUILTIN_MASKSTOREPD,
28342 IX86_BUILTIN_MASKSTOREPS,
28343 IX86_BUILTIN_MASKLOADPD256,
28344 IX86_BUILTIN_MASKLOADPS256,
28345 IX86_BUILTIN_MASKSTOREPD256,
28346 IX86_BUILTIN_MASKSTOREPS256,
28347 IX86_BUILTIN_MOVSHDUP256,
28348 IX86_BUILTIN_MOVSLDUP256,
28349 IX86_BUILTIN_MOVDDUP256,
28351 IX86_BUILTIN_SQRTPD256,
28352 IX86_BUILTIN_SQRTPS256,
28353 IX86_BUILTIN_SQRTPS_NR256,
28354 IX86_BUILTIN_RSQRTPS256,
28355 IX86_BUILTIN_RSQRTPS_NR256,
28357 IX86_BUILTIN_RCPPS256,
28359 IX86_BUILTIN_ROUNDPD256,
28360 IX86_BUILTIN_ROUNDPS256,
28362 IX86_BUILTIN_FLOORPD256,
28363 IX86_BUILTIN_CEILPD256,
28364 IX86_BUILTIN_TRUNCPD256,
28365 IX86_BUILTIN_RINTPD256,
28366 IX86_BUILTIN_ROUNDPD_AZ256,
28368 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28369 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28370 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28372 IX86_BUILTIN_FLOORPS256,
28373 IX86_BUILTIN_CEILPS256,
28374 IX86_BUILTIN_TRUNCPS256,
28375 IX86_BUILTIN_RINTPS256,
28376 IX86_BUILTIN_ROUNDPS_AZ256,
28378 IX86_BUILTIN_FLOORPS_SFIX256,
28379 IX86_BUILTIN_CEILPS_SFIX256,
28380 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28382 IX86_BUILTIN_UNPCKHPD256,
28383 IX86_BUILTIN_UNPCKLPD256,
28384 IX86_BUILTIN_UNPCKHPS256,
28385 IX86_BUILTIN_UNPCKLPS256,
28387 IX86_BUILTIN_SI256_SI,
28388 IX86_BUILTIN_PS256_PS,
28389 IX86_BUILTIN_PD256_PD,
28390 IX86_BUILTIN_SI_SI256,
28391 IX86_BUILTIN_PS_PS256,
28392 IX86_BUILTIN_PD_PD256,
28394 IX86_BUILTIN_VTESTZPD,
28395 IX86_BUILTIN_VTESTCPD,
28396 IX86_BUILTIN_VTESTNZCPD,
28397 IX86_BUILTIN_VTESTZPS,
28398 IX86_BUILTIN_VTESTCPS,
28399 IX86_BUILTIN_VTESTNZCPS,
28400 IX86_BUILTIN_VTESTZPD256,
28401 IX86_BUILTIN_VTESTCPD256,
28402 IX86_BUILTIN_VTESTNZCPD256,
28403 IX86_BUILTIN_VTESTZPS256,
28404 IX86_BUILTIN_VTESTCPS256,
28405 IX86_BUILTIN_VTESTNZCPS256,
28406 IX86_BUILTIN_PTESTZ256,
28407 IX86_BUILTIN_PTESTC256,
28408 IX86_BUILTIN_PTESTNZC256,
28410 IX86_BUILTIN_MOVMSKPD256,
28411 IX86_BUILTIN_MOVMSKPS256,
28413 /* AVX2 */
28414 IX86_BUILTIN_MPSADBW256,
28415 IX86_BUILTIN_PABSB256,
28416 IX86_BUILTIN_PABSW256,
28417 IX86_BUILTIN_PABSD256,
28418 IX86_BUILTIN_PACKSSDW256,
28419 IX86_BUILTIN_PACKSSWB256,
28420 IX86_BUILTIN_PACKUSDW256,
28421 IX86_BUILTIN_PACKUSWB256,
28422 IX86_BUILTIN_PADDB256,
28423 IX86_BUILTIN_PADDW256,
28424 IX86_BUILTIN_PADDD256,
28425 IX86_BUILTIN_PADDQ256,
28426 IX86_BUILTIN_PADDSB256,
28427 IX86_BUILTIN_PADDSW256,
28428 IX86_BUILTIN_PADDUSB256,
28429 IX86_BUILTIN_PADDUSW256,
28430 IX86_BUILTIN_PALIGNR256,
28431 IX86_BUILTIN_AND256I,
28432 IX86_BUILTIN_ANDNOT256I,
28433 IX86_BUILTIN_PAVGB256,
28434 IX86_BUILTIN_PAVGW256,
28435 IX86_BUILTIN_PBLENDVB256,
28436 IX86_BUILTIN_PBLENDVW256,
28437 IX86_BUILTIN_PCMPEQB256,
28438 IX86_BUILTIN_PCMPEQW256,
28439 IX86_BUILTIN_PCMPEQD256,
28440 IX86_BUILTIN_PCMPEQQ256,
28441 IX86_BUILTIN_PCMPGTB256,
28442 IX86_BUILTIN_PCMPGTW256,
28443 IX86_BUILTIN_PCMPGTD256,
28444 IX86_BUILTIN_PCMPGTQ256,
28445 IX86_BUILTIN_PHADDW256,
28446 IX86_BUILTIN_PHADDD256,
28447 IX86_BUILTIN_PHADDSW256,
28448 IX86_BUILTIN_PHSUBW256,
28449 IX86_BUILTIN_PHSUBD256,
28450 IX86_BUILTIN_PHSUBSW256,
28451 IX86_BUILTIN_PMADDUBSW256,
28452 IX86_BUILTIN_PMADDWD256,
28453 IX86_BUILTIN_PMAXSB256,
28454 IX86_BUILTIN_PMAXSW256,
28455 IX86_BUILTIN_PMAXSD256,
28456 IX86_BUILTIN_PMAXUB256,
28457 IX86_BUILTIN_PMAXUW256,
28458 IX86_BUILTIN_PMAXUD256,
28459 IX86_BUILTIN_PMINSB256,
28460 IX86_BUILTIN_PMINSW256,
28461 IX86_BUILTIN_PMINSD256,
28462 IX86_BUILTIN_PMINUB256,
28463 IX86_BUILTIN_PMINUW256,
28464 IX86_BUILTIN_PMINUD256,
28465 IX86_BUILTIN_PMOVMSKB256,
28466 IX86_BUILTIN_PMOVSXBW256,
28467 IX86_BUILTIN_PMOVSXBD256,
28468 IX86_BUILTIN_PMOVSXBQ256,
28469 IX86_BUILTIN_PMOVSXWD256,
28470 IX86_BUILTIN_PMOVSXWQ256,
28471 IX86_BUILTIN_PMOVSXDQ256,
28472 IX86_BUILTIN_PMOVZXBW256,
28473 IX86_BUILTIN_PMOVZXBD256,
28474 IX86_BUILTIN_PMOVZXBQ256,
28475 IX86_BUILTIN_PMOVZXWD256,
28476 IX86_BUILTIN_PMOVZXWQ256,
28477 IX86_BUILTIN_PMOVZXDQ256,
28478 IX86_BUILTIN_PMULDQ256,
28479 IX86_BUILTIN_PMULHRSW256,
28480 IX86_BUILTIN_PMULHUW256,
28481 IX86_BUILTIN_PMULHW256,
28482 IX86_BUILTIN_PMULLW256,
28483 IX86_BUILTIN_PMULLD256,
28484 IX86_BUILTIN_PMULUDQ256,
28485 IX86_BUILTIN_POR256,
28486 IX86_BUILTIN_PSADBW256,
28487 IX86_BUILTIN_PSHUFB256,
28488 IX86_BUILTIN_PSHUFD256,
28489 IX86_BUILTIN_PSHUFHW256,
28490 IX86_BUILTIN_PSHUFLW256,
28491 IX86_BUILTIN_PSIGNB256,
28492 IX86_BUILTIN_PSIGNW256,
28493 IX86_BUILTIN_PSIGND256,
28494 IX86_BUILTIN_PSLLDQI256,
28495 IX86_BUILTIN_PSLLWI256,
28496 IX86_BUILTIN_PSLLW256,
28497 IX86_BUILTIN_PSLLDI256,
28498 IX86_BUILTIN_PSLLD256,
28499 IX86_BUILTIN_PSLLQI256,
28500 IX86_BUILTIN_PSLLQ256,
28501 IX86_BUILTIN_PSRAWI256,
28502 IX86_BUILTIN_PSRAW256,
28503 IX86_BUILTIN_PSRADI256,
28504 IX86_BUILTIN_PSRAD256,
28505 IX86_BUILTIN_PSRLDQI256,
28506 IX86_BUILTIN_PSRLWI256,
28507 IX86_BUILTIN_PSRLW256,
28508 IX86_BUILTIN_PSRLDI256,
28509 IX86_BUILTIN_PSRLD256,
28510 IX86_BUILTIN_PSRLQI256,
28511 IX86_BUILTIN_PSRLQ256,
28512 IX86_BUILTIN_PSUBB256,
28513 IX86_BUILTIN_PSUBW256,
28514 IX86_BUILTIN_PSUBD256,
28515 IX86_BUILTIN_PSUBQ256,
28516 IX86_BUILTIN_PSUBSB256,
28517 IX86_BUILTIN_PSUBSW256,
28518 IX86_BUILTIN_PSUBUSB256,
28519 IX86_BUILTIN_PSUBUSW256,
28520 IX86_BUILTIN_PUNPCKHBW256,
28521 IX86_BUILTIN_PUNPCKHWD256,
28522 IX86_BUILTIN_PUNPCKHDQ256,
28523 IX86_BUILTIN_PUNPCKHQDQ256,
28524 IX86_BUILTIN_PUNPCKLBW256,
28525 IX86_BUILTIN_PUNPCKLWD256,
28526 IX86_BUILTIN_PUNPCKLDQ256,
28527 IX86_BUILTIN_PUNPCKLQDQ256,
28528 IX86_BUILTIN_PXOR256,
28529 IX86_BUILTIN_MOVNTDQA256,
28530 IX86_BUILTIN_VBROADCASTSS_PS,
28531 IX86_BUILTIN_VBROADCASTSS_PS256,
28532 IX86_BUILTIN_VBROADCASTSD_PD256,
28533 IX86_BUILTIN_VBROADCASTSI256,
28534 IX86_BUILTIN_PBLENDD256,
28535 IX86_BUILTIN_PBLENDD128,
28536 IX86_BUILTIN_PBROADCASTB256,
28537 IX86_BUILTIN_PBROADCASTW256,
28538 IX86_BUILTIN_PBROADCASTD256,
28539 IX86_BUILTIN_PBROADCASTQ256,
28540 IX86_BUILTIN_PBROADCASTB128,
28541 IX86_BUILTIN_PBROADCASTW128,
28542 IX86_BUILTIN_PBROADCASTD128,
28543 IX86_BUILTIN_PBROADCASTQ128,
28544 IX86_BUILTIN_VPERMVARSI256,
28545 IX86_BUILTIN_VPERMDF256,
28546 IX86_BUILTIN_VPERMVARSF256,
28547 IX86_BUILTIN_VPERMDI256,
28548 IX86_BUILTIN_VPERMTI256,
28549 IX86_BUILTIN_VEXTRACT128I256,
28550 IX86_BUILTIN_VINSERT128I256,
28551 IX86_BUILTIN_MASKLOADD,
28552 IX86_BUILTIN_MASKLOADQ,
28553 IX86_BUILTIN_MASKLOADD256,
28554 IX86_BUILTIN_MASKLOADQ256,
28555 IX86_BUILTIN_MASKSTORED,
28556 IX86_BUILTIN_MASKSTOREQ,
28557 IX86_BUILTIN_MASKSTORED256,
28558 IX86_BUILTIN_MASKSTOREQ256,
28559 IX86_BUILTIN_PSLLVV4DI,
28560 IX86_BUILTIN_PSLLVV2DI,
28561 IX86_BUILTIN_PSLLVV8SI,
28562 IX86_BUILTIN_PSLLVV4SI,
28563 IX86_BUILTIN_PSRAVV8SI,
28564 IX86_BUILTIN_PSRAVV4SI,
28565 IX86_BUILTIN_PSRLVV4DI,
28566 IX86_BUILTIN_PSRLVV2DI,
28567 IX86_BUILTIN_PSRLVV8SI,
28568 IX86_BUILTIN_PSRLVV4SI,
28570 IX86_BUILTIN_GATHERSIV2DF,
28571 IX86_BUILTIN_GATHERSIV4DF,
28572 IX86_BUILTIN_GATHERDIV2DF,
28573 IX86_BUILTIN_GATHERDIV4DF,
28574 IX86_BUILTIN_GATHERSIV4SF,
28575 IX86_BUILTIN_GATHERSIV8SF,
28576 IX86_BUILTIN_GATHERDIV4SF,
28577 IX86_BUILTIN_GATHERDIV8SF,
28578 IX86_BUILTIN_GATHERSIV2DI,
28579 IX86_BUILTIN_GATHERSIV4DI,
28580 IX86_BUILTIN_GATHERDIV2DI,
28581 IX86_BUILTIN_GATHERDIV4DI,
28582 IX86_BUILTIN_GATHERSIV4SI,
28583 IX86_BUILTIN_GATHERSIV8SI,
28584 IX86_BUILTIN_GATHERDIV4SI,
28585 IX86_BUILTIN_GATHERDIV8SI,
28587 /* AVX512F */
28588 IX86_BUILTIN_SI512_SI256,
28589 IX86_BUILTIN_PD512_PD256,
28590 IX86_BUILTIN_PS512_PS256,
28591 IX86_BUILTIN_SI512_SI,
28592 IX86_BUILTIN_PD512_PD,
28593 IX86_BUILTIN_PS512_PS,
28594 IX86_BUILTIN_ADDPD512,
28595 IX86_BUILTIN_ADDPS512,
28596 IX86_BUILTIN_ADDSD_ROUND,
28597 IX86_BUILTIN_ADDSS_ROUND,
28598 IX86_BUILTIN_ALIGND512,
28599 IX86_BUILTIN_ALIGNQ512,
28600 IX86_BUILTIN_BLENDMD512,
28601 IX86_BUILTIN_BLENDMPD512,
28602 IX86_BUILTIN_BLENDMPS512,
28603 IX86_BUILTIN_BLENDMQ512,
28604 IX86_BUILTIN_BROADCASTF32X4_512,
28605 IX86_BUILTIN_BROADCASTF64X4_512,
28606 IX86_BUILTIN_BROADCASTI32X4_512,
28607 IX86_BUILTIN_BROADCASTI64X4_512,
28608 IX86_BUILTIN_BROADCASTSD512,
28609 IX86_BUILTIN_BROADCASTSS512,
28610 IX86_BUILTIN_CMPD512,
28611 IX86_BUILTIN_CMPPD512,
28612 IX86_BUILTIN_CMPPS512,
28613 IX86_BUILTIN_CMPQ512,
28614 IX86_BUILTIN_CMPSD_MASK,
28615 IX86_BUILTIN_CMPSS_MASK,
28616 IX86_BUILTIN_COMIDF,
28617 IX86_BUILTIN_COMISF,
28618 IX86_BUILTIN_COMPRESSPD512,
28619 IX86_BUILTIN_COMPRESSPDSTORE512,
28620 IX86_BUILTIN_COMPRESSPS512,
28621 IX86_BUILTIN_COMPRESSPSSTORE512,
28622 IX86_BUILTIN_CVTDQ2PD512,
28623 IX86_BUILTIN_CVTDQ2PS512,
28624 IX86_BUILTIN_CVTPD2DQ512,
28625 IX86_BUILTIN_CVTPD2PS512,
28626 IX86_BUILTIN_CVTPD2UDQ512,
28627 IX86_BUILTIN_CVTPH2PS512,
28628 IX86_BUILTIN_CVTPS2DQ512,
28629 IX86_BUILTIN_CVTPS2PD512,
28630 IX86_BUILTIN_CVTPS2PH512,
28631 IX86_BUILTIN_CVTPS2UDQ512,
28632 IX86_BUILTIN_CVTSD2SS_ROUND,
28633 IX86_BUILTIN_CVTSI2SD64,
28634 IX86_BUILTIN_CVTSI2SS32,
28635 IX86_BUILTIN_CVTSI2SS64,
28636 IX86_BUILTIN_CVTSS2SD_ROUND,
28637 IX86_BUILTIN_CVTTPD2DQ512,
28638 IX86_BUILTIN_CVTTPD2UDQ512,
28639 IX86_BUILTIN_CVTTPS2DQ512,
28640 IX86_BUILTIN_CVTTPS2UDQ512,
28641 IX86_BUILTIN_CVTUDQ2PD512,
28642 IX86_BUILTIN_CVTUDQ2PS512,
28643 IX86_BUILTIN_CVTUSI2SD32,
28644 IX86_BUILTIN_CVTUSI2SD64,
28645 IX86_BUILTIN_CVTUSI2SS32,
28646 IX86_BUILTIN_CVTUSI2SS64,
28647 IX86_BUILTIN_DIVPD512,
28648 IX86_BUILTIN_DIVPS512,
28649 IX86_BUILTIN_DIVSD_ROUND,
28650 IX86_BUILTIN_DIVSS_ROUND,
28651 IX86_BUILTIN_EXPANDPD512,
28652 IX86_BUILTIN_EXPANDPD512Z,
28653 IX86_BUILTIN_EXPANDPDLOAD512,
28654 IX86_BUILTIN_EXPANDPDLOAD512Z,
28655 IX86_BUILTIN_EXPANDPS512,
28656 IX86_BUILTIN_EXPANDPS512Z,
28657 IX86_BUILTIN_EXPANDPSLOAD512,
28658 IX86_BUILTIN_EXPANDPSLOAD512Z,
28659 IX86_BUILTIN_EXTRACTF32X4,
28660 IX86_BUILTIN_EXTRACTF64X4,
28661 IX86_BUILTIN_EXTRACTI32X4,
28662 IX86_BUILTIN_EXTRACTI64X4,
28663 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28664 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28665 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28666 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28667 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28668 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28669 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28670 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28671 IX86_BUILTIN_GETEXPPD512,
28672 IX86_BUILTIN_GETEXPPS512,
28673 IX86_BUILTIN_GETEXPSD128,
28674 IX86_BUILTIN_GETEXPSS128,
28675 IX86_BUILTIN_GETMANTPD512,
28676 IX86_BUILTIN_GETMANTPS512,
28677 IX86_BUILTIN_GETMANTSD128,
28678 IX86_BUILTIN_GETMANTSS128,
28679 IX86_BUILTIN_INSERTF32X4,
28680 IX86_BUILTIN_INSERTF64X4,
28681 IX86_BUILTIN_INSERTI32X4,
28682 IX86_BUILTIN_INSERTI64X4,
28683 IX86_BUILTIN_LOADAPD512,
28684 IX86_BUILTIN_LOADAPS512,
28685 IX86_BUILTIN_LOADDQUDI512,
28686 IX86_BUILTIN_LOADDQUSI512,
28687 IX86_BUILTIN_LOADUPD512,
28688 IX86_BUILTIN_LOADUPS512,
28689 IX86_BUILTIN_MAXPD512,
28690 IX86_BUILTIN_MAXPS512,
28691 IX86_BUILTIN_MAXSD_ROUND,
28692 IX86_BUILTIN_MAXSS_ROUND,
28693 IX86_BUILTIN_MINPD512,
28694 IX86_BUILTIN_MINPS512,
28695 IX86_BUILTIN_MINSD_ROUND,
28696 IX86_BUILTIN_MINSS_ROUND,
28697 IX86_BUILTIN_MOVAPD512,
28698 IX86_BUILTIN_MOVAPS512,
28699 IX86_BUILTIN_MOVDDUP512,
28700 IX86_BUILTIN_MOVDQA32LOAD512,
28701 IX86_BUILTIN_MOVDQA32STORE512,
28702 IX86_BUILTIN_MOVDQA32_512,
28703 IX86_BUILTIN_MOVDQA64LOAD512,
28704 IX86_BUILTIN_MOVDQA64STORE512,
28705 IX86_BUILTIN_MOVDQA64_512,
28706 IX86_BUILTIN_MOVNTDQ512,
28707 IX86_BUILTIN_MOVNTDQA512,
28708 IX86_BUILTIN_MOVNTPD512,
28709 IX86_BUILTIN_MOVNTPS512,
28710 IX86_BUILTIN_MOVSHDUP512,
28711 IX86_BUILTIN_MOVSLDUP512,
28712 IX86_BUILTIN_MULPD512,
28713 IX86_BUILTIN_MULPS512,
28714 IX86_BUILTIN_MULSD_ROUND,
28715 IX86_BUILTIN_MULSS_ROUND,
28716 IX86_BUILTIN_PABSD512,
28717 IX86_BUILTIN_PABSQ512,
28718 IX86_BUILTIN_PADDD512,
28719 IX86_BUILTIN_PADDQ512,
28720 IX86_BUILTIN_PANDD512,
28721 IX86_BUILTIN_PANDND512,
28722 IX86_BUILTIN_PANDNQ512,
28723 IX86_BUILTIN_PANDQ512,
28724 IX86_BUILTIN_PBROADCASTD512,
28725 IX86_BUILTIN_PBROADCASTD512_GPR,
28726 IX86_BUILTIN_PBROADCASTMB512,
28727 IX86_BUILTIN_PBROADCASTMW512,
28728 IX86_BUILTIN_PBROADCASTQ512,
28729 IX86_BUILTIN_PBROADCASTQ512_GPR,
28730 IX86_BUILTIN_PBROADCASTQ512_MEM,
28731 IX86_BUILTIN_PCMPEQD512_MASK,
28732 IX86_BUILTIN_PCMPEQQ512_MASK,
28733 IX86_BUILTIN_PCMPGTD512_MASK,
28734 IX86_BUILTIN_PCMPGTQ512_MASK,
28735 IX86_BUILTIN_PCOMPRESSD512,
28736 IX86_BUILTIN_PCOMPRESSDSTORE512,
28737 IX86_BUILTIN_PCOMPRESSQ512,
28738 IX86_BUILTIN_PCOMPRESSQSTORE512,
28739 IX86_BUILTIN_PEXPANDD512,
28740 IX86_BUILTIN_PEXPANDD512Z,
28741 IX86_BUILTIN_PEXPANDDLOAD512,
28742 IX86_BUILTIN_PEXPANDDLOAD512Z,
28743 IX86_BUILTIN_PEXPANDQ512,
28744 IX86_BUILTIN_PEXPANDQ512Z,
28745 IX86_BUILTIN_PEXPANDQLOAD512,
28746 IX86_BUILTIN_PEXPANDQLOAD512Z,
28747 IX86_BUILTIN_PMAXSD512,
28748 IX86_BUILTIN_PMAXSQ512,
28749 IX86_BUILTIN_PMAXUD512,
28750 IX86_BUILTIN_PMAXUQ512,
28751 IX86_BUILTIN_PMINSD512,
28752 IX86_BUILTIN_PMINSQ512,
28753 IX86_BUILTIN_PMINUD512,
28754 IX86_BUILTIN_PMINUQ512,
28755 IX86_BUILTIN_PMOVDB512,
28756 IX86_BUILTIN_PMOVDB512_MEM,
28757 IX86_BUILTIN_PMOVDW512,
28758 IX86_BUILTIN_PMOVDW512_MEM,
28759 IX86_BUILTIN_PMOVQB512,
28760 IX86_BUILTIN_PMOVQB512_MEM,
28761 IX86_BUILTIN_PMOVQD512,
28762 IX86_BUILTIN_PMOVQD512_MEM,
28763 IX86_BUILTIN_PMOVQW512,
28764 IX86_BUILTIN_PMOVQW512_MEM,
28765 IX86_BUILTIN_PMOVSDB512,
28766 IX86_BUILTIN_PMOVSDB512_MEM,
28767 IX86_BUILTIN_PMOVSDW512,
28768 IX86_BUILTIN_PMOVSDW512_MEM,
28769 IX86_BUILTIN_PMOVSQB512,
28770 IX86_BUILTIN_PMOVSQB512_MEM,
28771 IX86_BUILTIN_PMOVSQD512,
28772 IX86_BUILTIN_PMOVSQD512_MEM,
28773 IX86_BUILTIN_PMOVSQW512,
28774 IX86_BUILTIN_PMOVSQW512_MEM,
28775 IX86_BUILTIN_PMOVSXBD512,
28776 IX86_BUILTIN_PMOVSXBQ512,
28777 IX86_BUILTIN_PMOVSXDQ512,
28778 IX86_BUILTIN_PMOVSXWD512,
28779 IX86_BUILTIN_PMOVSXWQ512,
28780 IX86_BUILTIN_PMOVUSDB512,
28781 IX86_BUILTIN_PMOVUSDB512_MEM,
28782 IX86_BUILTIN_PMOVUSDW512,
28783 IX86_BUILTIN_PMOVUSDW512_MEM,
28784 IX86_BUILTIN_PMOVUSQB512,
28785 IX86_BUILTIN_PMOVUSQB512_MEM,
28786 IX86_BUILTIN_PMOVUSQD512,
28787 IX86_BUILTIN_PMOVUSQD512_MEM,
28788 IX86_BUILTIN_PMOVUSQW512,
28789 IX86_BUILTIN_PMOVUSQW512_MEM,
28790 IX86_BUILTIN_PMOVZXBD512,
28791 IX86_BUILTIN_PMOVZXBQ512,
28792 IX86_BUILTIN_PMOVZXDQ512,
28793 IX86_BUILTIN_PMOVZXWD512,
28794 IX86_BUILTIN_PMOVZXWQ512,
28795 IX86_BUILTIN_PMULDQ512,
28796 IX86_BUILTIN_PMULLD512,
28797 IX86_BUILTIN_PMULUDQ512,
28798 IX86_BUILTIN_PORD512,
28799 IX86_BUILTIN_PORQ512,
28800 IX86_BUILTIN_PROLD512,
28801 IX86_BUILTIN_PROLQ512,
28802 IX86_BUILTIN_PROLVD512,
28803 IX86_BUILTIN_PROLVQ512,
28804 IX86_BUILTIN_PRORD512,
28805 IX86_BUILTIN_PRORQ512,
28806 IX86_BUILTIN_PRORVD512,
28807 IX86_BUILTIN_PRORVQ512,
28808 IX86_BUILTIN_PSHUFD512,
28809 IX86_BUILTIN_PSLLD512,
28810 IX86_BUILTIN_PSLLDI512,
28811 IX86_BUILTIN_PSLLQ512,
28812 IX86_BUILTIN_PSLLQI512,
28813 IX86_BUILTIN_PSLLVV16SI,
28814 IX86_BUILTIN_PSLLVV8DI,
28815 IX86_BUILTIN_PSRAD512,
28816 IX86_BUILTIN_PSRADI512,
28817 IX86_BUILTIN_PSRAQ512,
28818 IX86_BUILTIN_PSRAQI512,
28819 IX86_BUILTIN_PSRAVV16SI,
28820 IX86_BUILTIN_PSRAVV8DI,
28821 IX86_BUILTIN_PSRLD512,
28822 IX86_BUILTIN_PSRLDI512,
28823 IX86_BUILTIN_PSRLQ512,
28824 IX86_BUILTIN_PSRLQI512,
28825 IX86_BUILTIN_PSRLVV16SI,
28826 IX86_BUILTIN_PSRLVV8DI,
28827 IX86_BUILTIN_PSUBD512,
28828 IX86_BUILTIN_PSUBQ512,
28829 IX86_BUILTIN_PTESTMD512,
28830 IX86_BUILTIN_PTESTMQ512,
28831 IX86_BUILTIN_PTESTNMD512,
28832 IX86_BUILTIN_PTESTNMQ512,
28833 IX86_BUILTIN_PUNPCKHDQ512,
28834 IX86_BUILTIN_PUNPCKHQDQ512,
28835 IX86_BUILTIN_PUNPCKLDQ512,
28836 IX86_BUILTIN_PUNPCKLQDQ512,
28837 IX86_BUILTIN_PXORD512,
28838 IX86_BUILTIN_PXORQ512,
28839 IX86_BUILTIN_RCP14PD512,
28840 IX86_BUILTIN_RCP14PS512,
28841 IX86_BUILTIN_RCP14SD,
28842 IX86_BUILTIN_RCP14SS,
28843 IX86_BUILTIN_RNDSCALEPD,
28844 IX86_BUILTIN_RNDSCALEPS,
28845 IX86_BUILTIN_RNDSCALESD,
28846 IX86_BUILTIN_RNDSCALESS,
28847 IX86_BUILTIN_RSQRT14PD512,
28848 IX86_BUILTIN_RSQRT14PS512,
28849 IX86_BUILTIN_RSQRT14SD,
28850 IX86_BUILTIN_RSQRT14SS,
28851 IX86_BUILTIN_SCALEFPD512,
28852 IX86_BUILTIN_SCALEFPS512,
28853 IX86_BUILTIN_SCALEFSD,
28854 IX86_BUILTIN_SCALEFSS,
28855 IX86_BUILTIN_SHUFPD512,
28856 IX86_BUILTIN_SHUFPS512,
28857 IX86_BUILTIN_SHUF_F32x4,
28858 IX86_BUILTIN_SHUF_F64x2,
28859 IX86_BUILTIN_SHUF_I32x4,
28860 IX86_BUILTIN_SHUF_I64x2,
28861 IX86_BUILTIN_SQRTPD512,
28862 IX86_BUILTIN_SQRTPD512_MASK,
28863 IX86_BUILTIN_SQRTPS512_MASK,
28864 IX86_BUILTIN_SQRTPS_NR512,
28865 IX86_BUILTIN_SQRTSD_ROUND,
28866 IX86_BUILTIN_SQRTSS_ROUND,
28867 IX86_BUILTIN_STOREAPD512,
28868 IX86_BUILTIN_STOREAPS512,
28869 IX86_BUILTIN_STOREDQUDI512,
28870 IX86_BUILTIN_STOREDQUSI512,
28871 IX86_BUILTIN_STOREUPD512,
28872 IX86_BUILTIN_STOREUPS512,
28873 IX86_BUILTIN_SUBPD512,
28874 IX86_BUILTIN_SUBPS512,
28875 IX86_BUILTIN_SUBSD_ROUND,
28876 IX86_BUILTIN_SUBSS_ROUND,
28877 IX86_BUILTIN_UCMPD512,
28878 IX86_BUILTIN_UCMPQ512,
28879 IX86_BUILTIN_UNPCKHPD512,
28880 IX86_BUILTIN_UNPCKHPS512,
28881 IX86_BUILTIN_UNPCKLPD512,
28882 IX86_BUILTIN_UNPCKLPS512,
28883 IX86_BUILTIN_VCVTSD2SI32,
28884 IX86_BUILTIN_VCVTSD2SI64,
28885 IX86_BUILTIN_VCVTSD2USI32,
28886 IX86_BUILTIN_VCVTSD2USI64,
28887 IX86_BUILTIN_VCVTSS2SI32,
28888 IX86_BUILTIN_VCVTSS2SI64,
28889 IX86_BUILTIN_VCVTSS2USI32,
28890 IX86_BUILTIN_VCVTSS2USI64,
28891 IX86_BUILTIN_VCVTTSD2SI32,
28892 IX86_BUILTIN_VCVTTSD2SI64,
28893 IX86_BUILTIN_VCVTTSD2USI32,
28894 IX86_BUILTIN_VCVTTSD2USI64,
28895 IX86_BUILTIN_VCVTTSS2SI32,
28896 IX86_BUILTIN_VCVTTSS2SI64,
28897 IX86_BUILTIN_VCVTTSS2USI32,
28898 IX86_BUILTIN_VCVTTSS2USI64,
28899 IX86_BUILTIN_VFMADDPD512_MASK,
28900 IX86_BUILTIN_VFMADDPD512_MASK3,
28901 IX86_BUILTIN_VFMADDPD512_MASKZ,
28902 IX86_BUILTIN_VFMADDPS512_MASK,
28903 IX86_BUILTIN_VFMADDPS512_MASK3,
28904 IX86_BUILTIN_VFMADDPS512_MASKZ,
28905 IX86_BUILTIN_VFMADDSD3_ROUND,
28906 IX86_BUILTIN_VFMADDSS3_ROUND,
28907 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28908 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28909 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28910 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28911 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28912 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28913 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28914 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28915 IX86_BUILTIN_VFMSUBPD512_MASK3,
28916 IX86_BUILTIN_VFMSUBPS512_MASK3,
28917 IX86_BUILTIN_VFMSUBSD3_MASK3,
28918 IX86_BUILTIN_VFMSUBSS3_MASK3,
28919 IX86_BUILTIN_VFNMADDPD512_MASK,
28920 IX86_BUILTIN_VFNMADDPS512_MASK,
28921 IX86_BUILTIN_VFNMSUBPD512_MASK,
28922 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28923 IX86_BUILTIN_VFNMSUBPS512_MASK,
28924 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28925 IX86_BUILTIN_VPCLZCNTD512,
28926 IX86_BUILTIN_VPCLZCNTQ512,
28927 IX86_BUILTIN_VPCONFLICTD512,
28928 IX86_BUILTIN_VPCONFLICTQ512,
28929 IX86_BUILTIN_VPERMDF512,
28930 IX86_BUILTIN_VPERMDI512,
28931 IX86_BUILTIN_VPERMI2VARD512,
28932 IX86_BUILTIN_VPERMI2VARPD512,
28933 IX86_BUILTIN_VPERMI2VARPS512,
28934 IX86_BUILTIN_VPERMI2VARQ512,
28935 IX86_BUILTIN_VPERMILPD512,
28936 IX86_BUILTIN_VPERMILPS512,
28937 IX86_BUILTIN_VPERMILVARPD512,
28938 IX86_BUILTIN_VPERMILVARPS512,
28939 IX86_BUILTIN_VPERMT2VARD512,
28940 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28941 IX86_BUILTIN_VPERMT2VARPD512,
28942 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28943 IX86_BUILTIN_VPERMT2VARPS512,
28944 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28945 IX86_BUILTIN_VPERMT2VARQ512,
28946 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28947 IX86_BUILTIN_VPERMVARDF512,
28948 IX86_BUILTIN_VPERMVARDI512,
28949 IX86_BUILTIN_VPERMVARSF512,
28950 IX86_BUILTIN_VPERMVARSI512,
28951 IX86_BUILTIN_VTERNLOGD512_MASK,
28952 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28953 IX86_BUILTIN_VTERNLOGQ512_MASK,
28954 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28956 /* Mask arithmetic operations */
28957 IX86_BUILTIN_KAND16,
28958 IX86_BUILTIN_KANDN16,
28959 IX86_BUILTIN_KNOT16,
28960 IX86_BUILTIN_KOR16,
28961 IX86_BUILTIN_KORTESTC16,
28962 IX86_BUILTIN_KORTESTZ16,
28963 IX86_BUILTIN_KUNPCKBW,
28964 IX86_BUILTIN_KXNOR16,
28965 IX86_BUILTIN_KXOR16,
28966 IX86_BUILTIN_KMOV16,
28968 /* AVX512VL. */
28969 IX86_BUILTIN_PMOVUSQD256_MEM,
28970 IX86_BUILTIN_PMOVUSQD128_MEM,
28971 IX86_BUILTIN_PMOVSQD256_MEM,
28972 IX86_BUILTIN_PMOVSQD128_MEM,
28973 IX86_BUILTIN_PMOVQD256_MEM,
28974 IX86_BUILTIN_PMOVQD128_MEM,
28975 IX86_BUILTIN_PMOVUSQW256_MEM,
28976 IX86_BUILTIN_PMOVUSQW128_MEM,
28977 IX86_BUILTIN_PMOVSQW256_MEM,
28978 IX86_BUILTIN_PMOVSQW128_MEM,
28979 IX86_BUILTIN_PMOVQW256_MEM,
28980 IX86_BUILTIN_PMOVQW128_MEM,
28981 IX86_BUILTIN_PMOVUSQB256_MEM,
28982 IX86_BUILTIN_PMOVUSQB128_MEM,
28983 IX86_BUILTIN_PMOVSQB256_MEM,
28984 IX86_BUILTIN_PMOVSQB128_MEM,
28985 IX86_BUILTIN_PMOVQB256_MEM,
28986 IX86_BUILTIN_PMOVQB128_MEM,
28987 IX86_BUILTIN_PMOVUSDW256_MEM,
28988 IX86_BUILTIN_PMOVUSDW128_MEM,
28989 IX86_BUILTIN_PMOVSDW256_MEM,
28990 IX86_BUILTIN_PMOVSDW128_MEM,
28991 IX86_BUILTIN_PMOVDW256_MEM,
28992 IX86_BUILTIN_PMOVDW128_MEM,
28993 IX86_BUILTIN_PMOVUSDB256_MEM,
28994 IX86_BUILTIN_PMOVUSDB128_MEM,
28995 IX86_BUILTIN_PMOVSDB256_MEM,
28996 IX86_BUILTIN_PMOVSDB128_MEM,
28997 IX86_BUILTIN_PMOVDB256_MEM,
28998 IX86_BUILTIN_PMOVDB128_MEM,
28999 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29000 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29001 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29002 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29003 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29004 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29005 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29006 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29007 IX86_BUILTIN_LOADAPD256_MASK,
29008 IX86_BUILTIN_LOADAPD128_MASK,
29009 IX86_BUILTIN_LOADAPS256_MASK,
29010 IX86_BUILTIN_LOADAPS128_MASK,
29011 IX86_BUILTIN_STOREAPD256_MASK,
29012 IX86_BUILTIN_STOREAPD128_MASK,
29013 IX86_BUILTIN_STOREAPS256_MASK,
29014 IX86_BUILTIN_STOREAPS128_MASK,
29015 IX86_BUILTIN_LOADUPD256_MASK,
29016 IX86_BUILTIN_LOADUPD128_MASK,
29017 IX86_BUILTIN_LOADUPS256_MASK,
29018 IX86_BUILTIN_LOADUPS128_MASK,
29019 IX86_BUILTIN_STOREUPD256_MASK,
29020 IX86_BUILTIN_STOREUPD128_MASK,
29021 IX86_BUILTIN_STOREUPS256_MASK,
29022 IX86_BUILTIN_STOREUPS128_MASK,
29023 IX86_BUILTIN_LOADDQUDI256_MASK,
29024 IX86_BUILTIN_LOADDQUDI128_MASK,
29025 IX86_BUILTIN_LOADDQUSI256_MASK,
29026 IX86_BUILTIN_LOADDQUSI128_MASK,
29027 IX86_BUILTIN_LOADDQUHI256_MASK,
29028 IX86_BUILTIN_LOADDQUHI128_MASK,
29029 IX86_BUILTIN_LOADDQUQI256_MASK,
29030 IX86_BUILTIN_LOADDQUQI128_MASK,
29031 IX86_BUILTIN_STOREDQUDI256_MASK,
29032 IX86_BUILTIN_STOREDQUDI128_MASK,
29033 IX86_BUILTIN_STOREDQUSI256_MASK,
29034 IX86_BUILTIN_STOREDQUSI128_MASK,
29035 IX86_BUILTIN_STOREDQUHI256_MASK,
29036 IX86_BUILTIN_STOREDQUHI128_MASK,
29037 IX86_BUILTIN_STOREDQUQI256_MASK,
29038 IX86_BUILTIN_STOREDQUQI128_MASK,
29039 IX86_BUILTIN_COMPRESSPDSTORE256,
29040 IX86_BUILTIN_COMPRESSPDSTORE128,
29041 IX86_BUILTIN_COMPRESSPSSTORE256,
29042 IX86_BUILTIN_COMPRESSPSSTORE128,
29043 IX86_BUILTIN_PCOMPRESSQSTORE256,
29044 IX86_BUILTIN_PCOMPRESSQSTORE128,
29045 IX86_BUILTIN_PCOMPRESSDSTORE256,
29046 IX86_BUILTIN_PCOMPRESSDSTORE128,
29047 IX86_BUILTIN_EXPANDPDLOAD256,
29048 IX86_BUILTIN_EXPANDPDLOAD128,
29049 IX86_BUILTIN_EXPANDPSLOAD256,
29050 IX86_BUILTIN_EXPANDPSLOAD128,
29051 IX86_BUILTIN_PEXPANDQLOAD256,
29052 IX86_BUILTIN_PEXPANDQLOAD128,
29053 IX86_BUILTIN_PEXPANDDLOAD256,
29054 IX86_BUILTIN_PEXPANDDLOAD128,
29055 IX86_BUILTIN_EXPANDPDLOAD256Z,
29056 IX86_BUILTIN_EXPANDPDLOAD128Z,
29057 IX86_BUILTIN_EXPANDPSLOAD256Z,
29058 IX86_BUILTIN_EXPANDPSLOAD128Z,
29059 IX86_BUILTIN_PEXPANDQLOAD256Z,
29060 IX86_BUILTIN_PEXPANDQLOAD128Z,
29061 IX86_BUILTIN_PEXPANDDLOAD256Z,
29062 IX86_BUILTIN_PEXPANDDLOAD128Z,
29063 IX86_BUILTIN_PALIGNR256_MASK,
29064 IX86_BUILTIN_PALIGNR128_MASK,
29065 IX86_BUILTIN_MOVDQA64_256_MASK,
29066 IX86_BUILTIN_MOVDQA64_128_MASK,
29067 IX86_BUILTIN_MOVDQA32_256_MASK,
29068 IX86_BUILTIN_MOVDQA32_128_MASK,
29069 IX86_BUILTIN_MOVAPD256_MASK,
29070 IX86_BUILTIN_MOVAPD128_MASK,
29071 IX86_BUILTIN_MOVAPS256_MASK,
29072 IX86_BUILTIN_MOVAPS128_MASK,
29073 IX86_BUILTIN_MOVDQUHI256_MASK,
29074 IX86_BUILTIN_MOVDQUHI128_MASK,
29075 IX86_BUILTIN_MOVDQUQI256_MASK,
29076 IX86_BUILTIN_MOVDQUQI128_MASK,
29077 IX86_BUILTIN_MINPS128_MASK,
29078 IX86_BUILTIN_MAXPS128_MASK,
29079 IX86_BUILTIN_MINPD128_MASK,
29080 IX86_BUILTIN_MAXPD128_MASK,
29081 IX86_BUILTIN_MAXPD256_MASK,
29082 IX86_BUILTIN_MAXPS256_MASK,
29083 IX86_BUILTIN_MINPD256_MASK,
29084 IX86_BUILTIN_MINPS256_MASK,
29085 IX86_BUILTIN_MULPS128_MASK,
29086 IX86_BUILTIN_DIVPS128_MASK,
29087 IX86_BUILTIN_MULPD128_MASK,
29088 IX86_BUILTIN_DIVPD128_MASK,
29089 IX86_BUILTIN_DIVPD256_MASK,
29090 IX86_BUILTIN_DIVPS256_MASK,
29091 IX86_BUILTIN_MULPD256_MASK,
29092 IX86_BUILTIN_MULPS256_MASK,
29093 IX86_BUILTIN_ADDPD128_MASK,
29094 IX86_BUILTIN_ADDPD256_MASK,
29095 IX86_BUILTIN_ADDPS128_MASK,
29096 IX86_BUILTIN_ADDPS256_MASK,
29097 IX86_BUILTIN_SUBPD128_MASK,
29098 IX86_BUILTIN_SUBPD256_MASK,
29099 IX86_BUILTIN_SUBPS128_MASK,
29100 IX86_BUILTIN_SUBPS256_MASK,
29101 IX86_BUILTIN_XORPD256_MASK,
29102 IX86_BUILTIN_XORPD128_MASK,
29103 IX86_BUILTIN_XORPS256_MASK,
29104 IX86_BUILTIN_XORPS128_MASK,
29105 IX86_BUILTIN_ORPD256_MASK,
29106 IX86_BUILTIN_ORPD128_MASK,
29107 IX86_BUILTIN_ORPS256_MASK,
29108 IX86_BUILTIN_ORPS128_MASK,
29109 IX86_BUILTIN_BROADCASTF32x2_256,
29110 IX86_BUILTIN_BROADCASTI32x2_256,
29111 IX86_BUILTIN_BROADCASTI32x2_128,
29112 IX86_BUILTIN_BROADCASTF64X2_256,
29113 IX86_BUILTIN_BROADCASTI64X2_256,
29114 IX86_BUILTIN_BROADCASTF32X4_256,
29115 IX86_BUILTIN_BROADCASTI32X4_256,
29116 IX86_BUILTIN_EXTRACTF32X4_256,
29117 IX86_BUILTIN_EXTRACTI32X4_256,
29118 IX86_BUILTIN_DBPSADBW256,
29119 IX86_BUILTIN_DBPSADBW128,
29120 IX86_BUILTIN_CVTTPD2QQ256,
29121 IX86_BUILTIN_CVTTPD2QQ128,
29122 IX86_BUILTIN_CVTTPD2UQQ256,
29123 IX86_BUILTIN_CVTTPD2UQQ128,
29124 IX86_BUILTIN_CVTPD2QQ256,
29125 IX86_BUILTIN_CVTPD2QQ128,
29126 IX86_BUILTIN_CVTPD2UQQ256,
29127 IX86_BUILTIN_CVTPD2UQQ128,
29128 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29129 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29130 IX86_BUILTIN_CVTTPS2QQ256,
29131 IX86_BUILTIN_CVTTPS2QQ128,
29132 IX86_BUILTIN_CVTTPS2UQQ256,
29133 IX86_BUILTIN_CVTTPS2UQQ128,
29134 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29135 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29136 IX86_BUILTIN_CVTTPS2UDQ256,
29137 IX86_BUILTIN_CVTTPS2UDQ128,
29138 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29139 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29140 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29141 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29142 IX86_BUILTIN_CVTPD2DQ256_MASK,
29143 IX86_BUILTIN_CVTPD2DQ128_MASK,
29144 IX86_BUILTIN_CVTDQ2PD256_MASK,
29145 IX86_BUILTIN_CVTDQ2PD128_MASK,
29146 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29147 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29148 IX86_BUILTIN_CVTDQ2PS256_MASK,
29149 IX86_BUILTIN_CVTDQ2PS128_MASK,
29150 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29151 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29152 IX86_BUILTIN_CVTPS2PD256_MASK,
29153 IX86_BUILTIN_CVTPS2PD128_MASK,
29154 IX86_BUILTIN_PBROADCASTB256_MASK,
29155 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29156 IX86_BUILTIN_PBROADCASTB128_MASK,
29157 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29158 IX86_BUILTIN_PBROADCASTW256_MASK,
29159 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29160 IX86_BUILTIN_PBROADCASTW128_MASK,
29161 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29162 IX86_BUILTIN_PBROADCASTD256_MASK,
29163 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29164 IX86_BUILTIN_PBROADCASTD128_MASK,
29165 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29166 IX86_BUILTIN_PBROADCASTQ256_MASK,
29167 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29168 IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
29169 IX86_BUILTIN_PBROADCASTQ128_MASK,
29170 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29171 IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
29172 IX86_BUILTIN_BROADCASTSS256,
29173 IX86_BUILTIN_BROADCASTSS128,
29174 IX86_BUILTIN_BROADCASTSD256,
29175 IX86_BUILTIN_EXTRACTF64X2_256,
29176 IX86_BUILTIN_EXTRACTI64X2_256,
29177 IX86_BUILTIN_INSERTF32X4_256,
29178 IX86_BUILTIN_INSERTI32X4_256,
29179 IX86_BUILTIN_PMOVSXBW256_MASK,
29180 IX86_BUILTIN_PMOVSXBW128_MASK,
29181 IX86_BUILTIN_PMOVSXBD256_MASK,
29182 IX86_BUILTIN_PMOVSXBD128_MASK,
29183 IX86_BUILTIN_PMOVSXBQ256_MASK,
29184 IX86_BUILTIN_PMOVSXBQ128_MASK,
29185 IX86_BUILTIN_PMOVSXWD256_MASK,
29186 IX86_BUILTIN_PMOVSXWD128_MASK,
29187 IX86_BUILTIN_PMOVSXWQ256_MASK,
29188 IX86_BUILTIN_PMOVSXWQ128_MASK,
29189 IX86_BUILTIN_PMOVSXDQ256_MASK,
29190 IX86_BUILTIN_PMOVSXDQ128_MASK,
29191 IX86_BUILTIN_PMOVZXBW256_MASK,
29192 IX86_BUILTIN_PMOVZXBW128_MASK,
29193 IX86_BUILTIN_PMOVZXBD256_MASK,
29194 IX86_BUILTIN_PMOVZXBD128_MASK,
29195 IX86_BUILTIN_PMOVZXBQ256_MASK,
29196 IX86_BUILTIN_PMOVZXBQ128_MASK,
29197 IX86_BUILTIN_PMOVZXWD256_MASK,
29198 IX86_BUILTIN_PMOVZXWD128_MASK,
29199 IX86_BUILTIN_PMOVZXWQ256_MASK,
29200 IX86_BUILTIN_PMOVZXWQ128_MASK,
29201 IX86_BUILTIN_PMOVZXDQ256_MASK,
29202 IX86_BUILTIN_PMOVZXDQ128_MASK,
29203 IX86_BUILTIN_REDUCEPD256_MASK,
29204 IX86_BUILTIN_REDUCEPD128_MASK,
29205 IX86_BUILTIN_REDUCEPS256_MASK,
29206 IX86_BUILTIN_REDUCEPS128_MASK,
29207 IX86_BUILTIN_REDUCESD_MASK,
29208 IX86_BUILTIN_REDUCESS_MASK,
29209 IX86_BUILTIN_VPERMVARHI256_MASK,
29210 IX86_BUILTIN_VPERMVARHI128_MASK,
29211 IX86_BUILTIN_VPERMT2VARHI256,
29212 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29213 IX86_BUILTIN_VPERMT2VARHI128,
29214 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29215 IX86_BUILTIN_VPERMI2VARHI256,
29216 IX86_BUILTIN_VPERMI2VARHI128,
29217 IX86_BUILTIN_RCP14PD256,
29218 IX86_BUILTIN_RCP14PD128,
29219 IX86_BUILTIN_RCP14PS256,
29220 IX86_BUILTIN_RCP14PS128,
29221 IX86_BUILTIN_RSQRT14PD256_MASK,
29222 IX86_BUILTIN_RSQRT14PD128_MASK,
29223 IX86_BUILTIN_RSQRT14PS256_MASK,
29224 IX86_BUILTIN_RSQRT14PS128_MASK,
29225 IX86_BUILTIN_SQRTPD256_MASK,
29226 IX86_BUILTIN_SQRTPD128_MASK,
29227 IX86_BUILTIN_SQRTPS256_MASK,
29228 IX86_BUILTIN_SQRTPS128_MASK,
29229 IX86_BUILTIN_PADDB128_MASK,
29230 IX86_BUILTIN_PADDW128_MASK,
29231 IX86_BUILTIN_PADDD128_MASK,
29232 IX86_BUILTIN_PADDQ128_MASK,
29233 IX86_BUILTIN_PSUBB128_MASK,
29234 IX86_BUILTIN_PSUBW128_MASK,
29235 IX86_BUILTIN_PSUBD128_MASK,
29236 IX86_BUILTIN_PSUBQ128_MASK,
29237 IX86_BUILTIN_PADDSB128_MASK,
29238 IX86_BUILTIN_PADDSW128_MASK,
29239 IX86_BUILTIN_PSUBSB128_MASK,
29240 IX86_BUILTIN_PSUBSW128_MASK,
29241 IX86_BUILTIN_PADDUSB128_MASK,
29242 IX86_BUILTIN_PADDUSW128_MASK,
29243 IX86_BUILTIN_PSUBUSB128_MASK,
29244 IX86_BUILTIN_PSUBUSW128_MASK,
29245 IX86_BUILTIN_PADDB256_MASK,
29246 IX86_BUILTIN_PADDW256_MASK,
29247 IX86_BUILTIN_PADDD256_MASK,
29248 IX86_BUILTIN_PADDQ256_MASK,
29249 IX86_BUILTIN_PADDSB256_MASK,
29250 IX86_BUILTIN_PADDSW256_MASK,
29251 IX86_BUILTIN_PADDUSB256_MASK,
29252 IX86_BUILTIN_PADDUSW256_MASK,
29253 IX86_BUILTIN_PSUBB256_MASK,
29254 IX86_BUILTIN_PSUBW256_MASK,
29255 IX86_BUILTIN_PSUBD256_MASK,
29256 IX86_BUILTIN_PSUBQ256_MASK,
29257 IX86_BUILTIN_PSUBSB256_MASK,
29258 IX86_BUILTIN_PSUBSW256_MASK,
29259 IX86_BUILTIN_PSUBUSB256_MASK,
29260 IX86_BUILTIN_PSUBUSW256_MASK,
29261 IX86_BUILTIN_SHUF_F64x2_256,
29262 IX86_BUILTIN_SHUF_I64x2_256,
29263 IX86_BUILTIN_SHUF_I32x4_256,
29264 IX86_BUILTIN_SHUF_F32x4_256,
29265 IX86_BUILTIN_PMOVWB128,
29266 IX86_BUILTIN_PMOVWB256,
29267 IX86_BUILTIN_PMOVSWB128,
29268 IX86_BUILTIN_PMOVSWB256,
29269 IX86_BUILTIN_PMOVUSWB128,
29270 IX86_BUILTIN_PMOVUSWB256,
29271 IX86_BUILTIN_PMOVDB128,
29272 IX86_BUILTIN_PMOVDB256,
29273 IX86_BUILTIN_PMOVSDB128,
29274 IX86_BUILTIN_PMOVSDB256,
29275 IX86_BUILTIN_PMOVUSDB128,
29276 IX86_BUILTIN_PMOVUSDB256,
29277 IX86_BUILTIN_PMOVDW128,
29278 IX86_BUILTIN_PMOVDW256,
29279 IX86_BUILTIN_PMOVSDW128,
29280 IX86_BUILTIN_PMOVSDW256,
29281 IX86_BUILTIN_PMOVUSDW128,
29282 IX86_BUILTIN_PMOVUSDW256,
29283 IX86_BUILTIN_PMOVQB128,
29284 IX86_BUILTIN_PMOVQB256,
29285 IX86_BUILTIN_PMOVSQB128,
29286 IX86_BUILTIN_PMOVSQB256,
29287 IX86_BUILTIN_PMOVUSQB128,
29288 IX86_BUILTIN_PMOVUSQB256,
29289 IX86_BUILTIN_PMOVQW128,
29290 IX86_BUILTIN_PMOVQW256,
29291 IX86_BUILTIN_PMOVSQW128,
29292 IX86_BUILTIN_PMOVSQW256,
29293 IX86_BUILTIN_PMOVUSQW128,
29294 IX86_BUILTIN_PMOVUSQW256,
29295 IX86_BUILTIN_PMOVQD128,
29296 IX86_BUILTIN_PMOVQD256,
29297 IX86_BUILTIN_PMOVSQD128,
29298 IX86_BUILTIN_PMOVSQD256,
29299 IX86_BUILTIN_PMOVUSQD128,
29300 IX86_BUILTIN_PMOVUSQD256,
29301 IX86_BUILTIN_RANGEPD256,
29302 IX86_BUILTIN_RANGEPD128,
29303 IX86_BUILTIN_RANGEPS256,
29304 IX86_BUILTIN_RANGEPS128,
29305 IX86_BUILTIN_GETEXPPS256,
29306 IX86_BUILTIN_GETEXPPD256,
29307 IX86_BUILTIN_GETEXPPS128,
29308 IX86_BUILTIN_GETEXPPD128,
29309 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29310 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29311 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29312 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29313 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29314 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29315 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29316 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29317 IX86_BUILTIN_PABSQ256,
29318 IX86_BUILTIN_PABSQ128,
29319 IX86_BUILTIN_PABSD256_MASK,
29320 IX86_BUILTIN_PABSD128_MASK,
29321 IX86_BUILTIN_PMULHRSW256_MASK,
29322 IX86_BUILTIN_PMULHRSW128_MASK,
29323 IX86_BUILTIN_PMULHUW128_MASK,
29324 IX86_BUILTIN_PMULHUW256_MASK,
29325 IX86_BUILTIN_PMULHW256_MASK,
29326 IX86_BUILTIN_PMULHW128_MASK,
29327 IX86_BUILTIN_PMULLW256_MASK,
29328 IX86_BUILTIN_PMULLW128_MASK,
29329 IX86_BUILTIN_PMULLQ256,
29330 IX86_BUILTIN_PMULLQ128,
29331 IX86_BUILTIN_ANDPD256_MASK,
29332 IX86_BUILTIN_ANDPD128_MASK,
29333 IX86_BUILTIN_ANDPS256_MASK,
29334 IX86_BUILTIN_ANDPS128_MASK,
29335 IX86_BUILTIN_ANDNPD256_MASK,
29336 IX86_BUILTIN_ANDNPD128_MASK,
29337 IX86_BUILTIN_ANDNPS256_MASK,
29338 IX86_BUILTIN_ANDNPS128_MASK,
29339 IX86_BUILTIN_PSLLWI128_MASK,
29340 IX86_BUILTIN_PSLLDI128_MASK,
29341 IX86_BUILTIN_PSLLQI128_MASK,
29342 IX86_BUILTIN_PSLLW128_MASK,
29343 IX86_BUILTIN_PSLLD128_MASK,
29344 IX86_BUILTIN_PSLLQ128_MASK,
29345 IX86_BUILTIN_PSLLWI256_MASK,
29346 IX86_BUILTIN_PSLLW256_MASK,
29347 IX86_BUILTIN_PSLLDI256_MASK,
29348 IX86_BUILTIN_PSLLD256_MASK,
29349 IX86_BUILTIN_PSLLQI256_MASK,
29350 IX86_BUILTIN_PSLLQ256_MASK,
29351 IX86_BUILTIN_PSRADI128_MASK,
29352 IX86_BUILTIN_PSRAD128_MASK,
29353 IX86_BUILTIN_PSRADI256_MASK,
29354 IX86_BUILTIN_PSRAD256_MASK,
29355 IX86_BUILTIN_PSRAQI128_MASK,
29356 IX86_BUILTIN_PSRAQ128_MASK,
29357 IX86_BUILTIN_PSRAQI256_MASK,
29358 IX86_BUILTIN_PSRAQ256_MASK,
29359 IX86_BUILTIN_PANDD256,
29360 IX86_BUILTIN_PANDD128,
29361 IX86_BUILTIN_PSRLDI128_MASK,
29362 IX86_BUILTIN_PSRLD128_MASK,
29363 IX86_BUILTIN_PSRLDI256_MASK,
29364 IX86_BUILTIN_PSRLD256_MASK,
29365 IX86_BUILTIN_PSRLQI128_MASK,
29366 IX86_BUILTIN_PSRLQ128_MASK,
29367 IX86_BUILTIN_PSRLQI256_MASK,
29368 IX86_BUILTIN_PSRLQ256_MASK,
29369 IX86_BUILTIN_PANDQ256,
29370 IX86_BUILTIN_PANDQ128,
29371 IX86_BUILTIN_PANDND256,
29372 IX86_BUILTIN_PANDND128,
29373 IX86_BUILTIN_PANDNQ256,
29374 IX86_BUILTIN_PANDNQ128,
29375 IX86_BUILTIN_PORD256,
29376 IX86_BUILTIN_PORD128,
29377 IX86_BUILTIN_PORQ256,
29378 IX86_BUILTIN_PORQ128,
29379 IX86_BUILTIN_PXORD256,
29380 IX86_BUILTIN_PXORD128,
29381 IX86_BUILTIN_PXORQ256,
29382 IX86_BUILTIN_PXORQ128,
29383 IX86_BUILTIN_PACKSSWB256_MASK,
29384 IX86_BUILTIN_PACKSSWB128_MASK,
29385 IX86_BUILTIN_PACKUSWB256_MASK,
29386 IX86_BUILTIN_PACKUSWB128_MASK,
29387 IX86_BUILTIN_RNDSCALEPS256,
29388 IX86_BUILTIN_RNDSCALEPD256,
29389 IX86_BUILTIN_RNDSCALEPS128,
29390 IX86_BUILTIN_RNDSCALEPD128,
29391 IX86_BUILTIN_VTERNLOGQ256_MASK,
29392 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29393 IX86_BUILTIN_VTERNLOGD256_MASK,
29394 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29395 IX86_BUILTIN_VTERNLOGQ128_MASK,
29396 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29397 IX86_BUILTIN_VTERNLOGD128_MASK,
29398 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29399 IX86_BUILTIN_SCALEFPD256,
29400 IX86_BUILTIN_SCALEFPS256,
29401 IX86_BUILTIN_SCALEFPD128,
29402 IX86_BUILTIN_SCALEFPS128,
29403 IX86_BUILTIN_VFMADDPD256_MASK,
29404 IX86_BUILTIN_VFMADDPD256_MASK3,
29405 IX86_BUILTIN_VFMADDPD256_MASKZ,
29406 IX86_BUILTIN_VFMADDPD128_MASK,
29407 IX86_BUILTIN_VFMADDPD128_MASK3,
29408 IX86_BUILTIN_VFMADDPD128_MASKZ,
29409 IX86_BUILTIN_VFMADDPS256_MASK,
29410 IX86_BUILTIN_VFMADDPS256_MASK3,
29411 IX86_BUILTIN_VFMADDPS256_MASKZ,
29412 IX86_BUILTIN_VFMADDPS128_MASK,
29413 IX86_BUILTIN_VFMADDPS128_MASK3,
29414 IX86_BUILTIN_VFMADDPS128_MASKZ,
29415 IX86_BUILTIN_VFMSUBPD256_MASK3,
29416 IX86_BUILTIN_VFMSUBPD128_MASK3,
29417 IX86_BUILTIN_VFMSUBPS256_MASK3,
29418 IX86_BUILTIN_VFMSUBPS128_MASK3,
29419 IX86_BUILTIN_VFNMADDPD256_MASK,
29420 IX86_BUILTIN_VFNMADDPD128_MASK,
29421 IX86_BUILTIN_VFNMADDPS256_MASK,
29422 IX86_BUILTIN_VFNMADDPS128_MASK,
29423 IX86_BUILTIN_VFNMSUBPD256_MASK,
29424 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29425 IX86_BUILTIN_VFNMSUBPD128_MASK,
29426 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29427 IX86_BUILTIN_VFNMSUBPS256_MASK,
29428 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29429 IX86_BUILTIN_VFNMSUBPS128_MASK,
29430 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29431 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29432 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29433 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29434 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29435 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29436 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29437 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29438 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29439 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29440 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29441 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29442 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29443 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29444 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29445 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29446 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29447 IX86_BUILTIN_INSERTF64X2_256,
29448 IX86_BUILTIN_INSERTI64X2_256,
29449 IX86_BUILTIN_PSRAVV16HI,
29450 IX86_BUILTIN_PSRAVV8HI,
29451 IX86_BUILTIN_PMADDUBSW256_MASK,
29452 IX86_BUILTIN_PMADDUBSW128_MASK,
29453 IX86_BUILTIN_PMADDWD256_MASK,
29454 IX86_BUILTIN_PMADDWD128_MASK,
29455 IX86_BUILTIN_PSRLVV16HI,
29456 IX86_BUILTIN_PSRLVV8HI,
29457 IX86_BUILTIN_CVTPS2DQ256_MASK,
29458 IX86_BUILTIN_CVTPS2DQ128_MASK,
29459 IX86_BUILTIN_CVTPS2UDQ256,
29460 IX86_BUILTIN_CVTPS2UDQ128,
29461 IX86_BUILTIN_CVTPS2QQ256,
29462 IX86_BUILTIN_CVTPS2QQ128,
29463 IX86_BUILTIN_CVTPS2UQQ256,
29464 IX86_BUILTIN_CVTPS2UQQ128,
29465 IX86_BUILTIN_GETMANTPS256,
29466 IX86_BUILTIN_GETMANTPS128,
29467 IX86_BUILTIN_GETMANTPD256,
29468 IX86_BUILTIN_GETMANTPD128,
29469 IX86_BUILTIN_MOVDDUP256_MASK,
29470 IX86_BUILTIN_MOVDDUP128_MASK,
29471 IX86_BUILTIN_MOVSHDUP256_MASK,
29472 IX86_BUILTIN_MOVSHDUP128_MASK,
29473 IX86_BUILTIN_MOVSLDUP256_MASK,
29474 IX86_BUILTIN_MOVSLDUP128_MASK,
29475 IX86_BUILTIN_CVTQQ2PS256,
29476 IX86_BUILTIN_CVTQQ2PS128,
29477 IX86_BUILTIN_CVTUQQ2PS256,
29478 IX86_BUILTIN_CVTUQQ2PS128,
29479 IX86_BUILTIN_CVTQQ2PD256,
29480 IX86_BUILTIN_CVTQQ2PD128,
29481 IX86_BUILTIN_CVTUQQ2PD256,
29482 IX86_BUILTIN_CVTUQQ2PD128,
29483 IX86_BUILTIN_VPERMT2VARQ256,
29484 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29485 IX86_BUILTIN_VPERMT2VARD256,
29486 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29487 IX86_BUILTIN_VPERMI2VARQ256,
29488 IX86_BUILTIN_VPERMI2VARD256,
29489 IX86_BUILTIN_VPERMT2VARPD256,
29490 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29491 IX86_BUILTIN_VPERMT2VARPS256,
29492 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29493 IX86_BUILTIN_VPERMI2VARPD256,
29494 IX86_BUILTIN_VPERMI2VARPS256,
29495 IX86_BUILTIN_VPERMT2VARQ128,
29496 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29497 IX86_BUILTIN_VPERMT2VARD128,
29498 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29499 IX86_BUILTIN_VPERMI2VARQ128,
29500 IX86_BUILTIN_VPERMI2VARD128,
29501 IX86_BUILTIN_VPERMT2VARPD128,
29502 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29503 IX86_BUILTIN_VPERMT2VARPS128,
29504 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29505 IX86_BUILTIN_VPERMI2VARPD128,
29506 IX86_BUILTIN_VPERMI2VARPS128,
29507 IX86_BUILTIN_PSHUFB256_MASK,
29508 IX86_BUILTIN_PSHUFB128_MASK,
29509 IX86_BUILTIN_PSHUFHW256_MASK,
29510 IX86_BUILTIN_PSHUFHW128_MASK,
29511 IX86_BUILTIN_PSHUFLW256_MASK,
29512 IX86_BUILTIN_PSHUFLW128_MASK,
29513 IX86_BUILTIN_PSHUFD256_MASK,
29514 IX86_BUILTIN_PSHUFD128_MASK,
29515 IX86_BUILTIN_SHUFPD256_MASK,
29516 IX86_BUILTIN_SHUFPD128_MASK,
29517 IX86_BUILTIN_SHUFPS256_MASK,
29518 IX86_BUILTIN_SHUFPS128_MASK,
29519 IX86_BUILTIN_PROLVQ256,
29520 IX86_BUILTIN_PROLVQ128,
29521 IX86_BUILTIN_PROLQ256,
29522 IX86_BUILTIN_PROLQ128,
29523 IX86_BUILTIN_PRORVQ256,
29524 IX86_BUILTIN_PRORVQ128,
29525 IX86_BUILTIN_PRORQ256,
29526 IX86_BUILTIN_PRORQ128,
29527 IX86_BUILTIN_PSRAVQ128,
29528 IX86_BUILTIN_PSRAVQ256,
29529 IX86_BUILTIN_PSLLVV4DI_MASK,
29530 IX86_BUILTIN_PSLLVV2DI_MASK,
29531 IX86_BUILTIN_PSLLVV8SI_MASK,
29532 IX86_BUILTIN_PSLLVV4SI_MASK,
29533 IX86_BUILTIN_PSRAVV8SI_MASK,
29534 IX86_BUILTIN_PSRAVV4SI_MASK,
29535 IX86_BUILTIN_PSRLVV4DI_MASK,
29536 IX86_BUILTIN_PSRLVV2DI_MASK,
29537 IX86_BUILTIN_PSRLVV8SI_MASK,
29538 IX86_BUILTIN_PSRLVV4SI_MASK,
29539 IX86_BUILTIN_PSRAWI256_MASK,
29540 IX86_BUILTIN_PSRAW256_MASK,
29541 IX86_BUILTIN_PSRAWI128_MASK,
29542 IX86_BUILTIN_PSRAW128_MASK,
29543 IX86_BUILTIN_PSRLWI256_MASK,
29544 IX86_BUILTIN_PSRLW256_MASK,
29545 IX86_BUILTIN_PSRLWI128_MASK,
29546 IX86_BUILTIN_PSRLW128_MASK,
29547 IX86_BUILTIN_PRORVD256,
29548 IX86_BUILTIN_PROLVD256,
29549 IX86_BUILTIN_PRORD256,
29550 IX86_BUILTIN_PROLD256,
29551 IX86_BUILTIN_PRORVD128,
29552 IX86_BUILTIN_PROLVD128,
29553 IX86_BUILTIN_PRORD128,
29554 IX86_BUILTIN_PROLD128,
29555 IX86_BUILTIN_FPCLASSPD256,
29556 IX86_BUILTIN_FPCLASSPD128,
29557 IX86_BUILTIN_FPCLASSSD,
29558 IX86_BUILTIN_FPCLASSPS256,
29559 IX86_BUILTIN_FPCLASSPS128,
29560 IX86_BUILTIN_FPCLASSSS,
29561 IX86_BUILTIN_CVTB2MASK128,
29562 IX86_BUILTIN_CVTB2MASK256,
29563 IX86_BUILTIN_CVTW2MASK128,
29564 IX86_BUILTIN_CVTW2MASK256,
29565 IX86_BUILTIN_CVTD2MASK128,
29566 IX86_BUILTIN_CVTD2MASK256,
29567 IX86_BUILTIN_CVTQ2MASK128,
29568 IX86_BUILTIN_CVTQ2MASK256,
29569 IX86_BUILTIN_CVTMASK2B128,
29570 IX86_BUILTIN_CVTMASK2B256,
29571 IX86_BUILTIN_CVTMASK2W128,
29572 IX86_BUILTIN_CVTMASK2W256,
29573 IX86_BUILTIN_CVTMASK2D128,
29574 IX86_BUILTIN_CVTMASK2D256,
29575 IX86_BUILTIN_CVTMASK2Q128,
29576 IX86_BUILTIN_CVTMASK2Q256,
29577 IX86_BUILTIN_PCMPEQB128_MASK,
29578 IX86_BUILTIN_PCMPEQB256_MASK,
29579 IX86_BUILTIN_PCMPEQW128_MASK,
29580 IX86_BUILTIN_PCMPEQW256_MASK,
29581 IX86_BUILTIN_PCMPEQD128_MASK,
29582 IX86_BUILTIN_PCMPEQD256_MASK,
29583 IX86_BUILTIN_PCMPEQQ128_MASK,
29584 IX86_BUILTIN_PCMPEQQ256_MASK,
29585 IX86_BUILTIN_PCMPGTB128_MASK,
29586 IX86_BUILTIN_PCMPGTB256_MASK,
29587 IX86_BUILTIN_PCMPGTW128_MASK,
29588 IX86_BUILTIN_PCMPGTW256_MASK,
29589 IX86_BUILTIN_PCMPGTD128_MASK,
29590 IX86_BUILTIN_PCMPGTD256_MASK,
29591 IX86_BUILTIN_PCMPGTQ128_MASK,
29592 IX86_BUILTIN_PCMPGTQ256_MASK,
29593 IX86_BUILTIN_PTESTMB128,
29594 IX86_BUILTIN_PTESTMB256,
29595 IX86_BUILTIN_PTESTMW128,
29596 IX86_BUILTIN_PTESTMW256,
29597 IX86_BUILTIN_PTESTMD128,
29598 IX86_BUILTIN_PTESTMD256,
29599 IX86_BUILTIN_PTESTMQ128,
29600 IX86_BUILTIN_PTESTMQ256,
29601 IX86_BUILTIN_PTESTNMB128,
29602 IX86_BUILTIN_PTESTNMB256,
29603 IX86_BUILTIN_PTESTNMW128,
29604 IX86_BUILTIN_PTESTNMW256,
29605 IX86_BUILTIN_PTESTNMD128,
29606 IX86_BUILTIN_PTESTNMD256,
29607 IX86_BUILTIN_PTESTNMQ128,
29608 IX86_BUILTIN_PTESTNMQ256,
29609 IX86_BUILTIN_PBROADCASTMB128,
29610 IX86_BUILTIN_PBROADCASTMB256,
29611 IX86_BUILTIN_PBROADCASTMW128,
29612 IX86_BUILTIN_PBROADCASTMW256,
29613 IX86_BUILTIN_COMPRESSPD256,
29614 IX86_BUILTIN_COMPRESSPD128,
29615 IX86_BUILTIN_COMPRESSPS256,
29616 IX86_BUILTIN_COMPRESSPS128,
29617 IX86_BUILTIN_PCOMPRESSQ256,
29618 IX86_BUILTIN_PCOMPRESSQ128,
29619 IX86_BUILTIN_PCOMPRESSD256,
29620 IX86_BUILTIN_PCOMPRESSD128,
29621 IX86_BUILTIN_EXPANDPD256,
29622 IX86_BUILTIN_EXPANDPD128,
29623 IX86_BUILTIN_EXPANDPS256,
29624 IX86_BUILTIN_EXPANDPS128,
29625 IX86_BUILTIN_PEXPANDQ256,
29626 IX86_BUILTIN_PEXPANDQ128,
29627 IX86_BUILTIN_PEXPANDD256,
29628 IX86_BUILTIN_PEXPANDD128,
29629 IX86_BUILTIN_EXPANDPD256Z,
29630 IX86_BUILTIN_EXPANDPD128Z,
29631 IX86_BUILTIN_EXPANDPS256Z,
29632 IX86_BUILTIN_EXPANDPS128Z,
29633 IX86_BUILTIN_PEXPANDQ256Z,
29634 IX86_BUILTIN_PEXPANDQ128Z,
29635 IX86_BUILTIN_PEXPANDD256Z,
29636 IX86_BUILTIN_PEXPANDD128Z,
29637 IX86_BUILTIN_PMAXSD256_MASK,
29638 IX86_BUILTIN_PMINSD256_MASK,
29639 IX86_BUILTIN_PMAXUD256_MASK,
29640 IX86_BUILTIN_PMINUD256_MASK,
29641 IX86_BUILTIN_PMAXSD128_MASK,
29642 IX86_BUILTIN_PMINSD128_MASK,
29643 IX86_BUILTIN_PMAXUD128_MASK,
29644 IX86_BUILTIN_PMINUD128_MASK,
29645 IX86_BUILTIN_PMAXSQ256_MASK,
29646 IX86_BUILTIN_PMINSQ256_MASK,
29647 IX86_BUILTIN_PMAXUQ256_MASK,
29648 IX86_BUILTIN_PMINUQ256_MASK,
29649 IX86_BUILTIN_PMAXSQ128_MASK,
29650 IX86_BUILTIN_PMINSQ128_MASK,
29651 IX86_BUILTIN_PMAXUQ128_MASK,
29652 IX86_BUILTIN_PMINUQ128_MASK,
29653 IX86_BUILTIN_PMINSB256_MASK,
29654 IX86_BUILTIN_PMINUB256_MASK,
29655 IX86_BUILTIN_PMAXSB256_MASK,
29656 IX86_BUILTIN_PMAXUB256_MASK,
29657 IX86_BUILTIN_PMINSB128_MASK,
29658 IX86_BUILTIN_PMINUB128_MASK,
29659 IX86_BUILTIN_PMAXSB128_MASK,
29660 IX86_BUILTIN_PMAXUB128_MASK,
29661 IX86_BUILTIN_PMINSW256_MASK,
29662 IX86_BUILTIN_PMINUW256_MASK,
29663 IX86_BUILTIN_PMAXSW256_MASK,
29664 IX86_BUILTIN_PMAXUW256_MASK,
29665 IX86_BUILTIN_PMINSW128_MASK,
29666 IX86_BUILTIN_PMINUW128_MASK,
29667 IX86_BUILTIN_PMAXSW128_MASK,
29668 IX86_BUILTIN_PMAXUW128_MASK,
29669 IX86_BUILTIN_VPCONFLICTQ256,
29670 IX86_BUILTIN_VPCONFLICTD256,
29671 IX86_BUILTIN_VPCLZCNTQ256,
29672 IX86_BUILTIN_VPCLZCNTD256,
29673 IX86_BUILTIN_UNPCKHPD256_MASK,
29674 IX86_BUILTIN_UNPCKHPD128_MASK,
29675 IX86_BUILTIN_UNPCKHPS256_MASK,
29676 IX86_BUILTIN_UNPCKHPS128_MASK,
29677 IX86_BUILTIN_UNPCKLPD256_MASK,
29678 IX86_BUILTIN_UNPCKLPD128_MASK,
29679 IX86_BUILTIN_UNPCKLPS256_MASK,
29680 IX86_BUILTIN_VPCONFLICTQ128,
29681 IX86_BUILTIN_VPCONFLICTD128,
29682 IX86_BUILTIN_VPCLZCNTQ128,
29683 IX86_BUILTIN_VPCLZCNTD128,
29684 IX86_BUILTIN_UNPCKLPS128_MASK,
29685 IX86_BUILTIN_ALIGND256,
29686 IX86_BUILTIN_ALIGNQ256,
29687 IX86_BUILTIN_ALIGND128,
29688 IX86_BUILTIN_ALIGNQ128,
29689 IX86_BUILTIN_CVTPS2PH256_MASK,
29690 IX86_BUILTIN_CVTPS2PH_MASK,
29691 IX86_BUILTIN_CVTPH2PS_MASK,
29692 IX86_BUILTIN_CVTPH2PS256_MASK,
29693 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29694 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29695 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29696 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29697 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29698 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29699 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29700 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29701 IX86_BUILTIN_PUNPCKHBW128_MASK,
29702 IX86_BUILTIN_PUNPCKHBW256_MASK,
29703 IX86_BUILTIN_PUNPCKHWD128_MASK,
29704 IX86_BUILTIN_PUNPCKHWD256_MASK,
29705 IX86_BUILTIN_PUNPCKLBW128_MASK,
29706 IX86_BUILTIN_PUNPCKLBW256_MASK,
29707 IX86_BUILTIN_PUNPCKLWD128_MASK,
29708 IX86_BUILTIN_PUNPCKLWD256_MASK,
29709 IX86_BUILTIN_PSLLVV16HI,
29710 IX86_BUILTIN_PSLLVV8HI,
29711 IX86_BUILTIN_PACKSSDW256_MASK,
29712 IX86_BUILTIN_PACKSSDW128_MASK,
29713 IX86_BUILTIN_PACKUSDW256_MASK,
29714 IX86_BUILTIN_PACKUSDW128_MASK,
29715 IX86_BUILTIN_PAVGB256_MASK,
29716 IX86_BUILTIN_PAVGW256_MASK,
29717 IX86_BUILTIN_PAVGB128_MASK,
29718 IX86_BUILTIN_PAVGW128_MASK,
29719 IX86_BUILTIN_VPERMVARSF256_MASK,
29720 IX86_BUILTIN_VPERMVARDF256_MASK,
29721 IX86_BUILTIN_VPERMDF256_MASK,
29722 IX86_BUILTIN_PABSB256_MASK,
29723 IX86_BUILTIN_PABSB128_MASK,
29724 IX86_BUILTIN_PABSW256_MASK,
29725 IX86_BUILTIN_PABSW128_MASK,
29726 IX86_BUILTIN_VPERMILVARPD_MASK,
29727 IX86_BUILTIN_VPERMILVARPS_MASK,
29728 IX86_BUILTIN_VPERMILVARPD256_MASK,
29729 IX86_BUILTIN_VPERMILVARPS256_MASK,
29730 IX86_BUILTIN_VPERMILPD_MASK,
29731 IX86_BUILTIN_VPERMILPS_MASK,
29732 IX86_BUILTIN_VPERMILPD256_MASK,
29733 IX86_BUILTIN_VPERMILPS256_MASK,
29734 IX86_BUILTIN_BLENDMQ256,
29735 IX86_BUILTIN_BLENDMD256,
29736 IX86_BUILTIN_BLENDMPD256,
29737 IX86_BUILTIN_BLENDMPS256,
29738 IX86_BUILTIN_BLENDMQ128,
29739 IX86_BUILTIN_BLENDMD128,
29740 IX86_BUILTIN_BLENDMPD128,
29741 IX86_BUILTIN_BLENDMPS128,
29742 IX86_BUILTIN_BLENDMW256,
29743 IX86_BUILTIN_BLENDMB256,
29744 IX86_BUILTIN_BLENDMW128,
29745 IX86_BUILTIN_BLENDMB128,
29746 IX86_BUILTIN_PMULLD256_MASK,
29747 IX86_BUILTIN_PMULLD128_MASK,
29748 IX86_BUILTIN_PMULUDQ256_MASK,
29749 IX86_BUILTIN_PMULDQ256_MASK,
29750 IX86_BUILTIN_PMULDQ128_MASK,
29751 IX86_BUILTIN_PMULUDQ128_MASK,
29752 IX86_BUILTIN_CVTPD2PS256_MASK,
29753 IX86_BUILTIN_CVTPD2PS_MASK,
29754 IX86_BUILTIN_VPERMVARSI256_MASK,
29755 IX86_BUILTIN_VPERMVARDI256_MASK,
29756 IX86_BUILTIN_VPERMDI256_MASK,
29757 IX86_BUILTIN_CMPQ256,
29758 IX86_BUILTIN_CMPD256,
29759 IX86_BUILTIN_UCMPQ256,
29760 IX86_BUILTIN_UCMPD256,
29761 IX86_BUILTIN_CMPB256,
29762 IX86_BUILTIN_CMPW256,
29763 IX86_BUILTIN_UCMPB256,
29764 IX86_BUILTIN_UCMPW256,
29765 IX86_BUILTIN_CMPPD256_MASK,
29766 IX86_BUILTIN_CMPPS256_MASK,
29767 IX86_BUILTIN_CMPQ128,
29768 IX86_BUILTIN_CMPD128,
29769 IX86_BUILTIN_UCMPQ128,
29770 IX86_BUILTIN_UCMPD128,
29771 IX86_BUILTIN_CMPB128,
29772 IX86_BUILTIN_CMPW128,
29773 IX86_BUILTIN_UCMPB128,
29774 IX86_BUILTIN_UCMPW128,
29775 IX86_BUILTIN_CMPPD128_MASK,
29776 IX86_BUILTIN_CMPPS128_MASK,
29778 IX86_BUILTIN_GATHER3SIV8SF,
29779 IX86_BUILTIN_GATHER3SIV4SF,
29780 IX86_BUILTIN_GATHER3SIV4DF,
29781 IX86_BUILTIN_GATHER3SIV2DF,
29782 IX86_BUILTIN_GATHER3DIV8SF,
29783 IX86_BUILTIN_GATHER3DIV4SF,
29784 IX86_BUILTIN_GATHER3DIV4DF,
29785 IX86_BUILTIN_GATHER3DIV2DF,
29786 IX86_BUILTIN_GATHER3SIV8SI,
29787 IX86_BUILTIN_GATHER3SIV4SI,
29788 IX86_BUILTIN_GATHER3SIV4DI,
29789 IX86_BUILTIN_GATHER3SIV2DI,
29790 IX86_BUILTIN_GATHER3DIV8SI,
29791 IX86_BUILTIN_GATHER3DIV4SI,
29792 IX86_BUILTIN_GATHER3DIV4DI,
29793 IX86_BUILTIN_GATHER3DIV2DI,
29794 IX86_BUILTIN_SCATTERSIV8SF,
29795 IX86_BUILTIN_SCATTERSIV4SF,
29796 IX86_BUILTIN_SCATTERSIV4DF,
29797 IX86_BUILTIN_SCATTERSIV2DF,
29798 IX86_BUILTIN_SCATTERDIV8SF,
29799 IX86_BUILTIN_SCATTERDIV4SF,
29800 IX86_BUILTIN_SCATTERDIV4DF,
29801 IX86_BUILTIN_SCATTERDIV2DF,
29802 IX86_BUILTIN_SCATTERSIV8SI,
29803 IX86_BUILTIN_SCATTERSIV4SI,
29804 IX86_BUILTIN_SCATTERSIV4DI,
29805 IX86_BUILTIN_SCATTERSIV2DI,
29806 IX86_BUILTIN_SCATTERDIV8SI,
29807 IX86_BUILTIN_SCATTERDIV4SI,
29808 IX86_BUILTIN_SCATTERDIV4DI,
29809 IX86_BUILTIN_SCATTERDIV2DI,
29811 /* AVX512DQ. */
29812 IX86_BUILTIN_RANGESD128,
29813 IX86_BUILTIN_RANGESS128,
29814 IX86_BUILTIN_KUNPCKWD,
29815 IX86_BUILTIN_KUNPCKDQ,
29816 IX86_BUILTIN_BROADCASTF32x2_512,
29817 IX86_BUILTIN_BROADCASTI32x2_512,
29818 IX86_BUILTIN_BROADCASTF64X2_512,
29819 IX86_BUILTIN_BROADCASTI64X2_512,
29820 IX86_BUILTIN_BROADCASTF32X8_512,
29821 IX86_BUILTIN_BROADCASTI32X8_512,
29822 IX86_BUILTIN_EXTRACTF64X2_512,
29823 IX86_BUILTIN_EXTRACTF32X8,
29824 IX86_BUILTIN_EXTRACTI64X2_512,
29825 IX86_BUILTIN_EXTRACTI32X8,
29826 IX86_BUILTIN_REDUCEPD512_MASK,
29827 IX86_BUILTIN_REDUCEPS512_MASK,
29828 IX86_BUILTIN_PMULLQ512,
29829 IX86_BUILTIN_XORPD512,
29830 IX86_BUILTIN_XORPS512,
29831 IX86_BUILTIN_ORPD512,
29832 IX86_BUILTIN_ORPS512,
29833 IX86_BUILTIN_ANDPD512,
29834 IX86_BUILTIN_ANDPS512,
29835 IX86_BUILTIN_ANDNPD512,
29836 IX86_BUILTIN_ANDNPS512,
29837 IX86_BUILTIN_INSERTF32X8,
29838 IX86_BUILTIN_INSERTI32X8,
29839 IX86_BUILTIN_INSERTF64X2_512,
29840 IX86_BUILTIN_INSERTI64X2_512,
29841 IX86_BUILTIN_FPCLASSPD512,
29842 IX86_BUILTIN_FPCLASSPS512,
29843 IX86_BUILTIN_CVTD2MASK512,
29844 IX86_BUILTIN_CVTQ2MASK512,
29845 IX86_BUILTIN_CVTMASK2D512,
29846 IX86_BUILTIN_CVTMASK2Q512,
29847 IX86_BUILTIN_CVTPD2QQ512,
29848 IX86_BUILTIN_CVTPS2QQ512,
29849 IX86_BUILTIN_CVTPD2UQQ512,
29850 IX86_BUILTIN_CVTPS2UQQ512,
29851 IX86_BUILTIN_CVTQQ2PS512,
29852 IX86_BUILTIN_CVTUQQ2PS512,
29853 IX86_BUILTIN_CVTQQ2PD512,
29854 IX86_BUILTIN_CVTUQQ2PD512,
29855 IX86_BUILTIN_CVTTPS2QQ512,
29856 IX86_BUILTIN_CVTTPS2UQQ512,
29857 IX86_BUILTIN_CVTTPD2QQ512,
29858 IX86_BUILTIN_CVTTPD2UQQ512,
29859 IX86_BUILTIN_RANGEPS512,
29860 IX86_BUILTIN_RANGEPD512,
29862 /* AVX512BW. */
29863 IX86_BUILTIN_PACKUSDW512,
29864 IX86_BUILTIN_PACKSSDW512,
29865 IX86_BUILTIN_LOADDQUHI512_MASK,
29866 IX86_BUILTIN_LOADDQUQI512_MASK,
29867 IX86_BUILTIN_PSLLDQ512,
29868 IX86_BUILTIN_PSRLDQ512,
29869 IX86_BUILTIN_STOREDQUHI512_MASK,
29870 IX86_BUILTIN_STOREDQUQI512_MASK,
29871 IX86_BUILTIN_PALIGNR512,
29872 IX86_BUILTIN_PALIGNR512_MASK,
29873 IX86_BUILTIN_MOVDQUHI512_MASK,
29874 IX86_BUILTIN_MOVDQUQI512_MASK,
29875 IX86_BUILTIN_PSADBW512,
29876 IX86_BUILTIN_DBPSADBW512,
29877 IX86_BUILTIN_PBROADCASTB512,
29878 IX86_BUILTIN_PBROADCASTB512_GPR,
29879 IX86_BUILTIN_PBROADCASTW512,
29880 IX86_BUILTIN_PBROADCASTW512_GPR,
29881 IX86_BUILTIN_PMOVSXBW512_MASK,
29882 IX86_BUILTIN_PMOVZXBW512_MASK,
29883 IX86_BUILTIN_VPERMVARHI512_MASK,
29884 IX86_BUILTIN_VPERMT2VARHI512,
29885 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
29886 IX86_BUILTIN_VPERMI2VARHI512,
29887 IX86_BUILTIN_PAVGB512,
29888 IX86_BUILTIN_PAVGW512,
29889 IX86_BUILTIN_PADDB512,
29890 IX86_BUILTIN_PSUBB512,
29891 IX86_BUILTIN_PSUBSB512,
29892 IX86_BUILTIN_PADDSB512,
29893 IX86_BUILTIN_PSUBUSB512,
29894 IX86_BUILTIN_PADDUSB512,
29895 IX86_BUILTIN_PSUBW512,
29896 IX86_BUILTIN_PADDW512,
29897 IX86_BUILTIN_PSUBSW512,
29898 IX86_BUILTIN_PADDSW512,
29899 IX86_BUILTIN_PSUBUSW512,
29900 IX86_BUILTIN_PADDUSW512,
29901 IX86_BUILTIN_PMAXUW512,
29902 IX86_BUILTIN_PMAXSW512,
29903 IX86_BUILTIN_PMINUW512,
29904 IX86_BUILTIN_PMINSW512,
29905 IX86_BUILTIN_PMAXUB512,
29906 IX86_BUILTIN_PMAXSB512,
29907 IX86_BUILTIN_PMINUB512,
29908 IX86_BUILTIN_PMINSB512,
29909 IX86_BUILTIN_PMOVWB512,
29910 IX86_BUILTIN_PMOVSWB512,
29911 IX86_BUILTIN_PMOVUSWB512,
29912 IX86_BUILTIN_PMULHRSW512_MASK,
29913 IX86_BUILTIN_PMULHUW512_MASK,
29914 IX86_BUILTIN_PMULHW512_MASK,
29915 IX86_BUILTIN_PMULLW512_MASK,
29916 IX86_BUILTIN_PSLLWI512_MASK,
29917 IX86_BUILTIN_PSLLW512_MASK,
29918 IX86_BUILTIN_PACKSSWB512,
29919 IX86_BUILTIN_PACKUSWB512,
29920 IX86_BUILTIN_PSRAVV32HI,
29921 IX86_BUILTIN_PMADDUBSW512_MASK,
29922 IX86_BUILTIN_PMADDWD512_MASK,
29923 IX86_BUILTIN_PSRLVV32HI,
29924 IX86_BUILTIN_PUNPCKHBW512,
29925 IX86_BUILTIN_PUNPCKHWD512,
29926 IX86_BUILTIN_PUNPCKLBW512,
29927 IX86_BUILTIN_PUNPCKLWD512,
29928 IX86_BUILTIN_PSHUFB512,
29929 IX86_BUILTIN_PSHUFHW512,
29930 IX86_BUILTIN_PSHUFLW512,
29931 IX86_BUILTIN_PSRAWI512,
29932 IX86_BUILTIN_PSRAW512,
29933 IX86_BUILTIN_PSRLWI512,
29934 IX86_BUILTIN_PSRLW512,
29935 IX86_BUILTIN_CVTB2MASK512,
29936 IX86_BUILTIN_CVTW2MASK512,
29937 IX86_BUILTIN_CVTMASK2B512,
29938 IX86_BUILTIN_CVTMASK2W512,
29939 IX86_BUILTIN_PCMPEQB512_MASK,
29940 IX86_BUILTIN_PCMPEQW512_MASK,
29941 IX86_BUILTIN_PCMPGTB512_MASK,
29942 IX86_BUILTIN_PCMPGTW512_MASK,
29943 IX86_BUILTIN_PTESTMB512,
29944 IX86_BUILTIN_PTESTMW512,
29945 IX86_BUILTIN_PTESTNMB512,
29946 IX86_BUILTIN_PTESTNMW512,
29947 IX86_BUILTIN_PSLLVV32HI,
29948 IX86_BUILTIN_PABSB512,
29949 IX86_BUILTIN_PABSW512,
29950 IX86_BUILTIN_BLENDMW512,
29951 IX86_BUILTIN_BLENDMB512,
29952 IX86_BUILTIN_CMPB512,
29953 IX86_BUILTIN_CMPW512,
29954 IX86_BUILTIN_UCMPB512,
29955 IX86_BUILTIN_UCMPW512,
29957 /* Alternate 4 and 8 element gather/scatter for the vectorizer
29958 where all operands are 32-byte or 64-byte wide respectively. */
29959 IX86_BUILTIN_GATHERALTSIV4DF,
29960 IX86_BUILTIN_GATHERALTDIV8SF,
29961 IX86_BUILTIN_GATHERALTSIV4DI,
29962 IX86_BUILTIN_GATHERALTDIV8SI,
29963 IX86_BUILTIN_GATHER3ALTDIV16SF,
29964 IX86_BUILTIN_GATHER3ALTDIV16SI,
29965 IX86_BUILTIN_GATHER3ALTSIV4DF,
29966 IX86_BUILTIN_GATHER3ALTDIV8SF,
29967 IX86_BUILTIN_GATHER3ALTSIV4DI,
29968 IX86_BUILTIN_GATHER3ALTDIV8SI,
29969 IX86_BUILTIN_GATHER3ALTSIV8DF,
29970 IX86_BUILTIN_GATHER3ALTSIV8DI,
29971 IX86_BUILTIN_GATHER3DIV16SF,
29972 IX86_BUILTIN_GATHER3DIV16SI,
29973 IX86_BUILTIN_GATHER3DIV8DF,
29974 IX86_BUILTIN_GATHER3DIV8DI,
29975 IX86_BUILTIN_GATHER3SIV16SF,
29976 IX86_BUILTIN_GATHER3SIV16SI,
29977 IX86_BUILTIN_GATHER3SIV8DF,
29978 IX86_BUILTIN_GATHER3SIV8DI,
29979 IX86_BUILTIN_SCATTERDIV16SF,
29980 IX86_BUILTIN_SCATTERDIV16SI,
29981 IX86_BUILTIN_SCATTERDIV8DF,
29982 IX86_BUILTIN_SCATTERDIV8DI,
29983 IX86_BUILTIN_SCATTERSIV16SF,
29984 IX86_BUILTIN_SCATTERSIV16SI,
29985 IX86_BUILTIN_SCATTERSIV8DF,
29986 IX86_BUILTIN_SCATTERSIV8DI,
29988 /* AVX512PF */
29989 IX86_BUILTIN_GATHERPFQPD,
29990 IX86_BUILTIN_GATHERPFDPS,
29991 IX86_BUILTIN_GATHERPFDPD,
29992 IX86_BUILTIN_GATHERPFQPS,
29993 IX86_BUILTIN_SCATTERPFDPD,
29994 IX86_BUILTIN_SCATTERPFDPS,
29995 IX86_BUILTIN_SCATTERPFQPD,
29996 IX86_BUILTIN_SCATTERPFQPS,
29998 /* AVX-512ER */
29999 IX86_BUILTIN_EXP2PD_MASK,
30000 IX86_BUILTIN_EXP2PS_MASK,
30001 IX86_BUILTIN_EXP2PS,
30002 IX86_BUILTIN_RCP28PD,
30003 IX86_BUILTIN_RCP28PS,
30004 IX86_BUILTIN_RCP28SD,
30005 IX86_BUILTIN_RCP28SS,
30006 IX86_BUILTIN_RSQRT28PD,
30007 IX86_BUILTIN_RSQRT28PS,
30008 IX86_BUILTIN_RSQRT28SD,
30009 IX86_BUILTIN_RSQRT28SS,
30011 /* SHA builtins. */
30012 IX86_BUILTIN_SHA1MSG1,
30013 IX86_BUILTIN_SHA1MSG2,
30014 IX86_BUILTIN_SHA1NEXTE,
30015 IX86_BUILTIN_SHA1RNDS4,
30016 IX86_BUILTIN_SHA256MSG1,
30017 IX86_BUILTIN_SHA256MSG2,
30018 IX86_BUILTIN_SHA256RNDS2,
30020 /* CLFLUSHOPT instructions. */
30021 IX86_BUILTIN_CLFLUSHOPT,
30023 /* TFmode support builtins. */
30024 IX86_BUILTIN_INFQ,
30025 IX86_BUILTIN_HUGE_VALQ,
30026 IX86_BUILTIN_FABSQ,
30027 IX86_BUILTIN_COPYSIGNQ,
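  /* These back the __float128 (TFmode) built-ins __builtin_infq,
     __builtin_huge_valq, __builtin_fabsq and __builtin_copysignq;
     typical use looks like, e.g.
	 __float128 y = __builtin_copysignq (__builtin_infq (), x);  */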
30029 /* Vectorizer support builtins. */
30030 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30031 IX86_BUILTIN_CPYSGNPS,
30032 IX86_BUILTIN_CPYSGNPD,
30033 IX86_BUILTIN_CPYSGNPS256,
30034 IX86_BUILTIN_CPYSGNPS512,
30035 IX86_BUILTIN_CPYSGNPD256,
30036 IX86_BUILTIN_CPYSGNPD512,
30037 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30038 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30041 /* FMA4 instructions. */
30042 IX86_BUILTIN_VFMADDSS,
30043 IX86_BUILTIN_VFMADDSD,
30044 IX86_BUILTIN_VFMADDPS,
30045 IX86_BUILTIN_VFMADDPD,
30046 IX86_BUILTIN_VFMADDPS256,
30047 IX86_BUILTIN_VFMADDPD256,
30048 IX86_BUILTIN_VFMADDSUBPS,
30049 IX86_BUILTIN_VFMADDSUBPD,
30050 IX86_BUILTIN_VFMADDSUBPS256,
30051 IX86_BUILTIN_VFMADDSUBPD256,
30053 /* FMA3 instructions. */
30054 IX86_BUILTIN_VFMADDSS3,
30055 IX86_BUILTIN_VFMADDSD3,
30057 /* XOP instructions. */
30058 IX86_BUILTIN_VPCMOV,
30059 IX86_BUILTIN_VPCMOV_V2DI,
30060 IX86_BUILTIN_VPCMOV_V4SI,
30061 IX86_BUILTIN_VPCMOV_V8HI,
30062 IX86_BUILTIN_VPCMOV_V16QI,
30063 IX86_BUILTIN_VPCMOV_V4SF,
30064 IX86_BUILTIN_VPCMOV_V2DF,
30065 IX86_BUILTIN_VPCMOV256,
30066 IX86_BUILTIN_VPCMOV_V4DI256,
30067 IX86_BUILTIN_VPCMOV_V8SI256,
30068 IX86_BUILTIN_VPCMOV_V16HI256,
30069 IX86_BUILTIN_VPCMOV_V32QI256,
30070 IX86_BUILTIN_VPCMOV_V8SF256,
30071 IX86_BUILTIN_VPCMOV_V4DF256,
30073 IX86_BUILTIN_VPPERM,
30075 IX86_BUILTIN_VPMACSSWW,
30076 IX86_BUILTIN_VPMACSWW,
30077 IX86_BUILTIN_VPMACSSWD,
30078 IX86_BUILTIN_VPMACSWD,
30079 IX86_BUILTIN_VPMACSSDD,
30080 IX86_BUILTIN_VPMACSDD,
30081 IX86_BUILTIN_VPMACSSDQL,
30082 IX86_BUILTIN_VPMACSSDQH,
30083 IX86_BUILTIN_VPMACSDQL,
30084 IX86_BUILTIN_VPMACSDQH,
30085 IX86_BUILTIN_VPMADCSSWD,
30086 IX86_BUILTIN_VPMADCSWD,
30088 IX86_BUILTIN_VPHADDBW,
30089 IX86_BUILTIN_VPHADDBD,
30090 IX86_BUILTIN_VPHADDBQ,
30091 IX86_BUILTIN_VPHADDWD,
30092 IX86_BUILTIN_VPHADDWQ,
30093 IX86_BUILTIN_VPHADDDQ,
30094 IX86_BUILTIN_VPHADDUBW,
30095 IX86_BUILTIN_VPHADDUBD,
30096 IX86_BUILTIN_VPHADDUBQ,
30097 IX86_BUILTIN_VPHADDUWD,
30098 IX86_BUILTIN_VPHADDUWQ,
30099 IX86_BUILTIN_VPHADDUDQ,
30100 IX86_BUILTIN_VPHSUBBW,
30101 IX86_BUILTIN_VPHSUBWD,
30102 IX86_BUILTIN_VPHSUBDQ,
30104 IX86_BUILTIN_VPROTB,
30105 IX86_BUILTIN_VPROTW,
30106 IX86_BUILTIN_VPROTD,
30107 IX86_BUILTIN_VPROTQ,
30108 IX86_BUILTIN_VPROTB_IMM,
30109 IX86_BUILTIN_VPROTW_IMM,
30110 IX86_BUILTIN_VPROTD_IMM,
30111 IX86_BUILTIN_VPROTQ_IMM,
30113 IX86_BUILTIN_VPSHLB,
30114 IX86_BUILTIN_VPSHLW,
30115 IX86_BUILTIN_VPSHLD,
30116 IX86_BUILTIN_VPSHLQ,
30117 IX86_BUILTIN_VPSHAB,
30118 IX86_BUILTIN_VPSHAW,
30119 IX86_BUILTIN_VPSHAD,
30120 IX86_BUILTIN_VPSHAQ,
30122 IX86_BUILTIN_VFRCZSS,
30123 IX86_BUILTIN_VFRCZSD,
30124 IX86_BUILTIN_VFRCZPS,
30125 IX86_BUILTIN_VFRCZPD,
30126 IX86_BUILTIN_VFRCZPS256,
30127 IX86_BUILTIN_VFRCZPD256,
30129 IX86_BUILTIN_VPCOMEQUB,
30130 IX86_BUILTIN_VPCOMNEUB,
30131 IX86_BUILTIN_VPCOMLTUB,
30132 IX86_BUILTIN_VPCOMLEUB,
30133 IX86_BUILTIN_VPCOMGTUB,
30134 IX86_BUILTIN_VPCOMGEUB,
30135 IX86_BUILTIN_VPCOMFALSEUB,
30136 IX86_BUILTIN_VPCOMTRUEUB,
30138 IX86_BUILTIN_VPCOMEQUW,
30139 IX86_BUILTIN_VPCOMNEUW,
30140 IX86_BUILTIN_VPCOMLTUW,
30141 IX86_BUILTIN_VPCOMLEUW,
30142 IX86_BUILTIN_VPCOMGTUW,
30143 IX86_BUILTIN_VPCOMGEUW,
30144 IX86_BUILTIN_VPCOMFALSEUW,
30145 IX86_BUILTIN_VPCOMTRUEUW,
30147 IX86_BUILTIN_VPCOMEQUD,
30148 IX86_BUILTIN_VPCOMNEUD,
30149 IX86_BUILTIN_VPCOMLTUD,
30150 IX86_BUILTIN_VPCOMLEUD,
30151 IX86_BUILTIN_VPCOMGTUD,
30152 IX86_BUILTIN_VPCOMGEUD,
30153 IX86_BUILTIN_VPCOMFALSEUD,
30154 IX86_BUILTIN_VPCOMTRUEUD,
30156 IX86_BUILTIN_VPCOMEQUQ,
30157 IX86_BUILTIN_VPCOMNEUQ,
30158 IX86_BUILTIN_VPCOMLTUQ,
30159 IX86_BUILTIN_VPCOMLEUQ,
30160 IX86_BUILTIN_VPCOMGTUQ,
30161 IX86_BUILTIN_VPCOMGEUQ,
30162 IX86_BUILTIN_VPCOMFALSEUQ,
30163 IX86_BUILTIN_VPCOMTRUEUQ,
30165 IX86_BUILTIN_VPCOMEQB,
30166 IX86_BUILTIN_VPCOMNEB,
30167 IX86_BUILTIN_VPCOMLTB,
30168 IX86_BUILTIN_VPCOMLEB,
30169 IX86_BUILTIN_VPCOMGTB,
30170 IX86_BUILTIN_VPCOMGEB,
30171 IX86_BUILTIN_VPCOMFALSEB,
30172 IX86_BUILTIN_VPCOMTRUEB,
30174 IX86_BUILTIN_VPCOMEQW,
30175 IX86_BUILTIN_VPCOMNEW,
30176 IX86_BUILTIN_VPCOMLTW,
30177 IX86_BUILTIN_VPCOMLEW,
30178 IX86_BUILTIN_VPCOMGTW,
30179 IX86_BUILTIN_VPCOMGEW,
30180 IX86_BUILTIN_VPCOMFALSEW,
30181 IX86_BUILTIN_VPCOMTRUEW,
30183 IX86_BUILTIN_VPCOMEQD,
30184 IX86_BUILTIN_VPCOMNED,
30185 IX86_BUILTIN_VPCOMLTD,
30186 IX86_BUILTIN_VPCOMLED,
30187 IX86_BUILTIN_VPCOMGTD,
30188 IX86_BUILTIN_VPCOMGED,
30189 IX86_BUILTIN_VPCOMFALSED,
30190 IX86_BUILTIN_VPCOMTRUED,
30192 IX86_BUILTIN_VPCOMEQQ,
30193 IX86_BUILTIN_VPCOMNEQ,
30194 IX86_BUILTIN_VPCOMLTQ,
30195 IX86_BUILTIN_VPCOMLEQ,
30196 IX86_BUILTIN_VPCOMGTQ,
30197 IX86_BUILTIN_VPCOMGEQ,
30198 IX86_BUILTIN_VPCOMFALSEQ,
30199 IX86_BUILTIN_VPCOMTRUEQ,
30201 /* LWP instructions. */
30202 IX86_BUILTIN_LLWPCB,
30203 IX86_BUILTIN_SLWPCB,
30204 IX86_BUILTIN_LWPVAL32,
30205 IX86_BUILTIN_LWPVAL64,
30206 IX86_BUILTIN_LWPINS32,
30207 IX86_BUILTIN_LWPINS64,
30209 IX86_BUILTIN_CLZS,
30211 /* RTM */
30212 IX86_BUILTIN_XBEGIN,
30213 IX86_BUILTIN_XEND,
30214 IX86_BUILTIN_XABORT,
30215 IX86_BUILTIN_XTEST,
30217 /* MPX */
30218 IX86_BUILTIN_BNDMK,
30219 IX86_BUILTIN_BNDSTX,
30220 IX86_BUILTIN_BNDLDX,
30221 IX86_BUILTIN_BNDCL,
30222 IX86_BUILTIN_BNDCU,
30223 IX86_BUILTIN_BNDRET,
30224 IX86_BUILTIN_BNDNARROW,
30225 IX86_BUILTIN_BNDINT,
30226 IX86_BUILTIN_SIZEOF,
30227 IX86_BUILTIN_BNDLOWER,
30228 IX86_BUILTIN_BNDUPPER,
30230 /* BMI instructions. */
30231 IX86_BUILTIN_BEXTR32,
30232 IX86_BUILTIN_BEXTR64,
30233 IX86_BUILTIN_CTZS,
30235 /* TBM instructions. */
30236 IX86_BUILTIN_BEXTRI32,
30237 IX86_BUILTIN_BEXTRI64,
30239 /* BMI2 instructions. */
30240 IX86_BUILTIN_BZHI32,
30241 IX86_BUILTIN_BZHI64,
30242 IX86_BUILTIN_PDEP32,
30243 IX86_BUILTIN_PDEP64,
30244 IX86_BUILTIN_PEXT32,
30245 IX86_BUILTIN_PEXT64,
30247 /* ADX instructions. */
30248 IX86_BUILTIN_ADDCARRYX32,
30249 IX86_BUILTIN_ADDCARRYX64,
30251 /* SBB instructions. */
30252 IX86_BUILTIN_SBB32,
30253 IX86_BUILTIN_SBB64,
30255 /* FSGSBASE instructions. */
30256 IX86_BUILTIN_RDFSBASE32,
30257 IX86_BUILTIN_RDFSBASE64,
30258 IX86_BUILTIN_RDGSBASE32,
30259 IX86_BUILTIN_RDGSBASE64,
30260 IX86_BUILTIN_WRFSBASE32,
30261 IX86_BUILTIN_WRFSBASE64,
30262 IX86_BUILTIN_WRGSBASE32,
30263 IX86_BUILTIN_WRGSBASE64,
30265 /* RDRND instructions. */
30266 IX86_BUILTIN_RDRAND16_STEP,
30267 IX86_BUILTIN_RDRAND32_STEP,
30268 IX86_BUILTIN_RDRAND64_STEP,
30270 /* RDSEED instructions. */
30271 IX86_BUILTIN_RDSEED16_STEP,
30272 IX86_BUILTIN_RDSEED32_STEP,
30273 IX86_BUILTIN_RDSEED64_STEP,
30275 /* F16C instructions. */
30276 IX86_BUILTIN_CVTPH2PS,
30277 IX86_BUILTIN_CVTPH2PS256,
30278 IX86_BUILTIN_CVTPS2PH,
30279 IX86_BUILTIN_CVTPS2PH256,
30281 /* CFString built-in for darwin */
30282 IX86_BUILTIN_CFSTRING,
30284 /* Builtins to get CPU type and supported features. */
30285 IX86_BUILTIN_CPU_INIT,
30286 IX86_BUILTIN_CPU_IS,
30287 IX86_BUILTIN_CPU_SUPPORTS,
30289 /* Read/write FLAGS register built-ins. */
30290 IX86_BUILTIN_READ_FLAGS,
30291 IX86_BUILTIN_WRITE_FLAGS,
  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool leaf_p;			/* true if the declaration has leaf attribute */
  bool nothrow_p;		/* true if the declaration has nothrow attribute */
  bool set_and_not_built_p;	/* true if the builtin was deferred and its
				   decl has not been built yet */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
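/* Illustrative sketch of how these tables are consumed elsewhere in the
   backend: a builtin's decl is fetched by indexing ix86_builtins with the
   enum value.  The helper name below is made up for the example.  */

static tree
example_lookup_builtin_decl (void)
{
  /* NULL_TREE here means the builtin is either unsupported for the current
     ISA or still deferred in ix86_builtins_isa.  */
  return ix86_builtins[(int) IX86_BUILTIN_XEND];
}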
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a builtin
   is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].leaf_p = false;
	  ix86_builtins_isa[(int) code].nothrow_p = false;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
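/* Usage sketch (hypothetical init routine, not part of this file): this is
   the shape of the registration calls the builtin initialization code makes
   through the two helpers above.  The mask/name/type/code combinations are
   taken from the tables later in this file.  */

static void
example_register_builtins (void)
{
  /* Plain builtin: declared now or deferred, depending on ix86_isa_flags.  */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xend",
	       VOID_FTYPE_VOID, IX86_BUILTIN_XEND);

  /* Const builtin: additionally marked TREE_READONLY once it is built.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_paddb",
		     V8QI_FTYPE_V8QI_V8QI, IX86_BUILTIN_PADDB);
}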
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	  if (ix86_builtins_isa[i].leaf_p)
	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
						      NULL_TREE);
	  if (ix86_builtins_isa[i].nothrow_p)
	    TREE_NOTHROW (decl) = 1;
	}
    }
}
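/* User-level illustration of the deferred path (hypothetical translation
   unit, not part of this file): when compiling without AVX enabled, the AVX
   builtins are only recorded in ix86_builtins_isa; switching the ISA with a
   target attribute causes ix86_add_new_builtins to build their decls, so the
   call below still resolves.  The function name is made up.  */

__attribute__((target ("avx")))
static void
example_flush_avx_upper_state (void)
{
  __builtin_ia32_vzeroupper ();
}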
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
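/* Reading one entry of the tables below, field by field.  This is an
   illustrative copy of the first bdesc_comi entry; the variable name is made
   up, and the real tables are plain aggregate initializers.  */

static const struct builtin_description example_comi_entry =
{
  OPTION_MASK_ISA_SSE,		/* mask: ISA flags the builtin requires */
  CODE_FOR_sse_comi,		/* icode: insn pattern used at expansion time */
  "__builtin_ia32_comieq",	/* name: user-visible builtin name */
  IX86_BUILTIN_COMIEQSS,	/* code: index into ix86_builtins[] */
  UNEQ,				/* comparison: rtx code for the comparison */
  0				/* flag: BUILTIN_DESC_* bits; other tables
				   store a function type or CC mode here */
};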
static const struct builtin_description bdesc_comi[] =
{
30439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
30467 /* SSE4.2 */
30468 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30469 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30470 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30471 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30472 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30473 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30474 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
30479 /* SSE4.2 */
30480 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30481 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30482 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30483 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30484 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30485 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30486 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
30492 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30493 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30494 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30496 /* 80387 (for use internally for atomic compound assignment). */
30497 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30498 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30499 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30500 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30502 /* MMX */
30503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30505 /* 3DNow! */
30506 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30508 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30509 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30510 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30511 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30512 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30513 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30514 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30515 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30516 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30518 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30519 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30520 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30521 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30522 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30523 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30524 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30525 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30527 /* SSE */
30528 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30529 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30530 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30532 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30533 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30534 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30535 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30537 /* SSE or 3DNow!A */
30538 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30539 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30541 /* SSE2 */
30542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30549 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30556 /* SSE3 */
30557 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30559 /* SSE4.1 */
30560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30562 /* SSE4A */
30563 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30564 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30566 /* AVX */
30567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30570 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30571 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30572 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30573 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30578 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30580 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30591 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30597 /* AVX2 */
30598 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30599 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30600 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30601 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30602 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30603 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30604 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30605 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30606 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30608 /* AVX512F */
30609 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30610 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30611 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30612 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30613 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30614 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30615 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30616 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30617 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30618 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30619 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30620 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30621 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30622 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30623 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30624 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30625 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30626 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30627 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30628 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30629 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30630 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30631 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30632 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30633 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30634 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30635 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30636 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30637 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30638 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30639 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30657 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30658 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30659 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30660 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30661 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30662 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30664 /* FSGSBASE */
30665 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30666 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30667 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30668 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30669 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30670 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30671 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30672 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30674 /* RTM */
30675 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30676 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30677 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30679 /* AVX512BW */
30680 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30681 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30682 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30683 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30685 /* AVX512VL */
30686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30687 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30688 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30722 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30723 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30724 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30725 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
};

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
30785 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30786 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30787 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30788 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30789 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30790 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30791 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30793 /* MMX */
30794 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30798 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30805 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30807 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30810 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30811 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30813 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30814 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30816 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30818 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30819 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30820 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30823 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30825 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30828 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30829 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30830 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
30838 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30839 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30840 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30841 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30842 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30843 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30845 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30846 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30847 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30848 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30849 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30850 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30852 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30853 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30854 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30855 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
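  /* Descriptive note (added): the shift builtins come in pairs; the
     *_SI_COUNT variants take the shift count as a plain integer, while the
     *_V*_COUNT variants take it in a vector operand, matching the
     psllwi/psllw forms of the underlying instructions.  */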
30857 /* 3DNow! */
30858 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30859 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30860 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30861 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30863 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30864 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30865 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30866 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30867 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30868 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30869 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30870 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30871 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30872 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30873 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30874 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30875 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30876 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30877 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30879 /* 3DNow!A */
30880 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30881 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30882 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
30883 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30884 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30885 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30887 /* SSE */
30888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
30889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30890 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30892 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30896 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30899 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30903 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30904 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30905 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30918 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30919 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30924 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30925 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
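  /* Descriptive note (added): for the compare builtins above, the fifth
     field supplies the rtx comparison code for the maskcmp pattern; the
     *_SWAP function types mark the GT/GE forms, which are expanded as
     LT/LE with the two vector operands swapped.  */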
30933 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30934 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30938 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30940 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30941 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30943 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30948 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30949 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
30952 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
30953 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
30955 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
30957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30961 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
30962 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
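  /* Descriptive note (added): entries with a zero name slot (FABSQ and
     COPYSIGNQ above, and the AES and PCLMUL entries further down) do not
     get a __builtin_ia32_* name from this table; the corresponding
     builtins are declared elsewhere in this file and only the expansion
     data recorded here is used.  */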
30964 /* SSE MMX or 3DNow!A */
30965 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30966 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30967 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30969 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30970 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30971 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30972 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30974 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
30975 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
30977 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
30979 /* SSE2 */
30980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
30983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
30984 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
30985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
30986 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
30988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
30989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
30990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
30991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
30992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
30994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
30996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
30997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
30998 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
30999 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31002 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31003 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31005 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31006 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31007 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31008 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31032 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31036 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31040 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31042 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31043 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31045 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31048 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31049 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31051 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31053 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31054 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31055 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31056 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31057 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31058 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31059 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31060 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31063 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31064 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31065 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31071 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31072 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31074 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31076 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31077 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31089 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31090 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31091 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31094 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31095 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31096 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31097 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31098 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31099 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31100 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31101 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31107 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31111 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31113 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31116 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31121 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31122 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31123 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31124 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31125 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31126 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31129 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31130 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31131 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31132 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31133 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31134 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
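  /* Descriptive note (added): the *_INT_CONVERT function types
     (pslldqi128/psrldqi128 above) flag builtins whose operands are
     converted to the mode of the insn pattern; the whole-register byte
     shifts are expanded through the V1TI shift patterns.  */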
31136 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31137 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31138 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31139 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31149 /* SSE2 MMX */
31150 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31151 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31153 /* SSE3 */
31154 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31155 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31157 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31158 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31159 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31160 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31161 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31162 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31164 /* SSSE3 */
31165 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31166 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31167 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31168 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31169 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31170 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31172 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31173 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31174 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31175 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31176 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31177 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31178 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31179 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31180 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31181 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31182 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31183 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31184 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31185 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31186 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31187 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31188 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31189 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31190 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31191 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31192 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31193 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31194 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31195 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31197 /* SSSE3. */
31198 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31199 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31201 /* SSE4.1 */
31202 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31203 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31204 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31205 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31206 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31207 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31208 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31209 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31210 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31211 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31213 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31214 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31215 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31216 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31217 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31218 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31219 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31220 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31221 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31222 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31223 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31224 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31225 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31227 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31228 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31229 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31230 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31231 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31232 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31233 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31234 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31235 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31236 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31237 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31238 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31240 /* SSE4.1 */
31241 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31242 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31243 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31244 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31246 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31247 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31248 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31249 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31251 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31252 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31254 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31255 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31257 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31258 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31259 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31260 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31262 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31263 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31265 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31266 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31268 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31269 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31270 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
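  /* Descriptive note (added): in the two groups above, the comparison slot
     is reused for other per-entry data: the round builtins store a ROUND_*
     rounding-mode constant (cast to enum rtx_code), and the ptest builtins
     store the comparison applied to the flags result (EQ for ptestz, LTU
     for ptestc, GTU for ptestnzc).  */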
31272 /* SSE4.2 */
31273 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31274 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31275 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31276 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31277 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31279 /* SSE4A */
31280 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31281 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31282 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31283 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31285 /* AES */
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31292 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31294 /* PCLMUL */
31295 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
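  /* The AES and PCLMUL rows above carry a null name: the user-visible
     builtins are apparently registered separately under their own ISA
     masks, and these rows only supply the icode and prototype used when
     the corresponding IX86_BUILTIN_* code is expanded.  */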
31297 /* AVX */
31298 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31299 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31301 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31302 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31303 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31304 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31305 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31306 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31309 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31310 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31312 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31313 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31314 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31315 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31316 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31317 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31318 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31319 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31320 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31321 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31322 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31323 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31336 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31340 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31346 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31347 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31349 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31350 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31351 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31352 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31353 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31354 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31355 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31356 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31357 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31358 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31359 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31360 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31361 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31367 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31369 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31371 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31373 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31375 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31378 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31381 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31385 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31386 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31392 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31399 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31400 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31409 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31410 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31411 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31412 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31414 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31415 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31426 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31433 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31434 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31436 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
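  /* For the floor/ceil/trunc/rint rows in the AVX block above, the ROUND_*
     value stored in the comparison slot is reused as the rounding immediate
     of the underlying vroundpd/vroundps pattern, so __builtin_ia32_floorpd256
     is effectively the ROUND_FLOOR instance of CODE_FOR_avx_roundpd256.  */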
31438 /* AVX2 */
31439 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31440 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31441 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31442 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31443 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31444 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31445 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31446 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31447 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31448 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31449 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31450 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31451 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31452 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31453 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31454 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31455 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31456 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31457 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31458 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31459 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31460 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31461 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31462 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31463 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31464 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31465 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31466 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31467 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31468 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31469 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31470 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31471 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31472 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31473 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31474 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31475 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31476 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31477 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31478 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31479 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31480 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31481 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31482 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31483 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31484 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31485 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31486 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31487 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31488 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31489 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31490 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31491 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31492 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31493 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31494 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31495 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31496 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31497 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31498 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31499 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31500 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31501 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31502 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31503 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31504 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31505 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31506 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31507 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31508 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31509 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31510 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31511 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31512 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31513 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31514 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31515 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31516 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31517 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31518 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31519 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31520 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31521 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31522 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31523 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31524 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31525 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31526 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31527 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31528 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31529 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31530 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31531 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31532 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31533 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31534 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31535 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31536 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31537 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31538 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31539 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31540 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31541 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31542 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31543 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31544 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31545 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31546 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31547 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31548 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31549 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31550 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31551 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31552 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31553 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31554 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31555 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31556 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31557 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31558 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31559 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31560 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31561 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31562 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31563 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31564 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31565 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31566 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31567 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31568 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31569 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31570 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31571 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31572 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31573 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31574 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31575 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31576 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31577 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31578 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31579 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31580 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31581 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31582 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31583 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31584 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
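  /* The psllv/psrav/psrlv rows above are the AVX2 per-element variable
     shifts: each lane of the first operand is shifted by the count held in
     the matching lane of the second, e.g. _mm256_sllv_epi32 expands through
     __builtin_ia32_psllv8si.  */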
31586 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31588 /* BMI */
31589 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31590 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31591 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
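  /* BEXTR extracts a bit field; the low byte of the second operand holds the
     start bit and the next byte the length, so a minimal sketch of its use is
     __builtin_ia32_bextr_u32 (x, start | (len << 8)).  The ctzs row above and
     the clzs row before the BMI block are the 16-bit trailing/leading zero
     counts.  */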
31593 /* TBM */
31594 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31595 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31597 /* F16C */
31598 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31599 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31600 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31601 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
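  /* The F16C rows convert between half and single precision: vcvtph2ps
     widens packed 16-bit floats to float, while vcvtps2ph narrows and takes
     a rounding immediate, which is why the ps2ph prototypes carry an extra
     INT operand.  */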
31603 /* BMI2 */
31604 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31605 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31606 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31607 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31608 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31609 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
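  /* PDEP scatters the low bits of its first operand to the set-bit positions
     of the mask and PEXT gathers them back; for instance
     __builtin_ia32_pdep_si (0xB, 0xCA) yields 0x8A.  BZHI clears every bit at
     and above the given index.  */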
31611 /* AVX512F */
31612 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31613 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31614 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31615 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31616 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31617 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31618 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31619 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31620 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31621 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31622 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31623 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31624 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31625 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31626 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31627 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31628 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31629 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31630 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31631 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31632 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31633 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31634 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31635 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31636 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31637 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31638 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31639 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31660 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31662 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31664 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31666 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31667 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31668 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31670 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31671 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31672 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31674 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31706 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31708 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31715 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31717 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31767 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31769 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31771 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31773 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31775 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31777 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31779 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31780 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31781 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31782 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31806 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31807 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31808 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
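/* For reference: the INT operand in the VTERNLOG descriptors above is an
   8-bit truth table indexed by one bit from each of the three vector
   sources, so any 3-input bitwise function fits in a single immediate.
   Roughly, using the user-level intrinsic from <immintrin.h> (variable
   names here are placeholders):

     __m512i r = _mm512_ternarylogic_epi32 (a, b, c, 0xCA);

   is the usual bitwise-select idiom (each result bit takes b where a is 1
   and c where a is 0), assuming the standard operand order of that
   intrinsic.  */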
31810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31814 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31819 /* Mask arithmetic operations */
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
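/* For reference: the HI_FTYPE_HI_HI entries above back the __mmask16
   mask intrinsics in avx512fintrin.h, so user code normally reaches them
   through wrappers rather than calling the builtins directly; roughly,

     __mmask16 m = _mm512_kand (m1, m2);

   expands to __builtin_ia32_kandhi (variable names here are
   placeholders).  */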
31831 /* SHA */
31832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31834 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31835 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31836 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31837 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31838 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
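/* For reference: the SHA entries above back the SHA-NI intrinsics in
   shaintrin.h; a typical round step is, roughly,

     __m128i st = _mm_sha256rnds2_epu32 (state0, state1, msg_wk);

   with operand roles as documented for the Intel SHA extensions rather
   than anything encoded in this table (variable names here are
   placeholders).  */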
31840 /* AVX512VL. */
31841 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31842 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31851 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
31852 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31853 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
31854 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31879 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31880 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31881 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31882 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31883 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31884 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31885 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31886 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31887 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31888 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31889 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31890 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
31891 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
31892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
31895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
31896 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
31897 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
31898 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31899 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31900 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31901 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31902 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31903 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31904 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31905 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31908 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31909 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31910 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31911 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
31931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
31932 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
31933 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
31934 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31935 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
31936 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
31937 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
31938 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31939 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
31940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
31942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
31944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
31945 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
31946 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
31947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31948 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
31949 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
31950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
31953 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
31954 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
31955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
31956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
31957 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
31958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
31959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
31960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
31961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
31962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
31963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
31964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
31965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
31966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
31967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
31968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
31969 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
31970 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
31971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
31972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
31973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
31974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
31975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
31976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
31977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
31978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
31979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
31980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
31981 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
31982 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
31983 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
31984 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
31985 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31986 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31987 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31988 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31989 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31990 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31991 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31992 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31993 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31994 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32011 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32012 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32015 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32020 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32021 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32022 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32023 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32024 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32027 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32028 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32029 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32030 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32031 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32035 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32036 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32037 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32038 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
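  /* Masked PMOV down-conversion builtins: plain, signed-saturating and unsigned-saturating truncation to narrower integer elements.  */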
32043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32046 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32047 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32048 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
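  /* Masked VRANGE (AVX-512DQ), GETEXP and FIXUPIMM builtins for 128/256-bit FP vectors.  */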
32079 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32080 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32081 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32082 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32099 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32100 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32101 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32102 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32103 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32104 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32105 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32106 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32107 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32108 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32109 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32110 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32111 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32112 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32113 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32114 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32115 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32116 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
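  /* Masked shift builtins (immediate and vector count) and masked bitwise AND/ANDN/OR/XOR builtins.  */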
32117 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32120 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32123 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32138 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32161 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
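  /* Masked FMA builtins (fmadd/fmsub/fnmadd/fnmsub and the fmaddsub/fmsubadd forms) in their _mask, _mask3 and _maskz flavours.  */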
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32225 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32226 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32227 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32229 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32234 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32239 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32240 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32241 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32242 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32253 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32254 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32255 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32256 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32257 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32258 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32259 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32260 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
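  /* Masked two-table variable permute builtins (vpermt2var/vpermi2var) for 128/256-bit integer and FP vectors.  */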
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32285 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32286 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32287 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32288 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32289 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32290 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
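  /* Masked rotate builtins (variable and immediate count) and masked variable-count shift builtins.  */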
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32300 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32301 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32302 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32303 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32304 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32305 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32306 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32307 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32308 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32309 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32317 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32318 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32319 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32320 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32321 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32322 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32324 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32333 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32334 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32335 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32336 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32337 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32338 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32343 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32344 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32345 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32346 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32351 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32352 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32353 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32354 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32356 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32357 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32358 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32371 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32372 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32373 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32374 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32379 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32380 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32381 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32387 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32388 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32389 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32390 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32432 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32433 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32434 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32435 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32436 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32437 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32438 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32439 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32443 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32444 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32445 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32446 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32447 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32448 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32449 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32450 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32458 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32459 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32460 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32461 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32483 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32492 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32493 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32521 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32522 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32539 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32542 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
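/* Note: each initializer above and below follows the builtin_description
   layout used throughout these tables: the OPTION_MASK_ISA_* requirement,
   the insn_code of the pattern that implements the builtin, the
   __builtin_ia32_* name exposed to the front end, the IX86_BUILTIN_*
   enumerator, an rtx comparison code (UNKNOWN where none applies), and
   the function prototype taken from the *_FTYPE_* enumeration, cast to
   int.  */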
32556 /* AVX512DQ. */
32557 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32558 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32559 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32560 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32561 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32562 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32563 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32564 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32565 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32566 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32567 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32568 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32569 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32570 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32571 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32572 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32573 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32574 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32575 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32576 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32577 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32578 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32579 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32580 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32581 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32582 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32583 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32584 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32585 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32586 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32587 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32589 /* AVX512BW. */
32590 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32591 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32592 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32593 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32594 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32595 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32596 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32597 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32598 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32599 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32600 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32601 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32602 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32603 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32604 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32605 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32606 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32607 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32608 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32609 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32610 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32611 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32612 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32613 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32614 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32615 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32616 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32617 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32618 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32619 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32620 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32621 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32622 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32623 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32624 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32625 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32626 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32627 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32628 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32629 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32630 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32631 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32632 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32633 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32634 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32635 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32636 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32637 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32638 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32639 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32640 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32641 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32642 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32643 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32644 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32645 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32646 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32647 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32648 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32649 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32650 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32651 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32652 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32653 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32654 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32655 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32656 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32657 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32658 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32659 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32660 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32661 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32662 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32663 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32664 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32665 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32666 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32667 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32668 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32669 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32670 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32671 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32672 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32673 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32674 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32675 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32676 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32677 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32678 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32679 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32680 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32681 };
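/* Each builtin_description entry above pairs an ISA option mask with the
   insn pattern implementing the builtin, the user-visible
   "__builtin_ia32_*" name, its IX86_BUILTIN_* enumerator, an optional
   comparison code (UNKNOWN when unused) and the prototype enumerator that
   selects the function type.  As a hedged illustration only (the __v64qi
   and __mmask64 names come from the intrinsics headers, not from this
   file), a V64QI_FTYPE_V64QI_V64QI_V64QI_DI entry such as
   __builtin_ia32_paddsb512_mask is called as

     __v64qi r = __builtin_ia32_paddsb512_mask (a, b, src, (__mmask64) -1);

   where SRC supplies the elements kept for zero bits of the write mask.  */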
32683 /* Builtins with rounding support. */
32684 static const struct builtin_description bdesc_round_args[] =
32685 {
32686 /* AVX512F */
32687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32699 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32700 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32701 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32702 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32703 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32704 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32705 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32706 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32707 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32708 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32709 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32710 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32711 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32712 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32713 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32714 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32715 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32716 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32717 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32718 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32719 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32720 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32721 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32722 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32723 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32724 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32725 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32726 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32727 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32728 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32729 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32730 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32731 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32732 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32733 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32734 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32735 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32736 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32737 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32738 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32739 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32740 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32741 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32742 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32743 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32744 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32745 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32746 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32747 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32748 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32749 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32750 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32751 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32752 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32753 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32754 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32755 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32756 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32757 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32758 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32759 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32760 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32761 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32762 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32763 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32764 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32765 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32766 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32767 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32768 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32769 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32770 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32771 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32772 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32773 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32774 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32775 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32776 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32777 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32778 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32779 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32780 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32781 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32782 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32783 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32784 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32785 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32786 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32787 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32788 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32789 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32790 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32791 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32792 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32793 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32794 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32795 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32796 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32797 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32798 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32799 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32800 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32801 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32802 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32803 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32804 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32805 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32807 /* AVX512ER */
32808 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32809 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32810 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32811 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32812 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32813 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32814 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32815 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32816 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32817 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32819 /* AVX512DQ. */
32820 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32821 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32822 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32823 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32824 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32825 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32826 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32827 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32828 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32829 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32830 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32831 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32832 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32833 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32834 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
32835 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
32836 };
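/* In bdesc_round_args above, the trailing INT operand of every prototype
   is the embedded rounding / SAE immediate, which ix86_expand_round_builtin
   checks before expanding the pattern.  A minimal usage sketch, assuming
   -mavx512f and the usual intrinsics-header types (__v8df, __mmask8) that
   are not defined in this file:

     __v8df r = __builtin_ia32_addpd512_mask (x, y, src, m,
                                              _MM_FROUND_CUR_DIRECTION);

   Here X and Y are the addends, SRC the merge source, M the write mask and
   the final argument the rounding-control immediate (0x04 requests the
   current MXCSR rounding mode).  */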
32838 /* Builtins for MPX. */
32839 static const struct builtin_description bdesc_mpx[] =
32840 {
32841 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
32842 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
32843 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
32844 };
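/* The three builtins above store or check pointer bounds and therefore
   have side effects; the bound-manipulation builtins that can be treated
   as const follow in a separate table.  */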
32846 /* Const builtins for MPX. */
32847 static const struct builtin_description bdesc_mpx_const[] =
32848 {
32849 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
32850 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
32851 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
32852 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
32853 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
32854 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
32855 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
32856 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
32857 };
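/* For both MPX tables the insn code is (enum insn_code) 0: these builtins
   are not tied to a single named pattern, and the IX86_BUILTIN_BND* codes
   appear to be handled individually when the builtin is expanded.  The
   BND_FTYPE_* / PVOID_FTYPE_* prototypes refer to the pointer-bounds type
   used by -fcheck-pointer-bounds.  */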
32859 /* FMA4 and XOP. */
32860 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
32861 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
32862 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
32863 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
32864 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
32865 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
32866 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
32867 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
32868 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
32869 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
32870 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
32871 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
32872 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
32873 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
32874 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
32875 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
32876 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
32877 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
32878 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
32879 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
32880 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
32881 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
32882 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
32883 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
32884 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
32885 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
32886 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
32887 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
32888 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
32889 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
32890 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
32891 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
32892 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
32893 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
32894 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
32895 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
32896 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
32897 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
32898 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
32899 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
32900 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
32901 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
32902 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
32903 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
32904 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
32905 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
32906 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
32907 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
32908 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
32909 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
32910 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
32911 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
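/* The MULTI_ARG_* macros above are simply aliases for the corresponding
   V*_FTYPE_* prototype enumerators, named by operand count and element
   type so that the FMA4/XOP table below stays readable; for example,
   MULTI_ARG_3_SF stands for the three-operand single-float prototype
   V4SF_FTYPE_V4SF_V4SF_V4SF.  */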
32913 static const struct builtin_description bdesc_multi_arg[] =
32914 {
32915 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
32916 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
32917 UNKNOWN, (int)MULTI_ARG_3_SF },
32918 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
32919 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
32920 UNKNOWN, (int)MULTI_ARG_3_DF },
32922 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
32923 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
32924 UNKNOWN, (int)MULTI_ARG_3_SF },
32925 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
32926 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
32927 UNKNOWN, (int)MULTI_ARG_3_DF },
32929 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
32930 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
32931 UNKNOWN, (int)MULTI_ARG_3_SF },
32932 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
32933 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
32934 UNKNOWN, (int)MULTI_ARG_3_DF },
32935 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
32936 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
32937 UNKNOWN, (int)MULTI_ARG_3_SF2 },
32938 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
32939 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
32940 UNKNOWN, (int)MULTI_ARG_3_DF2 },
32942 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
32943 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
32944 UNKNOWN, (int)MULTI_ARG_3_SF },
32945 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
32946 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
32947 UNKNOWN, (int)MULTI_ARG_3_DF },
32948 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
32949 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
32950 UNKNOWN, (int)MULTI_ARG_3_SF2 },
32951 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
32952 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
32953 UNKNOWN, (int)MULTI_ARG_3_DF2 },
32955 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
32956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
32957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
32958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
32959 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
32960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
32961 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
32963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
32964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
32965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
32966 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
32967 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
32968 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
32969 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
32971 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
32973 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
32974 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
32975 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32976 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32977 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
32978 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
32979 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32980 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32981 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32982 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32983 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32984 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32986 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32987 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
32988 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
32989 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
32990 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
32991 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
32992 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
32993 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
32994 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32995 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
32996 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
32997 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
32998 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32999 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33000 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33001 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33003 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33004 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33005 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33006 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33007 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33008 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33010 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33011 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33012 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33013 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33014 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33015 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33016 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33017 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33018 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33019 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33020 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33021 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33022 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33023 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33024 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33026 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33027 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33028 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33029 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33030 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33031 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33032 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33034 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33035 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33036 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33037 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33038 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33039 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33040 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33042 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33043 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33044 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33045 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33046 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33047 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33048 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33050 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33051 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33052 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33053 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33054 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33055 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33056 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33058 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33059 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33060 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33061 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33062 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33063 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33064 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33066 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33067 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33068 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33069 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33070 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33071 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33072 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33074 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33075 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33076 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33077 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33078 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33079 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33080 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33082 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33083 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33084 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33085 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33086 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33087 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33088 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33090 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33091 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33092 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33093 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33094 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33095 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33096 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33097 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33099 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33100 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33101 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33102 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33103 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33104 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33105 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33106 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33108 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33109 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33110 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33111 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33115 /* TM vector builtins. */
33117 /* Reuse the existing x86-specific `struct builtin_description' because
33118 we're lazy. Add casts to make them fit. */
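/* Each entry reads roughly as: under the given ISA mask, register the named
   builtin (e.g. "__builtin__ITM_WM64") under a generic TM builtin code (here
   BUILT_IN_TM_STORE_M64) with the listed function type, void (V2SI *, V2SI).
   CODE_FOR_nothing is used presumably because these are expanded through the
   generic transactional-memory machinery rather than a named insn pattern.  */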
33119 static const struct builtin_description bdesc_tm[] =
33121 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33122 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33123 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33124 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33125 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33126 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33127 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33129 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33130 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33131 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33132 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33133 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33134 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33135 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33137 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33138 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33139 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33140 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33141 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33142 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33143 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33145 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33146 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33147 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33150 /* TM callbacks. */
33152 /* Return the builtin decl needed to load a vector of TYPE. */
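/* For instance (sketch of a hypothetical caller): for a 128-bit vector type
   this returns the decl registered above under BUILT_IN_TM_LOAD_M128, i.e.
   "__builtin__ITM_RM128":

     tree fn = ix86_builtin_tm_load (type);  // type: some 128-bit vector type
     if (fn)
       ... build a call to FN instead of a plain load ...

   Non-vector types and unsupported sizes yield NULL_TREE.  */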
33154 static tree
33155 ix86_builtin_tm_load (tree type)
33157 if (TREE_CODE (type) == VECTOR_TYPE)
33159 switch (tree_to_uhwi (TYPE_SIZE (type)))
33161 case 64:
33162 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33163 case 128:
33164 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33165 case 256:
33166 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33169 return NULL_TREE;
33172 /* Return the builtin decl needed to store a vector of TYPE. */
33174 static tree
33175 ix86_builtin_tm_store (tree type)
33177 if (TREE_CODE (type) == VECTOR_TYPE)
33179 switch (tree_to_uhwi (TYPE_SIZE (type)))
33181 case 64:
33182 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33183 case 128:
33184 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33185 case 256:
33186 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33189 return NULL_TREE;
33192 /* Initialize the transactional memory vector load/store builtins. */
33194 static void
33195 ix86_init_tm_builtins (void)
33197 enum ix86_builtin_func_type ftype;
33198 const struct builtin_description *d;
33199 size_t i;
33200 tree decl;
33201 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33202 tree attrs_log, attrs_type_log;
33204 if (!flag_tm)
33205 return;
33207 /* If there are no builtins defined, we must be compiling in a
33208 language without trans-mem support. */
33209 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33210 return;
33212 /* Use whatever attributes a normal TM load has. */
33213 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33214 attrs_load = DECL_ATTRIBUTES (decl);
33215 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33216 /* Use whatever attributes a normal TM store has. */
33217 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33218 attrs_store = DECL_ATTRIBUTES (decl);
33219 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33220 /* Use whatever attributes a normal TM log has. */
33221 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33222 attrs_log = DECL_ATTRIBUTES (decl);
33223 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33225 for (i = 0, d = bdesc_tm;
33226 i < ARRAY_SIZE (bdesc_tm);
33227 i++, d++)
33229 if ((d->mask & ix86_isa_flags) != 0
33230 || (lang_hooks.builtin_function
33231 == lang_hooks.builtin_function_ext_scope))
33233 tree type, attrs, attrs_type;
33234 enum built_in_function code = (enum built_in_function) d->code;
33236 ftype = (enum ix86_builtin_func_type) d->flag;
33237 type = ix86_get_builtin_func_type (ftype);
33239 if (BUILTIN_TM_LOAD_P (code))
33241 attrs = attrs_load;
33242 attrs_type = attrs_type_load;
33244 else if (BUILTIN_TM_STORE_P (code))
33246 attrs = attrs_store;
33247 attrs_type = attrs_type_store;
33249 else
33251 attrs = attrs_log;
33252 attrs_type = attrs_type_log;
33254 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33255 /* The builtin without the prefix for
33256 calling it directly. */
33257 d->name + strlen ("__builtin_"),
33258 attrs);
33259 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33260 set the TYPE_ATTRIBUTES. */
33261 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33263 set_builtin_decl (code, decl, false);
33268 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33269 in the current target ISA, to allow the user to compile particular modules
33270 with target-specific options that differ from the command-line
33271 options. */
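/* Illustrative user code (not part of this file): with plain -msse2 on the
   command line, a translation unit may still contain

     __attribute__((target ("avx2")))
     __m256i f (__m256i a, __m256i b) { return _mm256_add_epi32 (a, b); }

   so the AVX2 builtins behind those intrinsics must already be known here;
   roughly, def_builtin records them and the ISA check is deferred until the
   builtin is actually expanded.  */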
33272 static void
33273 ix86_init_mmx_sse_builtins (void)
33275 const struct builtin_description * d;
33276 enum ix86_builtin_func_type ftype;
33277 size_t i;
33279 /* Add all special builtins with variable number of operands. */
33280 for (i = 0, d = bdesc_special_args;
33281 i < ARRAY_SIZE (bdesc_special_args);
33282 i++, d++)
33284 if (d->name == 0)
33285 continue;
33287 ftype = (enum ix86_builtin_func_type) d->flag;
33288 def_builtin (d->mask, d->name, ftype, d->code);
33291 /* Add all builtins with variable number of operands. */
33292 for (i = 0, d = bdesc_args;
33293 i < ARRAY_SIZE (bdesc_args);
33294 i++, d++)
33296 if (d->name == 0)
33297 continue;
33299 ftype = (enum ix86_builtin_func_type) d->flag;
33300 def_builtin_const (d->mask, d->name, ftype, d->code);
33303 /* Add all builtins with rounding. */
33304 for (i = 0, d = bdesc_round_args;
33305 i < ARRAY_SIZE (bdesc_round_args);
33306 i++, d++)
33308 if (d->name == 0)
33309 continue;
33311 ftype = (enum ix86_builtin_func_type) d->flag;
33312 def_builtin_const (d->mask, d->name, ftype, d->code);
33315 /* pcmpestr[im] insns. */
33316 for (i = 0, d = bdesc_pcmpestr;
33317 i < ARRAY_SIZE (bdesc_pcmpestr);
33318 i++, d++)
33320 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33321 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33322 else
33323 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33324 def_builtin_const (d->mask, d->name, ftype, d->code);
33327 /* pcmpistr[im] insns. */
33328 for (i = 0, d = bdesc_pcmpistr;
33329 i < ARRAY_SIZE (bdesc_pcmpistr);
33330 i++, d++)
33332 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33333 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33334 else
33335 ftype = INT_FTYPE_V16QI_V16QI_INT;
33336 def_builtin_const (d->mask, d->name, ftype, d->code);
33339 /* comi/ucomi insns. */
33340 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33342 if (d->mask == OPTION_MASK_ISA_SSE2)
33343 ftype = INT_FTYPE_V2DF_V2DF;
33344 else
33345 ftype = INT_FTYPE_V4SF_V4SF;
33346 def_builtin_const (d->mask, d->name, ftype, d->code);
33349 /* SSE */
33350 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33351 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33352 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33353 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33355 /* SSE or 3DNow!A */
33356 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33357 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33358 IX86_BUILTIN_MASKMOVQ);
33360 /* SSE2 */
33361 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33362 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33364 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33365 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33366 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33367 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33369 /* SSE3. */
33370 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33371 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33372 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33373 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33375 /* AES */
33376 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33377 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33378 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33379 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33380 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33381 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33382 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33383 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33384 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33385 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33386 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33387 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33389 /* PCLMUL */
33390 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33391 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33393 /* RDRND */
33394 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33395 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33396 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33397 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33398 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33399 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33400 IX86_BUILTIN_RDRAND64_STEP);
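/* The *_step builtins store the random value through their pointer argument
   and return nonzero on success, e.g. (user-level sketch):

     unsigned int v;
     while (!__builtin_ia32_rdrand32_step (&v))
       ;   // retry until the hardware delivers a value
   */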
33402 /* AVX2 */
33403 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33404 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33405 IX86_BUILTIN_GATHERSIV2DF);
33407 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33408 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33409 IX86_BUILTIN_GATHERSIV4DF);
33411 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33412 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33413 IX86_BUILTIN_GATHERDIV2DF);
33415 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33416 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33417 IX86_BUILTIN_GATHERDIV4DF);
33419 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33420 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33421 IX86_BUILTIN_GATHERSIV4SF);
33423 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33424 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33425 IX86_BUILTIN_GATHERSIV8SF);
33427 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33428 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33429 IX86_BUILTIN_GATHERDIV4SF);
33431 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33432 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33433 IX86_BUILTIN_GATHERDIV8SF);
33435 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33436 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33437 IX86_BUILTIN_GATHERSIV2DI);
33439 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33440 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33441 IX86_BUILTIN_GATHERSIV4DI);
33443 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33444 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33445 IX86_BUILTIN_GATHERDIV2DI);
33447 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33448 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33449 IX86_BUILTIN_GATHERDIV4DI);
33451 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33452 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33453 IX86_BUILTIN_GATHERSIV4SI);
33455 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33456 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33457 IX86_BUILTIN_GATHERSIV8SI);
33459 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33460 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33461 IX86_BUILTIN_GATHERDIV4SI);
33463 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33464 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33465 IX86_BUILTIN_GATHERDIV8SI);
33467 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33468 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33469 IX86_BUILTIN_GATHERALTSIV4DF);
33471 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33472 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33473 IX86_BUILTIN_GATHERALTDIV8SF);
33475 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33476 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33477 IX86_BUILTIN_GATHERALTSIV4DI);
33479 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33480 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33481 IX86_BUILTIN_GATHERALTDIV8SI);
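/* The gather signatures above read as (merge source, base pointer, index
   vector, mask, scale); e.g. <avx2intrin.h> builds _mm_mask_i32gather_pd on
   top of __builtin_ia32_gathersiv2df, roughly:

     (__m128d) __builtin_ia32_gathersiv2df ((__v2df) src, base,
                                            (__v4si) idx, (__v2df) mask, 4);
   */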
33483 /* AVX512F */
33484 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33485 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33486 IX86_BUILTIN_GATHER3SIV16SF);
33488 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33489 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33490 IX86_BUILTIN_GATHER3SIV8DF);
33492 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33493 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33494 IX86_BUILTIN_GATHER3DIV16SF);
33496 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33497 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33498 IX86_BUILTIN_GATHER3DIV8DF);
33500 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33501 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33502 IX86_BUILTIN_GATHER3SIV16SI);
33504 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33505 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33506 IX86_BUILTIN_GATHER3SIV8DI);
33508 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33509 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33510 IX86_BUILTIN_GATHER3DIV16SI);
33512 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33513 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33514 IX86_BUILTIN_GATHER3DIV8DI);
33516 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33517 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33518 IX86_BUILTIN_GATHER3ALTSIV8DF);
33520 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33521 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33522 IX86_BUILTIN_GATHER3ALTDIV16SF);
33524 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33525 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33526 IX86_BUILTIN_GATHER3ALTSIV8DI);
33528 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33529 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33530 IX86_BUILTIN_GATHER3ALTDIV16SI);
33532 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33533 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33534 IX86_BUILTIN_SCATTERSIV16SF);
33536 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33537 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33538 IX86_BUILTIN_SCATTERSIV8DF);
33540 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33541 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33542 IX86_BUILTIN_SCATTERDIV16SF);
33544 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33545 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33546 IX86_BUILTIN_SCATTERDIV8DF);
33548 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33549 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33550 IX86_BUILTIN_SCATTERSIV16SI);
33552 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33553 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33554 IX86_BUILTIN_SCATTERSIV8DI);
33556 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33557 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33558 IX86_BUILTIN_SCATTERDIV16SI);
33560 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33561 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33562 IX86_BUILTIN_SCATTERDIV8DI);
33564 /* AVX512VL */
33565 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33566 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33567 IX86_BUILTIN_GATHER3SIV2DF);
33569 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33570 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33571 IX86_BUILTIN_GATHER3SIV4DF);
33573 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33574 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33575 IX86_BUILTIN_GATHER3DIV2DF);
33577 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33578 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33579 IX86_BUILTIN_GATHER3DIV4DF);
33581 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33582 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33583 IX86_BUILTIN_GATHER3SIV4SF);
33585 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33586 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33587 IX86_BUILTIN_GATHER3SIV8SF);
33589 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33590 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33591 IX86_BUILTIN_GATHER3DIV4SF);
33593 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33594 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33595 IX86_BUILTIN_GATHER3DIV8SF);
33597 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33598 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33599 IX86_BUILTIN_GATHER3SIV2DI);
33601 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33602 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33603 IX86_BUILTIN_GATHER3SIV4DI);
33605 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33606 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33607 IX86_BUILTIN_GATHER3DIV2DI);
33609 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33610 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33611 IX86_BUILTIN_GATHER3DIV4DI);
33613 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33614 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33615 IX86_BUILTIN_GATHER3SIV4SI);
33617 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33618 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33619 IX86_BUILTIN_GATHER3SIV8SI);
33621 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33622 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33623 IX86_BUILTIN_GATHER3DIV4SI);
33625 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33626 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33627 IX86_BUILTIN_GATHER3DIV8SI);
33629 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33630 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33631 IX86_BUILTIN_GATHER3ALTSIV4DF);
33633 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33634 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33635 IX86_BUILTIN_GATHER3ALTDIV8SF);
33637 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33638 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33639 IX86_BUILTIN_GATHER3ALTSIV4DI);
33641 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33642 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33643 IX86_BUILTIN_GATHER3ALTDIV8SI);
33645 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33646 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33647 IX86_BUILTIN_SCATTERSIV8SF);
33649 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33650 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33651 IX86_BUILTIN_SCATTERSIV4SF);
33653 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33654 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33655 IX86_BUILTIN_SCATTERSIV4DF);
33657 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33658 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33659 IX86_BUILTIN_SCATTERSIV2DF);
33661 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33662 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33663 IX86_BUILTIN_SCATTERDIV8SF);
33665 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33666 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33667 IX86_BUILTIN_SCATTERDIV4SF);
33669 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33670 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33671 IX86_BUILTIN_SCATTERDIV4DF);
33673 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33674 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33675 IX86_BUILTIN_SCATTERDIV2DF);
33677 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33678 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33679 IX86_BUILTIN_SCATTERSIV8SI);
33681 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33682 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33683 IX86_BUILTIN_SCATTERSIV4SI);
33685 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33686 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33687 IX86_BUILTIN_SCATTERSIV4DI);
33689 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33690 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33691 IX86_BUILTIN_SCATTERSIV2DI);
33693 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33694 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33695 IX86_BUILTIN_SCATTERDIV8SI);
33697 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33698 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33699 IX86_BUILTIN_SCATTERDIV4SI);
33701 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33702 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33703 IX86_BUILTIN_SCATTERDIV4DI);
33705 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33706 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33707 IX86_BUILTIN_SCATTERDIV2DI);
33709 /* AVX512PF */
33710 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33711 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33712 IX86_BUILTIN_GATHERPFDPD);
33713 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33714 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33715 IX86_BUILTIN_GATHERPFDPS);
33716 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33717 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33718 IX86_BUILTIN_GATHERPFQPD);
33719 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33720 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33721 IX86_BUILTIN_GATHERPFQPS);
33722 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33723 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33724 IX86_BUILTIN_SCATTERPFDPD);
33725 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33726 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33727 IX86_BUILTIN_SCATTERPFDPS);
33728 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33729 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33730 IX86_BUILTIN_SCATTERPFQPD);
33731 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33732 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33733 IX86_BUILTIN_SCATTERPFQPS);
33735 /* SHA */
33736 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33737 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33738 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33739 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33740 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33741 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33742 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33743 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33744 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33745 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33746 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33747 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33748 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33749 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33751 /* RTM. */
33752 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33753 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33755 /* MMX access to the vec_init patterns. */
33756 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33757 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33759 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33760 V4HI_FTYPE_HI_HI_HI_HI,
33761 IX86_BUILTIN_VEC_INIT_V4HI);
33763 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33764 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33765 IX86_BUILTIN_VEC_INIT_V8QI);
33767 /* Access to the vec_extract patterns. */
33768 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33769 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33770 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33771 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33772 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33773 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33774 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33775 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33776 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33777 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33779 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33780 "__builtin_ia32_vec_ext_v4hi",
33781 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33783 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33784 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33786 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33787 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33789 /* Access to the vec_set patterns. */
33790 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33791 "__builtin_ia32_vec_set_v2di",
33792 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33794 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33795 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33797 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33798 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33800 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33801 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33803 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33804 "__builtin_ia32_vec_set_v4hi",
33805 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33807 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33808 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
33810 /* RDSEED */
33811 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33812 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33813 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33814 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33815 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33816 "__builtin_ia32_rdseed_di_step",
33817 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
33819 /* ADCX */
33820 def_builtin (0, "__builtin_ia32_addcarryx_u32",
33821 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
33822 def_builtin (OPTION_MASK_ISA_64BIT,
33823 "__builtin_ia32_addcarryx_u64",
33824 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33825 IX86_BUILTIN_ADDCARRYX64);
33827 /* SBB */
33828 def_builtin (0, "__builtin_ia32_sbb_u32",
33829 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
33830 def_builtin (OPTION_MASK_ISA_64BIT,
33831 "__builtin_ia32_sbb_u64",
33832 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33833 IX86_BUILTIN_SBB64);
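/* Both families follow carry_out = builtin (carry_in, a, b, &out), so a
   two-limb 64-bit addition could be open-coded as (illustrative sketch,
   a_lo/a_hi/b_lo/b_hi are hypothetical limbs):

     unsigned int lo, hi;
     unsigned char c = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, &lo);
     (void) __builtin_ia32_addcarryx_u32 (c, a_hi, b_hi, &hi);
   */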
33835 /* Read/write FLAGS. */
33836 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
33837 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33838 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
33839 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33840 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
33841 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
33842 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
33843 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
33845 /* CLFLUSHOPT. */
33846 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
33847 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
33849 /* Add FMA4 multi-arg argument instructions */
33850 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
33852 if (d->name == 0)
33853 continue;
33855 ftype = (enum ix86_builtin_func_type) d->flag;
33856 def_builtin_const (d->mask, d->name, ftype, d->code);
33860 static void
33861 ix86_init_mpx_builtins ()
33863 const struct builtin_description * d;
33864 enum ix86_builtin_func_type ftype;
33865 tree decl;
33866 size_t i;
33868 for (i = 0, d = bdesc_mpx;
33869 i < ARRAY_SIZE (bdesc_mpx);
33870 i++, d++)
33872 if (d->name == 0)
33873 continue;
33875 ftype = (enum ix86_builtin_func_type) d->flag;
33876 decl = def_builtin (d->mask, d->name, ftype, d->code);
33878 /* Without the leaf and nothrow flags for MPX builtins,
33879 abnormal edges may follow their calls when setjmp
33880 is present in the function. Since we may have a lot
33881 of MPX builtin calls, this causes lots of useless
33882 edges and enormous PHI nodes. To avoid this we mark
33883 MPX builtins as leaf and nothrow. */
33884 if (decl)
33886 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
33887 NULL_TREE);
33888 TREE_NOTHROW (decl) = 1;
33890 else
33892 ix86_builtins_isa[(int)d->code].leaf_p = true;
33893 ix86_builtins_isa[(int)d->code].nothrow_p = true;
33897 for (i = 0, d = bdesc_mpx_const;
33898 i < ARRAY_SIZE (bdesc_mpx_const);
33899 i++, d++)
33901 if (d->name == 0)
33902 continue;
33904 ftype = (enum ix86_builtin_func_type) d->flag;
33905 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
33907 if (decl)
33909 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
33910 NULL_TREE);
33911 TREE_NOTHROW (decl) = 1;
33913 else
33915 ix86_builtins_isa[(int)d->code].leaf_p = true;
33916 ix86_builtins_isa[(int)d->code].nothrow_p = true;
33921 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
33922 to return a pointer to VERSION_DECL if the outcome of the expression
33923 formed by PREDICATE_CHAIN is true. This function will be called during
33924 version dispatch to decide which function version to execute. It returns
33925 the basic block at the end, to which more conditions can be added. */
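/* Sketch of the code appended to NEW_BB for a single predicate
   (a PREDICATE_CHAIN entry <predicate_decl, predicate_arg>):

     cond = predicate_decl (predicate_arg);
     if (cond > 0)
       return (void *) &version_decl;
     // otherwise fall through to the block returned to the caller

   Multiple predicates are folded together with MIN_EXPR below.  */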
33927 static basic_block
33928 add_condition_to_bb (tree function_decl, tree version_decl,
33929 tree predicate_chain, basic_block new_bb)
33931 gimple return_stmt;
33932 tree convert_expr, result_var;
33933 gimple convert_stmt;
33934 gimple call_cond_stmt;
33935 gimple if_else_stmt;
33937 basic_block bb1, bb2, bb3;
33938 edge e12, e23;
33940 tree cond_var, and_expr_var = NULL_TREE;
33941 gimple_seq gseq;
33943 tree predicate_decl, predicate_arg;
33945 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
33947 gcc_assert (new_bb != NULL);
33948 gseq = bb_seq (new_bb);
33951 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
33952 build_fold_addr_expr (version_decl));
33953 result_var = create_tmp_var (ptr_type_node, NULL);
33954 convert_stmt = gimple_build_assign (result_var, convert_expr);
33955 return_stmt = gimple_build_return (result_var);
33957 if (predicate_chain == NULL_TREE)
33959 gimple_seq_add_stmt (&gseq, convert_stmt);
33960 gimple_seq_add_stmt (&gseq, return_stmt);
33961 set_bb_seq (new_bb, gseq);
33962 gimple_set_bb (convert_stmt, new_bb);
33963 gimple_set_bb (return_stmt, new_bb);
33964 pop_cfun ();
33965 return new_bb;
33968 while (predicate_chain != NULL)
33970 cond_var = create_tmp_var (integer_type_node, NULL);
33971 predicate_decl = TREE_PURPOSE (predicate_chain);
33972 predicate_arg = TREE_VALUE (predicate_chain);
33973 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
33974 gimple_call_set_lhs (call_cond_stmt, cond_var);
33976 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
33977 gimple_set_bb (call_cond_stmt, new_bb);
33978 gimple_seq_add_stmt (&gseq, call_cond_stmt);
33980 predicate_chain = TREE_CHAIN (predicate_chain);
33982 if (and_expr_var == NULL)
33983 and_expr_var = cond_var;
33984 else
33986 gimple assign_stmt;
33987 /* Use MIN_EXPR to check whether any of the integers is zero:
33988 and_expr_var = min_expr <cond_var, and_expr_var> */
33989 assign_stmt = gimple_build_assign (and_expr_var,
33990 build2 (MIN_EXPR, integer_type_node,
33991 cond_var, and_expr_var));
33993 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
33994 gimple_set_bb (assign_stmt, new_bb);
33995 gimple_seq_add_stmt (&gseq, assign_stmt);
33999 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34000 integer_zero_node,
34001 NULL_TREE, NULL_TREE);
34002 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34003 gimple_set_bb (if_else_stmt, new_bb);
34004 gimple_seq_add_stmt (&gseq, if_else_stmt);
34006 gimple_seq_add_stmt (&gseq, convert_stmt);
34007 gimple_seq_add_stmt (&gseq, return_stmt);
34008 set_bb_seq (new_bb, gseq);
34010 bb1 = new_bb;
34011 e12 = split_block (bb1, if_else_stmt);
34012 bb2 = e12->dest;
34013 e12->flags &= ~EDGE_FALLTHRU;
34014 e12->flags |= EDGE_TRUE_VALUE;
34016 e23 = split_block (bb2, return_stmt);
34018 gimple_set_bb (convert_stmt, bb2);
34019 gimple_set_bb (return_stmt, bb2);
34021 bb3 = e23->dest;
34022 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34024 remove_edge (e23);
34025 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34027 pop_cfun ();
34029 return bb3;
34032 /* This parses the attribute arguments to target in DECL and determines
34033 the right builtin to use to match the platform specification.
34034 It returns the priority value for this version decl. If PREDICATE_LIST
34035 is not NULL, it stores the list of cpu features that need to be checked
34036 before dispatching this function. */
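/* For example, a version declared with
   __attribute__((target ("arch=core2,sse4.2"))) would (roughly) produce a
   PREDICATE_LIST checking __builtin_cpu_is ("core2") and
   __builtin_cpu_supports ("sse4.2"), and the returned priority would be
   P_SSE4_2, the highest of the features involved.  */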
34038 static unsigned int
34039 get_builtin_code_for_version (tree decl, tree *predicate_list)
34041 tree attrs;
34042 struct cl_target_option cur_target;
34043 tree target_node;
34044 struct cl_target_option *new_target;
34045 const char *arg_str = NULL;
34046 const char *attrs_str = NULL;
34047 char *tok_str = NULL;
34048 char *token;
34050 /* Priority of i386 features, greater value is higher priority. This is
34051 used to decide the order in which function dispatch must happen. For
34052 instance, a version specialized for SSE4.2 should be checked for dispatch
34053 before a version for SSE3, as SSE4.2 implies SSE3. */
34054 enum feature_priority
34056 P_ZERO = 0,
34057 P_MMX,
34058 P_SSE,
34059 P_SSE2,
34060 P_SSE3,
34061 P_SSSE3,
34062 P_PROC_SSSE3,
34063 P_SSE4_A,
34064 P_PROC_SSE4_A,
34065 P_SSE4_1,
34066 P_SSE4_2,
34067 P_PROC_SSE4_2,
34068 P_POPCNT,
34069 P_AVX,
34070 P_PROC_AVX,
34071 P_FMA4,
34072 P_XOP,
34073 P_PROC_XOP,
34074 P_FMA,
34075 P_PROC_FMA,
34076 P_AVX2,
34077 P_PROC_AVX2
34080 enum feature_priority priority = P_ZERO;
34082 /* These are the target attribute strings for which a dispatcher is
34083 available, from fold_builtin_cpu. */
34085 static struct _feature_list
34087 const char *const name;
34088 const enum feature_priority priority;
34090 const feature_list[] =
34092 {"mmx", P_MMX},
34093 {"sse", P_SSE},
34094 {"sse2", P_SSE2},
34095 {"sse3", P_SSE3},
34096 {"sse4a", P_SSE4_A},
34097 {"ssse3", P_SSSE3},
34098 {"sse4.1", P_SSE4_1},
34099 {"sse4.2", P_SSE4_2},
34100 {"popcnt", P_POPCNT},
34101 {"avx", P_AVX},
34102 {"fma4", P_FMA4},
34103 {"xop", P_XOP},
34104 {"fma", P_FMA},
34105 {"avx2", P_AVX2}
34109 static unsigned int NUM_FEATURES
34110 = sizeof (feature_list) / sizeof (struct _feature_list);
34112 unsigned int i;
34114 tree predicate_chain = NULL_TREE;
34115 tree predicate_decl, predicate_arg;
34117 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34118 gcc_assert (attrs != NULL);
34120 attrs = TREE_VALUE (TREE_VALUE (attrs));
34122 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34123 attrs_str = TREE_STRING_POINTER (attrs);
34125 /* Return priority zero for default function. */
34126 if (strcmp (attrs_str, "default") == 0)
34127 return 0;
34129 /* Handle arch= if specified. For priority, set it to be 1 more than
34130 the best instruction set the processor can handle. For instance, if
34131 there is a version for atom and a version for ssse3 (the highest ISA
34132 priority for atom), the atom version must be checked for dispatch
34133 before the ssse3 version. */
34134 if (strstr (attrs_str, "arch=") != NULL)
34136 cl_target_option_save (&cur_target, &global_options);
34137 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34138 &global_options_set);
34140 gcc_assert (target_node);
34141 new_target = TREE_TARGET_OPTION (target_node);
34142 gcc_assert (new_target);
34144 if (new_target->arch_specified && new_target->arch > 0)
34146 switch (new_target->arch)
34148 case PROCESSOR_CORE2:
34149 arg_str = "core2";
34150 priority = P_PROC_SSSE3;
34151 break;
34152 case PROCESSOR_NEHALEM:
34153 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34154 arg_str = "westmere";
34155 else
34156 /* We translate "arch=corei7" and "arch=nehalem" to
34157 "corei7" so that it will be mapped to M_INTEL_COREI7
34158 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34159 arg_str = "corei7";
34160 priority = P_PROC_SSE4_2;
34161 break;
34162 case PROCESSOR_SANDYBRIDGE:
34163 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34164 arg_str = "ivybridge";
34165 else
34166 arg_str = "sandybridge";
34167 priority = P_PROC_AVX;
34168 break;
34169 case PROCESSOR_HASWELL:
34170 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34171 arg_str = "broadwell";
34172 else
34173 arg_str = "haswell";
34174 priority = P_PROC_AVX2;
34175 break;
34176 case PROCESSOR_BONNELL:
34177 arg_str = "bonnell";
34178 priority = P_PROC_SSSE3;
34179 break;
34180 case PROCESSOR_SILVERMONT:
34181 arg_str = "silvermont";
34182 priority = P_PROC_SSE4_2;
34183 break;
34184 case PROCESSOR_AMDFAM10:
34185 arg_str = "amdfam10h";
34186 priority = P_PROC_SSE4_A;
34187 break;
34188 case PROCESSOR_BTVER1:
34189 arg_str = "btver1";
34190 priority = P_PROC_SSE4_A;
34191 break;
34192 case PROCESSOR_BTVER2:
34193 arg_str = "btver2";
34194 priority = P_PROC_AVX;
34195 break;
34196 case PROCESSOR_BDVER1:
34197 arg_str = "bdver1";
34198 priority = P_PROC_XOP;
34199 break;
34200 case PROCESSOR_BDVER2:
34201 arg_str = "bdver2";
34202 priority = P_PROC_FMA;
34203 break;
34204 case PROCESSOR_BDVER3:
34205 arg_str = "bdver3";
34206 priority = P_PROC_FMA;
34207 break;
34208 case PROCESSOR_BDVER4:
34209 arg_str = "bdver4";
34210 priority = P_PROC_AVX2;
34211 break;
34215 cl_target_option_restore (&global_options, &cur_target);
34217 if (predicate_list && arg_str == NULL)
34219 error_at (DECL_SOURCE_LOCATION (decl),
34220 "No dispatcher found for the versioning attributes");
34221 return 0;
34224 if (predicate_list)
34226 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34227 /* For a C string literal the length includes the trailing NULL. */
34228 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34229 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34230 predicate_chain);
34234 /* Process feature name. */
34235 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34236 strcpy (tok_str, attrs_str);
34237 token = strtok (tok_str, ",");
34238 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34240 while (token != NULL)
34242 /* Do not process "arch=" */
34243 if (strncmp (token, "arch=", 5) == 0)
34245 token = strtok (NULL, ",");
34246 continue;
34248 for (i = 0; i < NUM_FEATURES; ++i)
34250 if (strcmp (token, feature_list[i].name) == 0)
34252 if (predicate_list)
34254 predicate_arg = build_string_literal (
34255 strlen (feature_list[i].name) + 1,
34256 feature_list[i].name);
34257 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34258 predicate_chain);
34260 /* Find the maximum priority feature. */
34261 if (feature_list[i].priority > priority)
34262 priority = feature_list[i].priority;
34264 break;
34267 if (predicate_list && i == NUM_FEATURES)
34269 error_at (DECL_SOURCE_LOCATION (decl),
34270 "No dispatcher found for %s", token);
34271 return 0;
34273 token = strtok (NULL, ",");
34275 free (tok_str);
34277 if (predicate_list && predicate_chain == NULL_TREE)
34279 error_at (DECL_SOURCE_LOCATION (decl),
34280 "No dispatcher found for the versioning attributes : %s",
34281 attrs_str);
34282 return 0;
34284 else if (predicate_list)
34286 predicate_chain = nreverse (predicate_chain);
34287 *predicate_list = predicate_chain;
34290 return priority;
34293 /* This compares the priority of target features in function DECL1
34294 and DECL2. It returns positive value if DECL1 is higher priority,
34295 negative value if DECL2 is higher priority and 0 if they are the
34296 same. */
34298 static int
34299 ix86_compare_version_priority (tree decl1, tree decl2)
34301 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34302 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34304 return (int)priority1 - (int)priority2;
34307 /* V1 and V2 point to function versions with different priorities
34308 based on the target ISA. This function compares their priorities. */
34310 static int
34311 feature_compare (const void *v1, const void *v2)
34313 typedef struct _function_version_info
34315 tree version_decl;
34316 tree predicate_chain;
34317 unsigned int dispatch_priority;
34318 } function_version_info;
34320 const function_version_info c1 = *(const function_version_info *)v1;
34321 const function_version_info c2 = *(const function_version_info *)v2;
34322 return (c2.dispatch_priority - c1.dispatch_priority);
34325 /* This function generates the dispatch function for
34326 multi-versioned functions. DISPATCH_DECL is the function which will
34327 contain the dispatch logic. FNDECLS are the function choices for
34328 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34329 in DISPATCH_DECL in which the dispatch code is generated. */
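/* At the source level this backs multi-versioned declarations such as
   (illustrative):

     __attribute__((target ("default"))) int foo (void);
     __attribute__((target ("avx2")))    int foo (void);

   The dispatcher built here first calls __builtin_cpu_init, then tests each
   version's predicates in priority order and returns the address of the
   first version that matches, falling back to the default.  */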
34331 static int
34332 dispatch_function_versions (tree dispatch_decl,
34333 void *fndecls_p,
34334 basic_block *empty_bb)
34336 tree default_decl;
34337 gimple ifunc_cpu_init_stmt;
34338 gimple_seq gseq;
34339 int ix;
34340 tree ele;
34341 vec<tree> *fndecls;
34342 unsigned int num_versions = 0;
34343 unsigned int actual_versions = 0;
34344 unsigned int i;
34346 struct _function_version_info
34348 tree version_decl;
34349 tree predicate_chain;
34350 unsigned int dispatch_priority;
34351 }*function_version_info;
34353 gcc_assert (dispatch_decl != NULL
34354 && fndecls_p != NULL
34355 && empty_bb != NULL);
34357 /* fndecls_p is actually a vector. */
34358 fndecls = static_cast<vec<tree> *> (fndecls_p);
34360 /* At least one more version other than the default. */
34361 num_versions = fndecls->length ();
34362 gcc_assert (num_versions >= 2);
34364 function_version_info = (struct _function_version_info *)
34365 XNEWVEC (struct _function_version_info, (num_versions - 1));
34367 /* The first version in the vector is the default decl. */
34368 default_decl = (*fndecls)[0];
34370 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34372 gseq = bb_seq (*empty_bb);
34373 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34374 constructors, so explicitly call __builtin_cpu_init here. */
34375 ifunc_cpu_init_stmt = gimple_build_call_vec (
34376 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34377 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34378 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34379 set_bb_seq (*empty_bb, gseq);
34381 pop_cfun ();
34384 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34386 tree version_decl = ele;
34387 tree predicate_chain = NULL_TREE;
34388 unsigned int priority;
34389 /* Get attribute string, parse it and find the right predicate decl.
34390 The predicate function could be a lengthy combination of many
34391 features, like arch-type and various isa-variants. */
34392 priority = get_builtin_code_for_version (version_decl,
34393 &predicate_chain);
34395 if (predicate_chain == NULL_TREE)
34396 continue;
34398 function_version_info [actual_versions].version_decl = version_decl;
34399 function_version_info [actual_versions].predicate_chain
34400 = predicate_chain;
34401 function_version_info [actual_versions].dispatch_priority = priority;
34402 actual_versions++;
34405 /* Sort the versions according to descending order of dispatch priority. The
34406 priority is based on the ISA. This is not a perfect solution. There
34407 could still be ambiguity. If more than one function version is suitable
34408 to execute, which one should be dispatched? In future, allow the user
34409 to specify a dispatch priority next to the version. */
34410 qsort (function_version_info, actual_versions,
34411 sizeof (struct _function_version_info), feature_compare);
34413 for (i = 0; i < actual_versions; ++i)
34414 *empty_bb = add_condition_to_bb (dispatch_decl,
34415 function_version_info[i].version_decl,
34416 function_version_info[i].predicate_chain,
34417 *empty_bb);
34419 /* Dispatch the default version at the end. */
34420 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34421 NULL, *empty_bb);
34423 free (function_version_info);
34424 return 0;
34427 /* Comparator function to be used in qsort routine to sort attribute
34428 specification strings to "target". */
34430 static int
34431 attr_strcmp (const void *v1, const void *v2)
34433 const char *c1 = *(char *const*)v1;
34434 const char *c2 = *(char *const*)v2;
34435 return strcmp (c1, c2);
34438 /* ARGLIST is the argument to target attribute. This function tokenizes
34439 the comma separated arguments, sorts them and returns a string which
34440 is a unique identifier for the comma separated arguments. It also
34441 replaces non-identifier characters "=,-" with "_". */
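/* E.g. the argument string "sse4.2,arch=core2" becomes the tokens "sse4.2"
   and "arch_core2", which sort to the identifier "arch_core2_sse4.2".  */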
34443 static char *
34444 sorted_attr_string (tree arglist)
34446 tree arg;
34447 size_t str_len_sum = 0;
34448 char **args = NULL;
34449 char *attr_str, *ret_str;
34450 char *attr = NULL;
34451 unsigned int argnum = 1;
34452 unsigned int i;
34454 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34456 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34457 size_t len = strlen (str);
34458 str_len_sum += len + 1;
34459 if (arg != arglist)
34460 argnum++;
34461 for (i = 0; i < strlen (str); i++)
34462 if (str[i] == ',')
34463 argnum++;
34466 attr_str = XNEWVEC (char, str_len_sum);
34467 str_len_sum = 0;
34468 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34470 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34471 size_t len = strlen (str);
34472 memcpy (attr_str + str_len_sum, str, len);
34473 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34474 str_len_sum += len + 1;
34477 /* Replace "=,-" with "_". */
34478 for (i = 0; i < strlen (attr_str); i++)
34479 if (attr_str[i] == '=' || attr_str[i]== '-')
34480 attr_str[i] = '_';
34482 if (argnum == 1)
34483 return attr_str;
34485 args = XNEWVEC (char *, argnum);
34487 i = 0;
34488 attr = strtok (attr_str, ",");
34489 while (attr != NULL)
34491 args[i] = attr;
34492 i++;
34493 attr = strtok (NULL, ",");
34496 qsort (args, argnum, sizeof (char *), attr_strcmp);
34498 ret_str = XNEWVEC (char, str_len_sum);
34499 str_len_sum = 0;
34500 for (i = 0; i < argnum; i++)
34502 size_t len = strlen (args[i]);
34503 memcpy (ret_str + str_len_sum, args[i], len);
34504 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34505 str_len_sum += len + 1;
34508 XDELETEVEC (args);
34509 XDELETEVEC (attr_str);
34510 return ret_str;
34513 /* This function changes the assembler name for functions that are
34514 versions. If DECL is a function version and has a "target"
34515 attribute, it appends the attribute string to its assembler name. */
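/* E.g. a version of foo declared with __attribute__((target ("avx"))) gets
   the assembler name "foo.avx"; the "default" version keeps its original
   assembler name.  */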
34517 static tree
34518 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34520 tree version_attr;
34521 const char *orig_name, *version_string;
34522 char *attr_str, *assembler_name;
34524 if (DECL_DECLARED_INLINE_P (decl)
34525 && lookup_attribute ("gnu_inline",
34526 DECL_ATTRIBUTES (decl)))
34527 error_at (DECL_SOURCE_LOCATION (decl),
34528 "Function versions cannot be marked as gnu_inline,"
34529 " bodies have to be generated");
34531 if (DECL_VIRTUAL_P (decl)
34532 || DECL_VINDEX (decl))
34533 sorry ("Virtual function multiversioning not supported");
34535 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34537 /* target attribute string cannot be NULL. */
34538 gcc_assert (version_attr != NULL_TREE);
34540 orig_name = IDENTIFIER_POINTER (id);
34541 version_string
34542 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34544 if (strcmp (version_string, "default") == 0)
34545 return id;
34547 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34548 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34550 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34552 /* Allow assembler name to be modified if already set. */
34553 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34554 SET_DECL_RTL (decl, NULL);
34556 tree ret = get_identifier (assembler_name);
34557 XDELETEVEC (attr_str);
34558 XDELETEVEC (assembler_name);
34559 return ret;
34562 /* This function returns true if FN1 and FN2 are versions of the same function,
34563 that is, the target strings of the function decls are different. This assumes
34564 that FN1 and FN2 have the same signature. */
34566 static bool
34567 ix86_function_versions (tree fn1, tree fn2)
34569 tree attr1, attr2;
34570 char *target1, *target2;
34571 bool result;
34573 if (TREE_CODE (fn1) != FUNCTION_DECL
34574 || TREE_CODE (fn2) != FUNCTION_DECL)
34575 return false;
34577 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34578 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34580 /* At least one function decl should have the target attribute specified. */
34581 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34582 return false;
34584 /* Diagnose missing target attribute if one of the decls is already
34585 multi-versioned. */
34586 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34588 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34590 if (attr2 != NULL_TREE)
34592 tree tem = fn1;
34593 fn1 = fn2;
34594 fn2 = tem;
34595 attr1 = attr2;
34597 error_at (DECL_SOURCE_LOCATION (fn2),
34598 "missing %<target%> attribute for multi-versioned %D",
34599 fn2);
34600 inform (DECL_SOURCE_LOCATION (fn1),
34601 "previous declaration of %D", fn1);
34602 /* Prevent diagnosing of the same error multiple times. */
34603 DECL_ATTRIBUTES (fn2)
34604 = tree_cons (get_identifier ("target"),
34605 copy_node (TREE_VALUE (attr1)),
34606 DECL_ATTRIBUTES (fn2));
34608 return false;
34611 target1 = sorted_attr_string (TREE_VALUE (attr1));
34612 target2 = sorted_attr_string (TREE_VALUE (attr2));
34614 /* The sorted target strings must be different for fn1 and fn2
34615 to be versions. */
34616 if (strcmp (target1, target2) == 0)
34617 result = false;
34618 else
34619 result = true;
34621 XDELETEVEC (target1);
34622 XDELETEVEC (target2);
34624 return result;
34627 static tree
34628 ix86_mangle_decl_assembler_name (tree decl, tree id)
34630 /* For function version, add the target suffix to the assembler name. */
34631 if (TREE_CODE (decl) == FUNCTION_DECL
34632 && DECL_FUNCTION_VERSIONED (decl))
34633 id = ix86_mangle_function_version_assembler_name (decl, id);
34634 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34635 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34636 #endif
34638 return id;
34641 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34642 is true, append the full path name of the source file. */
34644 static char *
34645 make_name (tree decl, const char *suffix, bool make_unique)
34647 char *global_var_name;
34648 int name_len;
34649 const char *name;
34650 const char *unique_name = NULL;
34652 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34654 /* Get a unique name that can be used globally without any chances
34655 of collision at link time. */
34656 if (make_unique)
34657 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34659 name_len = strlen (name) + strlen (suffix) + 2;
34661 if (make_unique)
34662 name_len += strlen (unique_name) + 1;
34663 global_var_name = XNEWVEC (char, name_len);
34665 /* Use '.' to concatenate names as it is demangler friendly. */
34666 if (make_unique)
34667 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34668 suffix);
34669 else
34670 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34672 return global_var_name;
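/* For example (hedged; the exact unique component comes from
   get_file_function_name): make_name (decl-of-foo, "resolver", false)
   yields "foo.resolver", while passing make_unique splices in a
   file-derived component, roughly "foo.<file-id>.resolver", so two
   translation units each defining a static foo do not clash at link
   time.  */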
34675 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34677 /* Make a dispatcher declaration for the multi-versioned function DECL.
34678 Calls to DECL function will be replaced with calls to the dispatcher
34679 by the front-end. Return the decl created. */
34681 static tree
34682 make_dispatcher_decl (const tree decl)
34684 tree func_decl;
34685 char *func_name;
34686 tree fn_type, func_type;
34687 bool is_uniq = false;
34689 if (TREE_PUBLIC (decl) == 0)
34690 is_uniq = true;
34692 func_name = make_name (decl, "ifunc", is_uniq);
34694 fn_type = TREE_TYPE (decl);
34695 func_type = build_function_type (TREE_TYPE (fn_type),
34696 TYPE_ARG_TYPES (fn_type));
34698 func_decl = build_fn_decl (func_name, func_type);
34699 XDELETEVEC (func_name);
34700 TREE_USED (func_decl) = 1;
34701 DECL_CONTEXT (func_decl) = NULL_TREE;
34702 DECL_INITIAL (func_decl) = error_mark_node;
34703 DECL_ARTIFICIAL (func_decl) = 1;
34704 /* Mark this func as external, the resolver will flip it again if
34705 it gets generated. */
34706 DECL_EXTERNAL (func_decl) = 1;
34707 /* This will be an IFUNC; IFUNCs have to be externally visible. */
34708 TREE_PUBLIC (func_decl) = 1;
34710 return func_decl;
34713 #endif
34715 /* Returns true if DECL is multi-versioned and is the default function,
34716 that is, it is not tagged with a target-specific optimization. */
34718 static bool
34719 is_function_default_version (const tree decl)
34721 if (TREE_CODE (decl) != FUNCTION_DECL
34722 || !DECL_FUNCTION_VERSIONED (decl))
34723 return false;
34724 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34725 gcc_assert (attr);
34726 attr = TREE_VALUE (TREE_VALUE (attr));
34727 return (TREE_CODE (attr) == STRING_CST
34728 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
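/* In other words (illustration only): the decl carrying
     __attribute__ ((target ("default")))
   is the default version; every other target string names a
   specialized version.  */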
34731 /* Make a dispatcher declaration for the multi-versioned function DECL.
34732 Calls to DECL function will be replaced with calls to the dispatcher
34733 by the front-end. Returns the decl of the dispatcher function. */
34735 static tree
34736 ix86_get_function_versions_dispatcher (void *decl)
34738 tree fn = (tree) decl;
34739 struct cgraph_node *node = NULL;
34740 struct cgraph_node *default_node = NULL;
34741 struct cgraph_function_version_info *node_v = NULL;
34742 struct cgraph_function_version_info *first_v = NULL;
34744 tree dispatch_decl = NULL;
34746 struct cgraph_function_version_info *default_version_info = NULL;
34748 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34750 node = cgraph_node::get (fn);
34751 gcc_assert (node != NULL);
34753 node_v = node->function_version ();
34754 gcc_assert (node_v != NULL);
34756 if (node_v->dispatcher_resolver != NULL)
34757 return node_v->dispatcher_resolver;
34759 /* Find the default version and make it the first node. */
34760 first_v = node_v;
34761 /* Go to the beginning of the chain. */
34762 while (first_v->prev != NULL)
34763 first_v = first_v->prev;
34764 default_version_info = first_v;
34765 while (default_version_info != NULL)
34767 if (is_function_default_version
34768 (default_version_info->this_node->decl))
34769 break;
34770 default_version_info = default_version_info->next;
34773 /* If there is no default node, just return NULL. */
34774 if (default_version_info == NULL)
34775 return NULL;
34777 /* Make default info the first node. */
34778 if (first_v != default_version_info)
34780 default_version_info->prev->next = default_version_info->next;
34781 if (default_version_info->next)
34782 default_version_info->next->prev = default_version_info->prev;
34783 first_v->prev = default_version_info;
34784 default_version_info->next = first_v;
34785 default_version_info->prev = NULL;
34788 default_node = default_version_info->this_node;
34790 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34791 if (targetm.has_ifunc_p ())
34793 struct cgraph_function_version_info *it_v = NULL;
34794 struct cgraph_node *dispatcher_node = NULL;
34795 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34797 /* Right now, the dispatching is done via ifunc. */
34798 dispatch_decl = make_dispatcher_decl (default_node->decl);
34800 dispatcher_node = cgraph_node::get_create (dispatch_decl);
34801 gcc_assert (dispatcher_node != NULL);
34802 dispatcher_node->dispatcher_function = 1;
34803 dispatcher_version_info
34804 = dispatcher_node->insert_new_function_version ();
34805 dispatcher_version_info->next = default_version_info;
34806 dispatcher_node->definition = 1;
34808 /* Set the dispatcher for all the versions. */
34809 it_v = default_version_info;
34810 while (it_v != NULL)
34812 it_v->dispatcher_resolver = dispatch_decl;
34813 it_v = it_v->next;
34816 else
34817 #endif
34819 error_at (DECL_SOURCE_LOCATION (default_node->decl),
34820 "multiversioning needs ifunc which is not supported "
34821 "on this target");
34824 return dispatch_decl;
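/* Conceptual sketch of the ifunc dispatch set up above (hedged; the
   names foo_resolver, foo_avx2 and foo_default are purely
   illustrative, the real decls come from make_dispatcher_decl above
   and make_resolver_func below):

     int foo (void) __attribute__ ((ifunc ("foo.resolver")));

     static void *
     foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return (void *) foo_avx2;      // target ("avx2") version
       return (void *) foo_default;     // target ("default") version
     }
*/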
34827 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
34828 it to CHAIN. */
34830 static tree
34831 make_attribute (const char *name, const char *arg_name, tree chain)
34833 tree attr_name;
34834 tree attr_arg_name;
34835 tree attr_args;
34836 tree attr;
34838 attr_name = get_identifier (name);
34839 attr_arg_name = build_string (strlen (arg_name), arg_name);
34840 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
34841 attr = tree_cons (attr_name, attr_args, chain);
34842 return attr;
34845 /* Make the resolver function decl to dispatch the versions of
34846 a multi-versioned function, DEFAULT_DECL. Create an
34847 empty basic block in the resolver and store the pointer in
34848 EMPTY_BB. Return the decl of the resolver function. */
34850 static tree
34851 make_resolver_func (const tree default_decl,
34852 const tree dispatch_decl,
34853 basic_block *empty_bb)
34855 char *resolver_name;
34856 tree decl, type, decl_name, t;
34857 bool is_uniq = false;
34859 /* IFUNCs have to be globally visible. So, if the default_decl is
34860 not, then the name of the IFUNC should be made unique. */
34861 if (TREE_PUBLIC (default_decl) == 0)
34862 is_uniq = true;
34864 /* Append the filename to the resolver function if the versions are
34865 not externally visible. This is because the resolver function has
34866 to be externally visible for the loader to find it. So, appending
34867 the filename will prevent conflicts with a resolver function from
34868 another module which is based on the same version name. */
34869 resolver_name = make_name (default_decl, "resolver", is_uniq);
34871 /* The resolver function should return a (void *). */
34872 type = build_function_type_list (ptr_type_node, NULL_TREE);
34874 decl = build_fn_decl (resolver_name, type);
34875 decl_name = get_identifier (resolver_name);
34876 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
34878 DECL_NAME (decl) = decl_name;
34879 TREE_USED (decl) = 1;
34880 DECL_ARTIFICIAL (decl) = 1;
34881 DECL_IGNORED_P (decl) = 0;
34882 /* IFUNC resolvers have to be externally visible. */
34883 TREE_PUBLIC (decl) = 1;
34884 DECL_UNINLINABLE (decl) = 1;
34886 /* Resolver is not external, body is generated. */
34887 DECL_EXTERNAL (decl) = 0;
34888 DECL_EXTERNAL (dispatch_decl) = 0;
34890 DECL_CONTEXT (decl) = NULL_TREE;
34891 DECL_INITIAL (decl) = make_node (BLOCK);
34892 DECL_STATIC_CONSTRUCTOR (decl) = 0;
34894 if (DECL_COMDAT_GROUP (default_decl)
34895 || TREE_PUBLIC (default_decl))
34897 /* In this case, each translation unit with a call to this
34898 versioned function will put out a resolver. Ensure it
34899 is comdat to keep just one copy. */
34900 DECL_COMDAT (decl) = 1;
34901 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
34903 /* Build result decl and add to function_decl. */
34904 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
34905 DECL_ARTIFICIAL (t) = 1;
34906 DECL_IGNORED_P (t) = 1;
34907 DECL_RESULT (decl) = t;
34909 gimplify_function_tree (decl);
34910 push_cfun (DECL_STRUCT_FUNCTION (decl));
34911 *empty_bb = init_lowered_empty_function (decl, false);
34913 cgraph_node::add_new_function (decl, true);
34914 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
34916 pop_cfun ();
34918 gcc_assert (dispatch_decl != NULL);
34919 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
34920 DECL_ATTRIBUTES (dispatch_decl)
34921 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
34923 /* Create the alias for dispatch to resolver here. */
34924 /*cgraph_create_function_alias (dispatch_decl, decl);*/
34925 cgraph_node::create_same_body_alias (dispatch_decl, decl);
34926 XDELETEVEC (resolver_name);
34927 return decl;
34930 /* Generate the dispatching code body to dispatch multi-versioned function
34931 DECL. The target hook is called to process the "target" attributes and
34932 provide the code to dispatch the right function at run-time. NODE points
34933 to the dispatcher decl whose body will be created. */
34935 static tree
34936 ix86_generate_version_dispatcher_body (void *node_p)
34938 tree resolver_decl;
34939 basic_block empty_bb;
34940 tree default_ver_decl;
34941 struct cgraph_node *versn;
34942 struct cgraph_node *node;
34944 struct cgraph_function_version_info *node_version_info = NULL;
34945 struct cgraph_function_version_info *versn_info = NULL;
34947 node = (cgraph_node *)node_p;
34949 node_version_info = node->function_version ();
34950 gcc_assert (node->dispatcher_function
34951 && node_version_info != NULL);
34953 if (node_version_info->dispatcher_resolver)
34954 return node_version_info->dispatcher_resolver;
34956 /* The first version in the chain corresponds to the default version. */
34957 default_ver_decl = node_version_info->next->this_node->decl;
34959 /* node is going to be an alias, so remove the finalized bit. */
34960 node->definition = false;
34962 resolver_decl = make_resolver_func (default_ver_decl,
34963 node->decl, &empty_bb);
34965 node_version_info->dispatcher_resolver = resolver_decl;
34967 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
34969 auto_vec<tree, 2> fn_ver_vec;
34971 for (versn_info = node_version_info->next; versn_info;
34972 versn_info = versn_info->next)
34974 versn = versn_info->this_node;
34975 /* Check for virtual functions here again, as by this time it should
34976 have been determined if this function needs a vtable index or
34977 not. This happens for methods in derived classes that override
34978 virtual methods in base classes but are not explicitly marked as
34979 virtual. */
34980 if (DECL_VINDEX (versn->decl))
34981 sorry ("Virtual function multiversioning not supported");
34983 fn_ver_vec.safe_push (versn->decl);
34986 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
34987 cgraph_edge::rebuild_edges ();
34988 pop_cfun ();
34989 return resolver_decl;
34991 /* This builds the processor_model struct type defined in
34992 libgcc/config/i386/cpuinfo.c */
34994 static tree
34995 build_processor_model_struct (void)
34997 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
34998 "__cpu_features"};
34999 tree field = NULL_TREE, field_chain = NULL_TREE;
35000 int i;
35001 tree type = make_node (RECORD_TYPE);
35003 /* The first 3 fields are unsigned int. */
35004 for (i = 0; i < 3; ++i)
35006 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35007 get_identifier (field_name[i]), unsigned_type_node);
35008 if (field_chain != NULL_TREE)
35009 DECL_CHAIN (field) = field_chain;
35010 field_chain = field;
35013 /* The last field is an array of unsigned integers of size one. */
35014 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35015 get_identifier (field_name[3]),
35016 build_array_type (unsigned_type_node,
35017 build_index_type (size_one_node)));
35018 if (field_chain != NULL_TREE)
35019 DECL_CHAIN (field) = field_chain;
35020 field_chain = field;
35022 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35023 return type;
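/* The record laid out above is meant to match libgcc; a hedged sketch
   of the corresponding declaration in libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/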
35026 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35028 static tree
35029 make_var_decl (tree type, const char *name)
35031 tree new_decl;
35033 new_decl = build_decl (UNKNOWN_LOCATION,
35034 VAR_DECL,
35035 get_identifier(name),
35036 type);
35038 DECL_EXTERNAL (new_decl) = 1;
35039 TREE_STATIC (new_decl) = 1;
35040 TREE_PUBLIC (new_decl) = 1;
35041 DECL_INITIAL (new_decl) = 0;
35042 DECL_ARTIFICIAL (new_decl) = 0;
35043 DECL_PRESERVE_P (new_decl) = 1;
35045 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35046 assemble_variable (new_decl, 0, 0, 0);
35048 return new_decl;
35051 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35052 into an integer defined in libgcc/config/i386/cpuinfo.c */
35054 static tree
35055 fold_builtin_cpu (tree fndecl, tree *args)
35057 unsigned int i;
35058 enum ix86_builtins fn_code = (enum ix86_builtins)
35059 DECL_FUNCTION_CODE (fndecl);
35060 tree param_string_cst = NULL;
35062 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35063 enum processor_features
35065 F_CMOV = 0,
35066 F_MMX,
35067 F_POPCNT,
35068 F_SSE,
35069 F_SSE2,
35070 F_SSE3,
35071 F_SSSE3,
35072 F_SSE4_1,
35073 F_SSE4_2,
35074 F_AVX,
35075 F_AVX2,
35076 F_SSE4_A,
35077 F_FMA4,
35078 F_XOP,
35079 F_FMA,
35080 F_MAX
35083 /* These are the values for vendor types and cpu types and subtypes
35084 in cpuinfo.c. The corresponding start value must be subtracted from
35085 cpu types and subtypes to get the value stored in the field. */
35086 enum processor_model
35088 M_INTEL = 1,
35089 M_AMD,
35090 M_CPU_TYPE_START,
35091 M_INTEL_BONNELL,
35092 M_INTEL_CORE2,
35093 M_INTEL_COREI7,
35094 M_AMDFAM10H,
35095 M_AMDFAM15H,
35096 M_INTEL_SILVERMONT,
35097 M_AMD_BTVER1,
35098 M_AMD_BTVER2,
35099 M_CPU_SUBTYPE_START,
35100 M_INTEL_COREI7_NEHALEM,
35101 M_INTEL_COREI7_WESTMERE,
35102 M_INTEL_COREI7_SANDYBRIDGE,
35103 M_AMDFAM10H_BARCELONA,
35104 M_AMDFAM10H_SHANGHAI,
35105 M_AMDFAM10H_ISTANBUL,
35106 M_AMDFAM15H_BDVER1,
35107 M_AMDFAM15H_BDVER2,
35108 M_AMDFAM15H_BDVER3,
35109 M_AMDFAM15H_BDVER4,
35110 M_INTEL_COREI7_IVYBRIDGE,
35111 M_INTEL_COREI7_HASWELL
35114 static struct _arch_names_table
35116 const char *const name;
35117 const enum processor_model model;
35119 const arch_names_table[] =
35121 {"amd", M_AMD},
35122 {"intel", M_INTEL},
35123 {"atom", M_INTEL_BONNELL},
35124 {"slm", M_INTEL_SILVERMONT},
35125 {"core2", M_INTEL_CORE2},
35126 {"corei7", M_INTEL_COREI7},
35127 {"nehalem", M_INTEL_COREI7_NEHALEM},
35128 {"westmere", M_INTEL_COREI7_WESTMERE},
35129 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35130 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35131 {"haswell", M_INTEL_COREI7_HASWELL},
35132 {"bonnell", M_INTEL_BONNELL},
35133 {"silvermont", M_INTEL_SILVERMONT},
35134 {"amdfam10h", M_AMDFAM10H},
35135 {"barcelona", M_AMDFAM10H_BARCELONA},
35136 {"shanghai", M_AMDFAM10H_SHANGHAI},
35137 {"istanbul", M_AMDFAM10H_ISTANBUL},
35138 {"btver1", M_AMD_BTVER1},
35139 {"amdfam15h", M_AMDFAM15H},
35140 {"bdver1", M_AMDFAM15H_BDVER1},
35141 {"bdver2", M_AMDFAM15H_BDVER2},
35142 {"bdver3", M_AMDFAM15H_BDVER3},
35143 {"bdver4", M_AMDFAM15H_BDVER4},
35144 {"btver2", M_AMD_BTVER2},
35147 static struct _isa_names_table
35149 const char *const name;
35150 const enum processor_features feature;
35152 const isa_names_table[] =
35154 {"cmov", F_CMOV},
35155 {"mmx", F_MMX},
35156 {"popcnt", F_POPCNT},
35157 {"sse", F_SSE},
35158 {"sse2", F_SSE2},
35159 {"sse3", F_SSE3},
35160 {"ssse3", F_SSSE3},
35161 {"sse4a", F_SSE4_A},
35162 {"sse4.1", F_SSE4_1},
35163 {"sse4.2", F_SSE4_2},
35164 {"avx", F_AVX},
35165 {"fma4", F_FMA4},
35166 {"xop", F_XOP},
35167 {"fma", F_FMA},
35168 {"avx2", F_AVX2}
35171 tree __processor_model_type = build_processor_model_struct ();
35172 tree __cpu_model_var = make_var_decl (__processor_model_type,
35173 "__cpu_model");
35176 varpool_node::add (__cpu_model_var);
35178 gcc_assert ((args != NULL) && (*args != NULL));
35180 param_string_cst = *args;
35181 while (param_string_cst
35182 && TREE_CODE (param_string_cst) != STRING_CST)
35184 /* *args must be an expr that can contain other EXPRs leading to a
35185 STRING_CST. */
35186 if (!EXPR_P (param_string_cst))
35188 error ("Parameter to builtin must be a string constant or literal");
35189 return integer_zero_node;
35191 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35194 gcc_assert (param_string_cst);
35196 if (fn_code == IX86_BUILTIN_CPU_IS)
35198 tree ref;
35199 tree field;
35200 tree final;
35202 unsigned int field_val = 0;
35203 unsigned int NUM_ARCH_NAMES
35204 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35206 for (i = 0; i < NUM_ARCH_NAMES; i++)
35207 if (strcmp (arch_names_table[i].name,
35208 TREE_STRING_POINTER (param_string_cst)) == 0)
35209 break;
35211 if (i == NUM_ARCH_NAMES)
35213 error ("Parameter to builtin not valid: %s",
35214 TREE_STRING_POINTER (param_string_cst));
35215 return integer_zero_node;
35218 field = TYPE_FIELDS (__processor_model_type);
35219 field_val = arch_names_table[i].model;
35221 /* CPU types are stored in the next field. */
35222 if (field_val > M_CPU_TYPE_START
35223 && field_val < M_CPU_SUBTYPE_START)
35225 field = DECL_CHAIN (field);
35226 field_val -= M_CPU_TYPE_START;
35229 /* CPU subtypes are stored in the next field. */
35230 if (field_val > M_CPU_SUBTYPE_START)
35232 field = DECL_CHAIN (DECL_CHAIN (field));
35233 field_val -= M_CPU_SUBTYPE_START;
35236 /* Get the appropriate field in __cpu_model. */
35237 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35238 field, NULL_TREE);
35240 /* Check the value. */
35241 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35242 build_int_cstu (unsigned_type_node, field_val));
35243 return build1 (CONVERT_EXPR, integer_type_node, final);
35245 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35247 tree ref;
35248 tree array_elt;
35249 tree field;
35250 tree final;
35252 unsigned int field_val = 0;
35253 unsigned int NUM_ISA_NAMES
35254 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35256 for (i = 0; i < NUM_ISA_NAMES; i++)
35257 if (strcmp (isa_names_table[i].name,
35258 TREE_STRING_POINTER (param_string_cst)) == 0)
35259 break;
35261 if (i == NUM_ISA_NAMES)
35263 error ("Parameter to builtin not valid: %s",
35264 TREE_STRING_POINTER (param_string_cst));
35265 return integer_zero_node;
35268 field = TYPE_FIELDS (__processor_model_type);
35269 /* Get the last field, which is __cpu_features. */
35270 while (DECL_CHAIN (field))
35271 field = DECL_CHAIN (field);
35273 /* Get the appropriate field: __cpu_model.__cpu_features */
35274 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35275 field, NULL_TREE);
35277 /* Access the 0th element of __cpu_features array. */
35278 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35279 integer_zero_node, NULL_TREE, NULL_TREE);
35281 field_val = (1 << isa_names_table[i].feature);
35282 /* Return __cpu_model.__cpu_features[0] & field_val */
35283 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35284 build_int_cstu (unsigned_type_node, field_val));
35285 return build1 (CONVERT_EXPR, integer_type_node, final);
35287 gcc_unreachable ();
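/* Hedged usage sketch of the folding above: a test such as

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();

   folds to roughly (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2)),
   and __builtin_cpu_is ("amd") folds to a comparison of
   __cpu_model.__cpu_vendor against M_AMD; cpu types and subtypes use
   the next two fields, rebased on their _START values.  */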
35290 static tree
35291 ix86_fold_builtin (tree fndecl, int n_args,
35292 tree *args, bool ignore ATTRIBUTE_UNUSED)
35294 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35296 enum ix86_builtins fn_code = (enum ix86_builtins)
35297 DECL_FUNCTION_CODE (fndecl);
35298 if (fn_code == IX86_BUILTIN_CPU_IS
35299 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35301 gcc_assert (n_args == 1);
35302 return fold_builtin_cpu (fndecl, args);
35306 #ifdef SUBTARGET_FOLD_BUILTIN
35307 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35308 #endif
35310 return NULL_TREE;
35313 /* Make builtins to detect cpu type and features supported. NAME is
35314 the builtin name, CODE is the builtin code, and FTYPE is the function
35315 type of the builtin. */
35317 static void
35318 make_cpu_type_builtin (const char* name, int code,
35319 enum ix86_builtin_func_type ftype, bool is_const)
35321 tree decl;
35322 tree type;
35324 type = ix86_get_builtin_func_type (ftype);
35325 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35326 NULL, NULL_TREE);
35327 gcc_assert (decl != NULL_TREE);
35328 ix86_builtins[(int) code] = decl;
35329 TREE_READONLY (decl) = is_const;
35332 /* Make builtins to get CPU type and features supported. The created
35333 builtins are:
35335 __builtin_cpu_init (), to detect cpu type and features,
35336 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35337 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35340 static void
35341 ix86_init_platform_type_builtins (void)
35343 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35344 INT_FTYPE_VOID, false);
35345 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35346 INT_FTYPE_PCCHAR, true);
35347 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35348 INT_FTYPE_PCCHAR, true);
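/* Hedged example of user code enabled by the three builtins above;
   per the GCC documentation, calling __builtin_cpu_init first is only
   required when the checks may run before the normal constructors,
   e.g. inside an ifunc resolver:

     int
     pick_path (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("corei7"))
         return 2;
       return __builtin_cpu_supports ("sse4.2") ? 1 : 0;
     }
*/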
35351 /* Internal method for ix86_init_builtins. */
35353 static void
35354 ix86_init_builtins_va_builtins_abi (void)
35356 tree ms_va_ref, sysv_va_ref;
35357 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35358 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35359 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35360 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35362 if (!TARGET_64BIT)
35363 return;
35364 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35365 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35366 ms_va_ref = build_reference_type (ms_va_list_type_node);
35367 sysv_va_ref =
35368 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35370 fnvoid_va_end_ms =
35371 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35372 fnvoid_va_start_ms =
35373 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35374 fnvoid_va_end_sysv =
35375 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35376 fnvoid_va_start_sysv =
35377 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35378 NULL_TREE);
35379 fnvoid_va_copy_ms =
35380 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35381 NULL_TREE);
35382 fnvoid_va_copy_sysv =
35383 build_function_type_list (void_type_node, sysv_va_ref,
35384 sysv_va_ref, NULL_TREE);
35386 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35387 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35388 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35389 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35390 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35391 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35392 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35393 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35394 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35395 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35396 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35397 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
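/* Hedged usage sketch of the ABI-specific va builtins registered
   above; the __builtin_ms_va_list type name is assumed from the
   ms_abi support rather than taken from this file:

     void __attribute__ ((ms_abi))
     ms_vsum (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       // ... consume the variable arguments ...
       __builtin_ms_va_end (ap);
     }
*/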
35400 static void
35401 ix86_init_builtin_types (void)
35403 tree float128_type_node, float80_type_node;
35405 /* The __float80 type. */
35406 float80_type_node = long_double_type_node;
35407 if (TYPE_MODE (float80_type_node) != XFmode)
35409 /* The __float80 type. */
35410 float80_type_node = make_node (REAL_TYPE);
35412 TYPE_PRECISION (float80_type_node) = 80;
35413 layout_type (float80_type_node);
35415 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35417 /* The __float128 type. */
35418 float128_type_node = make_node (REAL_TYPE);
35419 TYPE_PRECISION (float128_type_node) = 128;
35420 layout_type (float128_type_node);
35421 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35423 /* This macro is built by i386-builtin-types.awk. */
35424 DEFINE_BUILTIN_PRIMITIVE_TYPES;
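/* With the types registered above, user code can write (hedged
   example; the w/W and q/Q constant suffixes are assumed from the
   GCC extended-float documentation):

     __float80  e = 1.5w;
     __float128 q = 1.5q;
*/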
35427 static void
35428 ix86_init_builtins (void)
35430 tree t;
35432 ix86_init_builtin_types ();
35434 /* Builtins to get CPU type and features. */
35435 ix86_init_platform_type_builtins ();
35437 /* TFmode support builtins. */
35438 def_builtin_const (0, "__builtin_infq",
35439 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35440 def_builtin_const (0, "__builtin_huge_valq",
35441 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35443 /* We will expand them to normal calls if SSE isn't available, since
35444 they are used by libgcc. */
35445 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35446 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35447 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35448 TREE_READONLY (t) = 1;
35449 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35451 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35452 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35453 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35454 TREE_READONLY (t) = 1;
35455 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35457 ix86_init_tm_builtins ();
35458 ix86_init_mmx_sse_builtins ();
35459 ix86_init_mpx_builtins ();
35461 if (TARGET_LP64)
35462 ix86_init_builtins_va_builtins_abi ();
35464 #ifdef SUBTARGET_INIT_BUILTINS
35465 SUBTARGET_INIT_BUILTINS;
35466 #endif
35469 /* Return the ix86 builtin for CODE. */
35471 static tree
35472 ix86_builtin_decl (unsigned code, bool)
35474 if (code >= IX86_BUILTIN_MAX)
35475 return error_mark_node;
35477 return ix86_builtins[code];
35480 /* Errors in the source file can cause expand_expr to return const0_rtx
35481 where we expect a vector. To avoid crashing, use one of the vector
35482 clear instructions. */
35483 static rtx
35484 safe_vector_operand (rtx x, machine_mode mode)
35486 if (x == const0_rtx)
35487 x = CONST0_RTX (mode);
35488 return x;
35491 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35493 static rtx
35494 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35496 rtx pat;
35497 tree arg0 = CALL_EXPR_ARG (exp, 0);
35498 tree arg1 = CALL_EXPR_ARG (exp, 1);
35499 rtx op0 = expand_normal (arg0);
35500 rtx op1 = expand_normal (arg1);
35501 machine_mode tmode = insn_data[icode].operand[0].mode;
35502 machine_mode mode0 = insn_data[icode].operand[1].mode;
35503 machine_mode mode1 = insn_data[icode].operand[2].mode;
35505 if (VECTOR_MODE_P (mode0))
35506 op0 = safe_vector_operand (op0, mode0);
35507 if (VECTOR_MODE_P (mode1))
35508 op1 = safe_vector_operand (op1, mode1);
35510 if (optimize || !target
35511 || GET_MODE (target) != tmode
35512 || !insn_data[icode].operand[0].predicate (target, tmode))
35513 target = gen_reg_rtx (tmode);
35515 if (GET_MODE (op1) == SImode && mode1 == TImode)
35517 rtx x = gen_reg_rtx (V4SImode);
35518 emit_insn (gen_sse2_loadd (x, op1));
35519 op1 = gen_lowpart (TImode, x);
35522 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35523 op0 = copy_to_mode_reg (mode0, op0);
35524 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35525 op1 = copy_to_mode_reg (mode1, op1);
35527 pat = GEN_FCN (icode) (target, op0, op1);
35528 if (! pat)
35529 return 0;
35531 emit_insn (pat);
35533 return target;
35536 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35538 static rtx
35539 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35540 enum ix86_builtin_func_type m_type,
35541 enum rtx_code sub_code)
35543 rtx pat;
35544 int i;
35545 int nargs;
35546 bool comparison_p = false;
35547 bool tf_p = false;
35548 bool last_arg_constant = false;
35549 int num_memory = 0;
35550 struct {
35551 rtx op;
35552 machine_mode mode;
35553 } args[4];
35555 machine_mode tmode = insn_data[icode].operand[0].mode;
35557 switch (m_type)
35559 case MULTI_ARG_4_DF2_DI_I:
35560 case MULTI_ARG_4_DF2_DI_I1:
35561 case MULTI_ARG_4_SF2_SI_I:
35562 case MULTI_ARG_4_SF2_SI_I1:
35563 nargs = 4;
35564 last_arg_constant = true;
35565 break;
35567 case MULTI_ARG_3_SF:
35568 case MULTI_ARG_3_DF:
35569 case MULTI_ARG_3_SF2:
35570 case MULTI_ARG_3_DF2:
35571 case MULTI_ARG_3_DI:
35572 case MULTI_ARG_3_SI:
35573 case MULTI_ARG_3_SI_DI:
35574 case MULTI_ARG_3_HI:
35575 case MULTI_ARG_3_HI_SI:
35576 case MULTI_ARG_3_QI:
35577 case MULTI_ARG_3_DI2:
35578 case MULTI_ARG_3_SI2:
35579 case MULTI_ARG_3_HI2:
35580 case MULTI_ARG_3_QI2:
35581 nargs = 3;
35582 break;
35584 case MULTI_ARG_2_SF:
35585 case MULTI_ARG_2_DF:
35586 case MULTI_ARG_2_DI:
35587 case MULTI_ARG_2_SI:
35588 case MULTI_ARG_2_HI:
35589 case MULTI_ARG_2_QI:
35590 nargs = 2;
35591 break;
35593 case MULTI_ARG_2_DI_IMM:
35594 case MULTI_ARG_2_SI_IMM:
35595 case MULTI_ARG_2_HI_IMM:
35596 case MULTI_ARG_2_QI_IMM:
35597 nargs = 2;
35598 last_arg_constant = true;
35599 break;
35601 case MULTI_ARG_1_SF:
35602 case MULTI_ARG_1_DF:
35603 case MULTI_ARG_1_SF2:
35604 case MULTI_ARG_1_DF2:
35605 case MULTI_ARG_1_DI:
35606 case MULTI_ARG_1_SI:
35607 case MULTI_ARG_1_HI:
35608 case MULTI_ARG_1_QI:
35609 case MULTI_ARG_1_SI_DI:
35610 case MULTI_ARG_1_HI_DI:
35611 case MULTI_ARG_1_HI_SI:
35612 case MULTI_ARG_1_QI_DI:
35613 case MULTI_ARG_1_QI_SI:
35614 case MULTI_ARG_1_QI_HI:
35615 nargs = 1;
35616 break;
35618 case MULTI_ARG_2_DI_CMP:
35619 case MULTI_ARG_2_SI_CMP:
35620 case MULTI_ARG_2_HI_CMP:
35621 case MULTI_ARG_2_QI_CMP:
35622 nargs = 2;
35623 comparison_p = true;
35624 break;
35626 case MULTI_ARG_2_SF_TF:
35627 case MULTI_ARG_2_DF_TF:
35628 case MULTI_ARG_2_DI_TF:
35629 case MULTI_ARG_2_SI_TF:
35630 case MULTI_ARG_2_HI_TF:
35631 case MULTI_ARG_2_QI_TF:
35632 nargs = 2;
35633 tf_p = true;
35634 break;
35636 default:
35637 gcc_unreachable ();
35640 if (optimize || !target
35641 || GET_MODE (target) != tmode
35642 || !insn_data[icode].operand[0].predicate (target, tmode))
35643 target = gen_reg_rtx (tmode);
35645 gcc_assert (nargs <= 4);
35647 for (i = 0; i < nargs; i++)
35649 tree arg = CALL_EXPR_ARG (exp, i);
35650 rtx op = expand_normal (arg);
35651 int adjust = (comparison_p) ? 1 : 0;
35652 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35654 if (last_arg_constant && i == nargs - 1)
35656 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35658 enum insn_code new_icode = icode;
35659 switch (icode)
35661 case CODE_FOR_xop_vpermil2v2df3:
35662 case CODE_FOR_xop_vpermil2v4sf3:
35663 case CODE_FOR_xop_vpermil2v4df3:
35664 case CODE_FOR_xop_vpermil2v8sf3:
35665 error ("the last argument must be a 2-bit immediate");
35666 return gen_reg_rtx (tmode);
35667 case CODE_FOR_xop_rotlv2di3:
35668 new_icode = CODE_FOR_rotlv2di3;
35669 goto xop_rotl;
35670 case CODE_FOR_xop_rotlv4si3:
35671 new_icode = CODE_FOR_rotlv4si3;
35672 goto xop_rotl;
35673 case CODE_FOR_xop_rotlv8hi3:
35674 new_icode = CODE_FOR_rotlv8hi3;
35675 goto xop_rotl;
35676 case CODE_FOR_xop_rotlv16qi3:
35677 new_icode = CODE_FOR_rotlv16qi3;
35678 xop_rotl:
35679 if (CONST_INT_P (op))
35681 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35682 op = GEN_INT (INTVAL (op) & mask);
35683 gcc_checking_assert
35684 (insn_data[icode].operand[i + 1].predicate (op, mode));
35686 else
35688 gcc_checking_assert
35689 (nargs == 2
35690 && insn_data[new_icode].operand[0].mode == tmode
35691 && insn_data[new_icode].operand[1].mode == tmode
35692 && insn_data[new_icode].operand[2].mode == mode
35693 && insn_data[new_icode].operand[0].predicate
35694 == insn_data[icode].operand[0].predicate
35695 && insn_data[new_icode].operand[1].predicate
35696 == insn_data[icode].operand[1].predicate);
35697 icode = new_icode;
35698 goto non_constant;
35700 break;
35701 default:
35702 gcc_unreachable ();
35706 else
35708 non_constant:
35709 if (VECTOR_MODE_P (mode))
35710 op = safe_vector_operand (op, mode);
35712 /* If we aren't optimizing, only allow one memory operand to be
35713 generated. */
35714 if (memory_operand (op, mode))
35715 num_memory++;
35717 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35719 if (optimize
35720 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35721 || num_memory > 1)
35722 op = force_reg (mode, op);
35725 args[i].op = op;
35726 args[i].mode = mode;
35729 switch (nargs)
35731 case 1:
35732 pat = GEN_FCN (icode) (target, args[0].op);
35733 break;
35735 case 2:
35736 if (tf_p)
35737 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35738 GEN_INT ((int)sub_code));
35739 else if (! comparison_p)
35740 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35741 else
35743 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35744 args[0].op,
35745 args[1].op);
35747 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35749 break;
35751 case 3:
35752 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35753 break;
35755 case 4:
35756 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35757 break;
35759 default:
35760 gcc_unreachable ();
35763 if (! pat)
35764 return 0;
35766 emit_insn (pat);
35767 return target;
35770 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35771 insns with vec_merge. */
35773 static rtx
35774 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35775 rtx target)
35777 rtx pat;
35778 tree arg0 = CALL_EXPR_ARG (exp, 0);
35779 rtx op1, op0 = expand_normal (arg0);
35780 machine_mode tmode = insn_data[icode].operand[0].mode;
35781 machine_mode mode0 = insn_data[icode].operand[1].mode;
35783 if (optimize || !target
35784 || GET_MODE (target) != tmode
35785 || !insn_data[icode].operand[0].predicate (target, tmode))
35786 target = gen_reg_rtx (tmode);
35788 if (VECTOR_MODE_P (mode0))
35789 op0 = safe_vector_operand (op0, mode0);
35791 if ((optimize && !register_operand (op0, mode0))
35792 || !insn_data[icode].operand[1].predicate (op0, mode0))
35793 op0 = copy_to_mode_reg (mode0, op0);
35795 op1 = op0;
35796 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35797 op1 = copy_to_mode_reg (mode0, op1);
35799 pat = GEN_FCN (icode) (target, op0, op1);
35800 if (! pat)
35801 return 0;
35802 emit_insn (pat);
35803 return target;
35806 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
35808 static rtx
35809 ix86_expand_sse_compare (const struct builtin_description *d,
35810 tree exp, rtx target, bool swap)
35812 rtx pat;
35813 tree arg0 = CALL_EXPR_ARG (exp, 0);
35814 tree arg1 = CALL_EXPR_ARG (exp, 1);
35815 rtx op0 = expand_normal (arg0);
35816 rtx op1 = expand_normal (arg1);
35817 rtx op2;
35818 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35819 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35820 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35821 enum rtx_code comparison = d->comparison;
35823 if (VECTOR_MODE_P (mode0))
35824 op0 = safe_vector_operand (op0, mode0);
35825 if (VECTOR_MODE_P (mode1))
35826 op1 = safe_vector_operand (op1, mode1);
35828 /* Swap operands if we have a comparison that isn't available in
35829 hardware. */
35830 if (swap)
35832 rtx tmp = gen_reg_rtx (mode1);
35833 emit_move_insn (tmp, op1);
35834 op1 = op0;
35835 op0 = tmp;
35838 if (optimize || !target
35839 || GET_MODE (target) != tmode
35840 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35841 target = gen_reg_rtx (tmode);
35843 if ((optimize && !register_operand (op0, mode0))
35844 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
35845 op0 = copy_to_mode_reg (mode0, op0);
35846 if ((optimize && !register_operand (op1, mode1))
35847 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
35848 op1 = copy_to_mode_reg (mode1, op1);
35850 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
35851 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
35852 if (! pat)
35853 return 0;
35854 emit_insn (pat);
35855 return target;
35858 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
35860 static rtx
35861 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
35862 rtx target)
35864 rtx pat;
35865 tree arg0 = CALL_EXPR_ARG (exp, 0);
35866 tree arg1 = CALL_EXPR_ARG (exp, 1);
35867 rtx op0 = expand_normal (arg0);
35868 rtx op1 = expand_normal (arg1);
35869 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
35870 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
35871 enum rtx_code comparison = d->comparison;
35873 if (VECTOR_MODE_P (mode0))
35874 op0 = safe_vector_operand (op0, mode0);
35875 if (VECTOR_MODE_P (mode1))
35876 op1 = safe_vector_operand (op1, mode1);
35878 /* Swap operands if we have a comparison that isn't available in
35879 hardware. */
35880 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
35881 std::swap (op1, op0);
35883 target = gen_reg_rtx (SImode);
35884 emit_move_insn (target, const0_rtx);
35885 target = gen_rtx_SUBREG (QImode, target, 0);
35887 if ((optimize && !register_operand (op0, mode0))
35888 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35889 op0 = copy_to_mode_reg (mode0, op0);
35890 if ((optimize && !register_operand (op1, mode1))
35891 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35892 op1 = copy_to_mode_reg (mode1, op1);
35894 pat = GEN_FCN (d->icode) (op0, op1);
35895 if (! pat)
35896 return 0;
35897 emit_insn (pat);
35898 emit_insn (gen_rtx_SET (VOIDmode,
35899 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35900 gen_rtx_fmt_ee (comparison, QImode,
35901 SET_DEST (pat),
35902 const0_rtx)));
35904 return SUBREG_REG (target);
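/* The RTL emitted above has roughly this shape (hedged sketch; the
   flags register mode depends on the particular comi pattern):

     (set (reg:SI target) (const_int 0))
     (set (reg flags) (compare op0 op1))               ;; the comi insn
     (set (strict_low_part (subreg:QI (reg:SI target) 0))
          (comparison (reg flags) (const_int 0)))

   so the returned SImode register ends up holding 0 or 1.  */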
35907 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
35909 static rtx
35910 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
35911 rtx target)
35913 rtx pat;
35914 tree arg0 = CALL_EXPR_ARG (exp, 0);
35915 rtx op1, op0 = expand_normal (arg0);
35916 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35917 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35919 if (optimize || target == 0
35920 || GET_MODE (target) != tmode
35921 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35922 target = gen_reg_rtx (tmode);
35924 if (VECTOR_MODE_P (mode0))
35925 op0 = safe_vector_operand (op0, mode0);
35927 if ((optimize && !register_operand (op0, mode0))
35928 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35929 op0 = copy_to_mode_reg (mode0, op0);
35931 op1 = GEN_INT (d->comparison);
35933 pat = GEN_FCN (d->icode) (target, op0, op1);
35934 if (! pat)
35935 return 0;
35936 emit_insn (pat);
35937 return target;
35940 static rtx
35941 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
35942 tree exp, rtx target)
35944 rtx pat;
35945 tree arg0 = CALL_EXPR_ARG (exp, 0);
35946 tree arg1 = CALL_EXPR_ARG (exp, 1);
35947 rtx op0 = expand_normal (arg0);
35948 rtx op1 = expand_normal (arg1);
35949 rtx op2;
35950 machine_mode tmode = insn_data[d->icode].operand[0].mode;
35951 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35952 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35954 if (optimize || target == 0
35955 || GET_MODE (target) != tmode
35956 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35957 target = gen_reg_rtx (tmode);
35959 op0 = safe_vector_operand (op0, mode0);
35960 op1 = safe_vector_operand (op1, mode1);
35962 if ((optimize && !register_operand (op0, mode0))
35963 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35964 op0 = copy_to_mode_reg (mode0, op0);
35965 if ((optimize && !register_operand (op1, mode1))
35966 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35967 op1 = copy_to_mode_reg (mode1, op1);
35969 op2 = GEN_INT (d->comparison);
35971 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
35972 if (! pat)
35973 return 0;
35974 emit_insn (pat);
35975 return target;
35978 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
35980 static rtx
35981 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
35982 rtx target)
35984 rtx pat;
35985 tree arg0 = CALL_EXPR_ARG (exp, 0);
35986 tree arg1 = CALL_EXPR_ARG (exp, 1);
35987 rtx op0 = expand_normal (arg0);
35988 rtx op1 = expand_normal (arg1);
35989 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
35990 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
35991 enum rtx_code comparison = d->comparison;
35993 if (VECTOR_MODE_P (mode0))
35994 op0 = safe_vector_operand (op0, mode0);
35995 if (VECTOR_MODE_P (mode1))
35996 op1 = safe_vector_operand (op1, mode1);
35998 target = gen_reg_rtx (SImode);
35999 emit_move_insn (target, const0_rtx);
36000 target = gen_rtx_SUBREG (QImode, target, 0);
36002 if ((optimize && !register_operand (op0, mode0))
36003 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36004 op0 = copy_to_mode_reg (mode0, op0);
36005 if ((optimize && !register_operand (op1, mode1))
36006 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36007 op1 = copy_to_mode_reg (mode1, op1);
36009 pat = GEN_FCN (d->icode) (op0, op1);
36010 if (! pat)
36011 return 0;
36012 emit_insn (pat);
36013 emit_insn (gen_rtx_SET (VOIDmode,
36014 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36015 gen_rtx_fmt_ee (comparison, QImode,
36016 SET_DEST (pat),
36017 const0_rtx)));
36019 return SUBREG_REG (target);
36022 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36024 static rtx
36025 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36026 tree exp, rtx target)
36028 rtx pat;
36029 tree arg0 = CALL_EXPR_ARG (exp, 0);
36030 tree arg1 = CALL_EXPR_ARG (exp, 1);
36031 tree arg2 = CALL_EXPR_ARG (exp, 2);
36032 tree arg3 = CALL_EXPR_ARG (exp, 3);
36033 tree arg4 = CALL_EXPR_ARG (exp, 4);
36034 rtx scratch0, scratch1;
36035 rtx op0 = expand_normal (arg0);
36036 rtx op1 = expand_normal (arg1);
36037 rtx op2 = expand_normal (arg2);
36038 rtx op3 = expand_normal (arg3);
36039 rtx op4 = expand_normal (arg4);
36040 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36042 tmode0 = insn_data[d->icode].operand[0].mode;
36043 tmode1 = insn_data[d->icode].operand[1].mode;
36044 modev2 = insn_data[d->icode].operand[2].mode;
36045 modei3 = insn_data[d->icode].operand[3].mode;
36046 modev4 = insn_data[d->icode].operand[4].mode;
36047 modei5 = insn_data[d->icode].operand[5].mode;
36048 modeimm = insn_data[d->icode].operand[6].mode;
36050 if (VECTOR_MODE_P (modev2))
36051 op0 = safe_vector_operand (op0, modev2);
36052 if (VECTOR_MODE_P (modev4))
36053 op2 = safe_vector_operand (op2, modev4);
36055 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36056 op0 = copy_to_mode_reg (modev2, op0);
36057 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36058 op1 = copy_to_mode_reg (modei3, op1);
36059 if ((optimize && !register_operand (op2, modev4))
36060 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36061 op2 = copy_to_mode_reg (modev4, op2);
36062 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36063 op3 = copy_to_mode_reg (modei5, op3);
36065 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36067 error ("the fifth argument must be an 8-bit immediate");
36068 return const0_rtx;
36071 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36073 if (optimize || !target
36074 || GET_MODE (target) != tmode0
36075 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36076 target = gen_reg_rtx (tmode0);
36078 scratch1 = gen_reg_rtx (tmode1);
36080 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36082 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36084 if (optimize || !target
36085 || GET_MODE (target) != tmode1
36086 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36087 target = gen_reg_rtx (tmode1);
36089 scratch0 = gen_reg_rtx (tmode0);
36091 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36093 else
36095 gcc_assert (d->flag);
36097 scratch0 = gen_reg_rtx (tmode0);
36098 scratch1 = gen_reg_rtx (tmode1);
36100 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36103 if (! pat)
36104 return 0;
36106 emit_insn (pat);
36108 if (d->flag)
36110 target = gen_reg_rtx (SImode);
36111 emit_move_insn (target, const0_rtx);
36112 target = gen_rtx_SUBREG (QImode, target, 0);
36114 emit_insn
36115 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36116 gen_rtx_fmt_ee (EQ, QImode,
36117 gen_rtx_REG ((machine_mode) d->flag,
36118 FLAGS_REG),
36119 const0_rtx)));
36120 return SUBREG_REG (target);
36122 else
36123 return target;
36127 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36129 static rtx
36130 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36131 tree exp, rtx target)
36133 rtx pat;
36134 tree arg0 = CALL_EXPR_ARG (exp, 0);
36135 tree arg1 = CALL_EXPR_ARG (exp, 1);
36136 tree arg2 = CALL_EXPR_ARG (exp, 2);
36137 rtx scratch0, scratch1;
36138 rtx op0 = expand_normal (arg0);
36139 rtx op1 = expand_normal (arg1);
36140 rtx op2 = expand_normal (arg2);
36141 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36143 tmode0 = insn_data[d->icode].operand[0].mode;
36144 tmode1 = insn_data[d->icode].operand[1].mode;
36145 modev2 = insn_data[d->icode].operand[2].mode;
36146 modev3 = insn_data[d->icode].operand[3].mode;
36147 modeimm = insn_data[d->icode].operand[4].mode;
36149 if (VECTOR_MODE_P (modev2))
36150 op0 = safe_vector_operand (op0, modev2);
36151 if (VECTOR_MODE_P (modev3))
36152 op1 = safe_vector_operand (op1, modev3);
36154 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36155 op0 = copy_to_mode_reg (modev2, op0);
36156 if ((optimize && !register_operand (op1, modev3))
36157 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36158 op1 = copy_to_mode_reg (modev3, op1);
36160 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36162 error ("the third argument must be an 8-bit immediate");
36163 return const0_rtx;
36166 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36168 if (optimize || !target
36169 || GET_MODE (target) != tmode0
36170 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36171 target = gen_reg_rtx (tmode0);
36173 scratch1 = gen_reg_rtx (tmode1);
36175 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36177 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36179 if (optimize || !target
36180 || GET_MODE (target) != tmode1
36181 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36182 target = gen_reg_rtx (tmode1);
36184 scratch0 = gen_reg_rtx (tmode0);
36186 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36188 else
36190 gcc_assert (d->flag);
36192 scratch0 = gen_reg_rtx (tmode0);
36193 scratch1 = gen_reg_rtx (tmode1);
36195 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36198 if (! pat)
36199 return 0;
36201 emit_insn (pat);
36203 if (d->flag)
36205 target = gen_reg_rtx (SImode);
36206 emit_move_insn (target, const0_rtx);
36207 target = gen_rtx_SUBREG (QImode, target, 0);
36209 emit_insn
36210 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36211 gen_rtx_fmt_ee (EQ, QImode,
36212 gen_rtx_REG ((machine_mode) d->flag,
36213 FLAGS_REG),
36214 const0_rtx)));
36215 return SUBREG_REG (target);
36217 else
36218 return target;
36221 /* Subroutine of ix86_expand_builtin to take care of insns with
36222 variable number of operands. */
36224 static rtx
36225 ix86_expand_args_builtin (const struct builtin_description *d,
36226 tree exp, rtx target)
36228 rtx pat, real_target;
36229 unsigned int i, nargs;
36230 unsigned int nargs_constant = 0;
36231 unsigned int mask_pos = 0;
36232 int num_memory = 0;
36233 struct
36235 rtx op;
36236 machine_mode mode;
36237 } args[6];
36238 bool last_arg_count = false;
36239 enum insn_code icode = d->icode;
36240 const struct insn_data_d *insn_p = &insn_data[icode];
36241 machine_mode tmode = insn_p->operand[0].mode;
36242 machine_mode rmode = VOIDmode;
36243 bool swap = false;
36244 enum rtx_code comparison = d->comparison;
36246 switch ((enum ix86_builtin_func_type) d->flag)
36248 case V2DF_FTYPE_V2DF_ROUND:
36249 case V4DF_FTYPE_V4DF_ROUND:
36250 case V4SF_FTYPE_V4SF_ROUND:
36251 case V8SF_FTYPE_V8SF_ROUND:
36252 case V4SI_FTYPE_V4SF_ROUND:
36253 case V8SI_FTYPE_V8SF_ROUND:
36254 return ix86_expand_sse_round (d, exp, target);
36255 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36256 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36257 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36258 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36259 case INT_FTYPE_V8SF_V8SF_PTEST:
36260 case INT_FTYPE_V4DI_V4DI_PTEST:
36261 case INT_FTYPE_V4DF_V4DF_PTEST:
36262 case INT_FTYPE_V4SF_V4SF_PTEST:
36263 case INT_FTYPE_V2DI_V2DI_PTEST:
36264 case INT_FTYPE_V2DF_V2DF_PTEST:
36265 return ix86_expand_sse_ptest (d, exp, target);
36266 case FLOAT128_FTYPE_FLOAT128:
36267 case FLOAT_FTYPE_FLOAT:
36268 case INT_FTYPE_INT:
36269 case UINT64_FTYPE_INT:
36270 case UINT16_FTYPE_UINT16:
36271 case INT64_FTYPE_INT64:
36272 case INT64_FTYPE_V4SF:
36273 case INT64_FTYPE_V2DF:
36274 case INT_FTYPE_V16QI:
36275 case INT_FTYPE_V8QI:
36276 case INT_FTYPE_V8SF:
36277 case INT_FTYPE_V4DF:
36278 case INT_FTYPE_V4SF:
36279 case INT_FTYPE_V2DF:
36280 case INT_FTYPE_V32QI:
36281 case V16QI_FTYPE_V16QI:
36282 case V8SI_FTYPE_V8SF:
36283 case V8SI_FTYPE_V4SI:
36284 case V8HI_FTYPE_V8HI:
36285 case V8HI_FTYPE_V16QI:
36286 case V8QI_FTYPE_V8QI:
36287 case V8SF_FTYPE_V8SF:
36288 case V8SF_FTYPE_V8SI:
36289 case V8SF_FTYPE_V4SF:
36290 case V8SF_FTYPE_V8HI:
36291 case V4SI_FTYPE_V4SI:
36292 case V4SI_FTYPE_V16QI:
36293 case V4SI_FTYPE_V4SF:
36294 case V4SI_FTYPE_V8SI:
36295 case V4SI_FTYPE_V8HI:
36296 case V4SI_FTYPE_V4DF:
36297 case V4SI_FTYPE_V2DF:
36298 case V4HI_FTYPE_V4HI:
36299 case V4DF_FTYPE_V4DF:
36300 case V4DF_FTYPE_V4SI:
36301 case V4DF_FTYPE_V4SF:
36302 case V4DF_FTYPE_V2DF:
36303 case V4SF_FTYPE_V4SF:
36304 case V4SF_FTYPE_V4SI:
36305 case V4SF_FTYPE_V8SF:
36306 case V4SF_FTYPE_V4DF:
36307 case V4SF_FTYPE_V8HI:
36308 case V4SF_FTYPE_V2DF:
36309 case V2DI_FTYPE_V2DI:
36310 case V2DI_FTYPE_V16QI:
36311 case V2DI_FTYPE_V8HI:
36312 case V2DI_FTYPE_V4SI:
36313 case V2DF_FTYPE_V2DF:
36314 case V2DF_FTYPE_V4SI:
36315 case V2DF_FTYPE_V4DF:
36316 case V2DF_FTYPE_V4SF:
36317 case V2DF_FTYPE_V2SI:
36318 case V2SI_FTYPE_V2SI:
36319 case V2SI_FTYPE_V4SF:
36320 case V2SI_FTYPE_V2SF:
36321 case V2SI_FTYPE_V2DF:
36322 case V2SF_FTYPE_V2SF:
36323 case V2SF_FTYPE_V2SI:
36324 case V32QI_FTYPE_V32QI:
36325 case V32QI_FTYPE_V16QI:
36326 case V16HI_FTYPE_V16HI:
36327 case V16HI_FTYPE_V8HI:
36328 case V8SI_FTYPE_V8SI:
36329 case V16HI_FTYPE_V16QI:
36330 case V8SI_FTYPE_V16QI:
36331 case V4DI_FTYPE_V16QI:
36332 case V8SI_FTYPE_V8HI:
36333 case V4DI_FTYPE_V8HI:
36334 case V4DI_FTYPE_V4SI:
36335 case V4DI_FTYPE_V2DI:
36336 case HI_FTYPE_HI:
36337 case HI_FTYPE_V16QI:
36338 case SI_FTYPE_V32QI:
36339 case DI_FTYPE_V64QI:
36340 case V16QI_FTYPE_HI:
36341 case V32QI_FTYPE_SI:
36342 case V64QI_FTYPE_DI:
36343 case V8HI_FTYPE_QI:
36344 case V16HI_FTYPE_HI:
36345 case V32HI_FTYPE_SI:
36346 case V4SI_FTYPE_QI:
36347 case V8SI_FTYPE_QI:
36348 case V4SI_FTYPE_HI:
36349 case V8SI_FTYPE_HI:
36350 case QI_FTYPE_V8HI:
36351 case HI_FTYPE_V16HI:
36352 case SI_FTYPE_V32HI:
36353 case QI_FTYPE_V4SI:
36354 case QI_FTYPE_V8SI:
36355 case HI_FTYPE_V16SI:
36356 case QI_FTYPE_V2DI:
36357 case QI_FTYPE_V4DI:
36358 case QI_FTYPE_V8DI:
36359 case UINT_FTYPE_V2DF:
36360 case UINT_FTYPE_V4SF:
36361 case UINT64_FTYPE_V2DF:
36362 case UINT64_FTYPE_V4SF:
36363 case V16QI_FTYPE_V8DI:
36364 case V16HI_FTYPE_V16SI:
36365 case V16SI_FTYPE_HI:
36366 case V2DI_FTYPE_QI:
36367 case V4DI_FTYPE_QI:
36368 case V16SI_FTYPE_V16SI:
36369 case V16SI_FTYPE_INT:
36370 case V16SF_FTYPE_FLOAT:
36371 case V16SF_FTYPE_V8SF:
36372 case V16SI_FTYPE_V8SI:
36373 case V16SF_FTYPE_V4SF:
36374 case V16SI_FTYPE_V4SI:
36375 case V16SF_FTYPE_V16SF:
36376 case V8HI_FTYPE_V8DI:
36377 case V8UHI_FTYPE_V8UHI:
36378 case V8SI_FTYPE_V8DI:
36379 case V8SF_FTYPE_V8DF:
36380 case V8DI_FTYPE_QI:
36381 case V8DI_FTYPE_INT64:
36382 case V8DI_FTYPE_V4DI:
36383 case V8DI_FTYPE_V8DI:
36384 case V8DF_FTYPE_DOUBLE:
36385 case V8DF_FTYPE_V4DF:
36386 case V8DF_FTYPE_V2DF:
36387 case V8DF_FTYPE_V8DF:
36388 case V8DF_FTYPE_V8SI:
36389 nargs = 1;
36390 break;
36391 case V4SF_FTYPE_V4SF_VEC_MERGE:
36392 case V2DF_FTYPE_V2DF_VEC_MERGE:
36393 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36394 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36395 case V16QI_FTYPE_V16QI_V16QI:
36396 case V16QI_FTYPE_V8HI_V8HI:
36397 case V16SI_FTYPE_V16SI_V16SI:
36398 case V16SF_FTYPE_V16SF_V16SF:
36399 case V16SF_FTYPE_V16SF_V16SI:
36400 case V8QI_FTYPE_V8QI_V8QI:
36401 case V8QI_FTYPE_V4HI_V4HI:
36402 case V8HI_FTYPE_V8HI_V8HI:
36403 case V8HI_FTYPE_V16QI_V16QI:
36404 case V8HI_FTYPE_V4SI_V4SI:
36405 case V8SF_FTYPE_V8SF_V8SF:
36406 case V8SF_FTYPE_V8SF_V8SI:
36407 case V8DI_FTYPE_V8DI_V8DI:
36408 case V8DF_FTYPE_V8DF_V8DF:
36409 case V8DF_FTYPE_V8DF_V8DI:
36410 case V4SI_FTYPE_V4SI_V4SI:
36411 case V4SI_FTYPE_V8HI_V8HI:
36412 case V4SI_FTYPE_V4SF_V4SF:
36413 case V4SI_FTYPE_V2DF_V2DF:
36414 case V4HI_FTYPE_V4HI_V4HI:
36415 case V4HI_FTYPE_V8QI_V8QI:
36416 case V4HI_FTYPE_V2SI_V2SI:
36417 case V4DF_FTYPE_V4DF_V4DF:
36418 case V4DF_FTYPE_V4DF_V4DI:
36419 case V4SF_FTYPE_V4SF_V4SF:
36420 case V4SF_FTYPE_V4SF_V4SI:
36421 case V4SF_FTYPE_V4SF_V2SI:
36422 case V4SF_FTYPE_V4SF_V2DF:
36423 case V4SF_FTYPE_V4SF_UINT:
36424 case V4SF_FTYPE_V4SF_UINT64:
36425 case V4SF_FTYPE_V4SF_DI:
36426 case V4SF_FTYPE_V4SF_SI:
36427 case V2DI_FTYPE_V2DI_V2DI:
36428 case V2DI_FTYPE_V16QI_V16QI:
36429 case V2DI_FTYPE_V4SI_V4SI:
36430 case V2UDI_FTYPE_V4USI_V4USI:
36431 case V2DI_FTYPE_V2DI_V16QI:
36432 case V2DI_FTYPE_V2DF_V2DF:
36433 case V2SI_FTYPE_V2SI_V2SI:
36434 case V2SI_FTYPE_V4HI_V4HI:
36435 case V2SI_FTYPE_V2SF_V2SF:
36436 case V2DF_FTYPE_V2DF_V2DF:
36437 case V2DF_FTYPE_V2DF_V4SF:
36438 case V2DF_FTYPE_V2DF_V2DI:
36439 case V2DF_FTYPE_V2DF_DI:
36440 case V2DF_FTYPE_V2DF_SI:
36441 case V2DF_FTYPE_V2DF_UINT:
36442 case V2DF_FTYPE_V2DF_UINT64:
36443 case V2SF_FTYPE_V2SF_V2SF:
36444 case V1DI_FTYPE_V1DI_V1DI:
36445 case V1DI_FTYPE_V8QI_V8QI:
36446 case V1DI_FTYPE_V2SI_V2SI:
36447 case V32QI_FTYPE_V16HI_V16HI:
36448 case V16HI_FTYPE_V8SI_V8SI:
36449 case V32QI_FTYPE_V32QI_V32QI:
36450 case V16HI_FTYPE_V32QI_V32QI:
36451 case V16HI_FTYPE_V16HI_V16HI:
36452 case V8SI_FTYPE_V4DF_V4DF:
36453 case V8SI_FTYPE_V8SI_V8SI:
36454 case V8SI_FTYPE_V16HI_V16HI:
36455 case V4DI_FTYPE_V4DI_V4DI:
36456 case V4DI_FTYPE_V8SI_V8SI:
36457 case V4UDI_FTYPE_V8USI_V8USI:
36458 case QI_FTYPE_V8DI_V8DI:
36459 case V8DI_FTYPE_V64QI_V64QI:
36460 case HI_FTYPE_V16SI_V16SI:
36461 if (comparison == UNKNOWN)
36462 return ix86_expand_binop_builtin (icode, exp, target);
36463 nargs = 2;
36464 break;
36465 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36466 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36467 gcc_assert (comparison != UNKNOWN);
36468 nargs = 2;
36469 swap = true;
36470 break;
36471 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36472 case V16HI_FTYPE_V16HI_SI_COUNT:
36473 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36474 case V8SI_FTYPE_V8SI_SI_COUNT:
36475 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36476 case V4DI_FTYPE_V4DI_INT_COUNT:
36477 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36478 case V8HI_FTYPE_V8HI_SI_COUNT:
36479 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36480 case V4SI_FTYPE_V4SI_SI_COUNT:
36481 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36482 case V4HI_FTYPE_V4HI_SI_COUNT:
36483 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36484 case V2DI_FTYPE_V2DI_SI_COUNT:
36485 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36486 case V2SI_FTYPE_V2SI_SI_COUNT:
36487 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36488 case V1DI_FTYPE_V1DI_SI_COUNT:
36489 nargs = 2;
36490 last_arg_count = true;
36491 break;
36492 case UINT64_FTYPE_UINT64_UINT64:
36493 case UINT_FTYPE_UINT_UINT:
36494 case UINT_FTYPE_UINT_USHORT:
36495 case UINT_FTYPE_UINT_UCHAR:
36496 case UINT16_FTYPE_UINT16_INT:
36497 case UINT8_FTYPE_UINT8_INT:
36498 case HI_FTYPE_HI_HI:
36499 case SI_FTYPE_SI_SI:
36500 case DI_FTYPE_DI_DI:
36501 case V16SI_FTYPE_V8DF_V8DF:
36502 nargs = 2;
36503 break;
36504 case V2DI_FTYPE_V2DI_INT_CONVERT:
36505 nargs = 2;
36506 rmode = V1TImode;
36507 nargs_constant = 1;
36508 break;
36509 case V4DI_FTYPE_V4DI_INT_CONVERT:
36510 nargs = 2;
36511 rmode = V2TImode;
36512 nargs_constant = 1;
36513 break;
36514 case V8DI_FTYPE_V8DI_INT_CONVERT:
36515 nargs = 2;
36516 rmode = V4TImode;
36517 nargs_constant = 1;
36518 break;
36519 case V8HI_FTYPE_V8HI_INT:
36520 case V8HI_FTYPE_V8SF_INT:
36521 case V16HI_FTYPE_V16SF_INT:
36522 case V8HI_FTYPE_V4SF_INT:
36523 case V8SF_FTYPE_V8SF_INT:
36524 case V4SF_FTYPE_V16SF_INT:
36525 case V16SF_FTYPE_V16SF_INT:
36526 case V4SI_FTYPE_V4SI_INT:
36527 case V4SI_FTYPE_V8SI_INT:
36528 case V4HI_FTYPE_V4HI_INT:
36529 case V4DF_FTYPE_V4DF_INT:
36530 case V4DF_FTYPE_V8DF_INT:
36531 case V4SF_FTYPE_V4SF_INT:
36532 case V4SF_FTYPE_V8SF_INT:
36533 case V2DI_FTYPE_V2DI_INT:
36534 case V2DF_FTYPE_V2DF_INT:
36535 case V2DF_FTYPE_V4DF_INT:
36536 case V16HI_FTYPE_V16HI_INT:
36537 case V8SI_FTYPE_V8SI_INT:
36538 case V16SI_FTYPE_V16SI_INT:
36539 case V4SI_FTYPE_V16SI_INT:
36540 case V4DI_FTYPE_V4DI_INT:
36541 case V2DI_FTYPE_V4DI_INT:
36542 case V4DI_FTYPE_V8DI_INT:
36543 case HI_FTYPE_HI_INT:
36544 case QI_FTYPE_V4SF_INT:
36545 case QI_FTYPE_V2DF_INT:
36546 nargs = 2;
36547 nargs_constant = 1;
36548 break;
36549 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36550 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36551 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36552 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36553 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36554 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36555 case HI_FTYPE_V16SI_V16SI_HI:
36556 case QI_FTYPE_V8DI_V8DI_QI:
36557 case V16HI_FTYPE_V16SI_V16HI_HI:
36558 case V16QI_FTYPE_V16SI_V16QI_HI:
36559 case V16QI_FTYPE_V8DI_V16QI_QI:
36560 case V16SF_FTYPE_V16SF_V16SF_HI:
36561 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36562 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36563 case V16SF_FTYPE_V16SI_V16SF_HI:
36564 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36565 case V16SF_FTYPE_V4SF_V16SF_HI:
36566 case V16SI_FTYPE_SI_V16SI_HI:
36567 case V16SI_FTYPE_V16HI_V16SI_HI:
36568 case V16SI_FTYPE_V16QI_V16SI_HI:
36569 case V16SI_FTYPE_V16SF_V16SI_HI:
36570 case V8SF_FTYPE_V4SF_V8SF_QI:
36571 case V4DF_FTYPE_V2DF_V4DF_QI:
36572 case V8SI_FTYPE_V4SI_V8SI_QI:
36573 case V8SI_FTYPE_SI_V8SI_QI:
36574 case V4SI_FTYPE_V4SI_V4SI_QI:
36575 case V4SI_FTYPE_SI_V4SI_QI:
36576 case V4DI_FTYPE_V2DI_V4DI_QI:
36577 case V4DI_FTYPE_DI_V4DI_QI:
36578 case V2DI_FTYPE_V2DI_V2DI_QI:
36579 case V2DI_FTYPE_DI_V2DI_QI:
36580 case V64QI_FTYPE_V64QI_V64QI_DI:
36581 case V64QI_FTYPE_V16QI_V64QI_DI:
36582 case V64QI_FTYPE_QI_V64QI_DI:
36583 case V32QI_FTYPE_V32QI_V32QI_SI:
36584 case V32QI_FTYPE_V16QI_V32QI_SI:
36585 case V32QI_FTYPE_QI_V32QI_SI:
36586 case V16QI_FTYPE_V16QI_V16QI_HI:
36587 case V16QI_FTYPE_QI_V16QI_HI:
36588 case V32HI_FTYPE_V8HI_V32HI_SI:
36589 case V32HI_FTYPE_HI_V32HI_SI:
36590 case V16HI_FTYPE_V8HI_V16HI_HI:
36591 case V16HI_FTYPE_HI_V16HI_HI:
36592 case V8HI_FTYPE_V8HI_V8HI_QI:
36593 case V8HI_FTYPE_HI_V8HI_QI:
36594 case V8SF_FTYPE_V8HI_V8SF_QI:
36595 case V4SF_FTYPE_V8HI_V4SF_QI:
36596 case V8SI_FTYPE_V8SF_V8SI_QI:
36597 case V4SI_FTYPE_V4SF_V4SI_QI:
36598 case V8DI_FTYPE_V8SF_V8DI_QI:
36599 case V4DI_FTYPE_V4SF_V4DI_QI:
36600 case V2DI_FTYPE_V4SF_V2DI_QI:
36601 case V8SF_FTYPE_V8DI_V8SF_QI:
36602 case V4SF_FTYPE_V4DI_V4SF_QI:
36603 case V4SF_FTYPE_V2DI_V4SF_QI:
36604 case V8DF_FTYPE_V8DI_V8DF_QI:
36605 case V4DF_FTYPE_V4DI_V4DF_QI:
36606 case V2DF_FTYPE_V2DI_V2DF_QI:
36607 case V16QI_FTYPE_V8HI_V16QI_QI:
36608 case V16QI_FTYPE_V16HI_V16QI_HI:
36609 case V16QI_FTYPE_V4SI_V16QI_QI:
36610 case V16QI_FTYPE_V8SI_V16QI_QI:
36611 case V8HI_FTYPE_V4SI_V8HI_QI:
36612 case V8HI_FTYPE_V8SI_V8HI_QI:
36613 case V16QI_FTYPE_V2DI_V16QI_QI:
36614 case V16QI_FTYPE_V4DI_V16QI_QI:
36615 case V8HI_FTYPE_V2DI_V8HI_QI:
36616 case V8HI_FTYPE_V4DI_V8HI_QI:
36617 case V4SI_FTYPE_V2DI_V4SI_QI:
36618 case V4SI_FTYPE_V4DI_V4SI_QI:
36619 case V32QI_FTYPE_V32HI_V32QI_SI:
36620 case HI_FTYPE_V16QI_V16QI_HI:
36621 case SI_FTYPE_V32QI_V32QI_SI:
36622 case DI_FTYPE_V64QI_V64QI_DI:
36623 case QI_FTYPE_V8HI_V8HI_QI:
36624 case HI_FTYPE_V16HI_V16HI_HI:
36625 case SI_FTYPE_V32HI_V32HI_SI:
36626 case QI_FTYPE_V4SI_V4SI_QI:
36627 case QI_FTYPE_V8SI_V8SI_QI:
36628 case QI_FTYPE_V2DI_V2DI_QI:
36629 case QI_FTYPE_V4DI_V4DI_QI:
36630 case V4SF_FTYPE_V2DF_V4SF_QI:
36631 case V4SF_FTYPE_V4DF_V4SF_QI:
36632 case V16SI_FTYPE_V16SI_V16SI_HI:
36633 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36634 case V16SI_FTYPE_V4SI_V16SI_HI:
36635 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36636 case V2DI_FTYPE_V4SI_V2DI_QI:
36637 case V2DI_FTYPE_V8HI_V2DI_QI:
36638 case V2DI_FTYPE_V16QI_V2DI_QI:
36639 case V4DI_FTYPE_V4DI_V4DI_QI:
36640 case V4DI_FTYPE_V4SI_V4DI_QI:
36641 case V4DI_FTYPE_V8HI_V4DI_QI:
36642 case V4DI_FTYPE_V16QI_V4DI_QI:
36643 case V8DI_FTYPE_V8DF_V8DI_QI:
36644 case V4DI_FTYPE_V4DF_V4DI_QI:
36645 case V2DI_FTYPE_V2DF_V2DI_QI:
36646 case V4SI_FTYPE_V4DF_V4SI_QI:
36647 case V4SI_FTYPE_V2DF_V4SI_QI:
36648 case V4SI_FTYPE_V8HI_V4SI_QI:
36649 case V4SI_FTYPE_V16QI_V4SI_QI:
36650 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36651 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36652 case V8DF_FTYPE_V2DF_V8DF_QI:
36653 case V8DF_FTYPE_V4DF_V8DF_QI:
36654 case V8DF_FTYPE_V8DF_V8DF_QI:
36655 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36656 case V8SF_FTYPE_V8SF_V8SF_QI:
36657 case V8SF_FTYPE_V8SI_V8SF_QI:
36658 case V4DF_FTYPE_V4DF_V4DF_QI:
36659 case V4SF_FTYPE_V4SF_V4SF_QI:
36660 case V2DF_FTYPE_V2DF_V2DF_QI:
36661 case V2DF_FTYPE_V4SF_V2DF_QI:
36662 case V2DF_FTYPE_V4SI_V2DF_QI:
36663 case V4SF_FTYPE_V4SI_V4SF_QI:
36664 case V4DF_FTYPE_V4SF_V4DF_QI:
36665 case V4DF_FTYPE_V4SI_V4DF_QI:
36666 case V8SI_FTYPE_V8SI_V8SI_QI:
36667 case V8SI_FTYPE_V8HI_V8SI_QI:
36668 case V8SI_FTYPE_V16QI_V8SI_QI:
36669 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36670 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36671 case V8DF_FTYPE_V8SF_V8DF_QI:
36672 case V8DF_FTYPE_V8SI_V8DF_QI:
36673 case V8DI_FTYPE_DI_V8DI_QI:
36674 case V16SF_FTYPE_V8SF_V16SF_HI:
36675 case V16SI_FTYPE_V8SI_V16SI_HI:
36676 case V16HI_FTYPE_V16HI_V16HI_HI:
36677 case V8HI_FTYPE_V16QI_V8HI_QI:
36678 case V16HI_FTYPE_V16QI_V16HI_HI:
36679 case V32HI_FTYPE_V32HI_V32HI_SI:
36680 case V32HI_FTYPE_V32QI_V32HI_SI:
36681 case V8DI_FTYPE_V16QI_V8DI_QI:
36682 case V8DI_FTYPE_V2DI_V8DI_QI:
36683 case V8DI_FTYPE_V4DI_V8DI_QI:
36684 case V8DI_FTYPE_V8DI_V8DI_QI:
36685 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36686 case V8DI_FTYPE_V8HI_V8DI_QI:
36687 case V8DI_FTYPE_V8SI_V8DI_QI:
36688 case V8HI_FTYPE_V8DI_V8HI_QI:
36689 case V8SF_FTYPE_V8DF_V8SF_QI:
36690 case V8SI_FTYPE_V8DF_V8SI_QI:
36691 case V8SI_FTYPE_V8DI_V8SI_QI:
36692 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36693 nargs = 3;
36694 break;
36695 case V32QI_FTYPE_V32QI_V32QI_INT:
36696 case V16HI_FTYPE_V16HI_V16HI_INT:
36697 case V16QI_FTYPE_V16QI_V16QI_INT:
36698 case V4DI_FTYPE_V4DI_V4DI_INT:
36699 case V8HI_FTYPE_V8HI_V8HI_INT:
36700 case V8SI_FTYPE_V8SI_V8SI_INT:
36701 case V8SI_FTYPE_V8SI_V4SI_INT:
36702 case V8SF_FTYPE_V8SF_V8SF_INT:
36703 case V8SF_FTYPE_V8SF_V4SF_INT:
36704 case V4SI_FTYPE_V4SI_V4SI_INT:
36705 case V4DF_FTYPE_V4DF_V4DF_INT:
36706 case V16SF_FTYPE_V16SF_V16SF_INT:
36707 case V16SF_FTYPE_V16SF_V4SF_INT:
36708 case V16SI_FTYPE_V16SI_V4SI_INT:
36709 case V4DF_FTYPE_V4DF_V2DF_INT:
36710 case V4SF_FTYPE_V4SF_V4SF_INT:
36711 case V2DI_FTYPE_V2DI_V2DI_INT:
36712 case V4DI_FTYPE_V4DI_V2DI_INT:
36713 case V2DF_FTYPE_V2DF_V2DF_INT:
36714 case QI_FTYPE_V8DI_V8DI_INT:
36715 case QI_FTYPE_V8DF_V8DF_INT:
36716 case QI_FTYPE_V2DF_V2DF_INT:
36717 case QI_FTYPE_V4SF_V4SF_INT:
36718 case HI_FTYPE_V16SI_V16SI_INT:
36719 case HI_FTYPE_V16SF_V16SF_INT:
36720 nargs = 3;
36721 nargs_constant = 1;
36722 break;
36723 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36724 nargs = 3;
36725 rmode = V4DImode;
36726 nargs_constant = 1;
36727 break;
36728 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36729 nargs = 3;
36730 rmode = V2DImode;
36731 nargs_constant = 1;
36732 break;
36733 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36734 nargs = 3;
36735 rmode = DImode;
36736 nargs_constant = 1;
36737 break;
36738 case V2DI_FTYPE_V2DI_UINT_UINT:
36739 nargs = 3;
36740 nargs_constant = 2;
36741 break;
36742 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36743 nargs = 3;
36744 rmode = V8DImode;
36745 nargs_constant = 1;
36746 break;
36747 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36748 nargs = 5;
36749 rmode = V8DImode;
36750 mask_pos = 2;
36751 nargs_constant = 1;
36752 break;
36753 case QI_FTYPE_V8DF_INT_QI:
36754 case QI_FTYPE_V4DF_INT_QI:
36755 case QI_FTYPE_V2DF_INT_QI:
36756 case HI_FTYPE_V16SF_INT_HI:
36757 case QI_FTYPE_V8SF_INT_QI:
36758 case QI_FTYPE_V4SF_INT_QI:
36759 nargs = 3;
36760 mask_pos = 1;
36761 nargs_constant = 1;
36762 break;
36763 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36764 nargs = 5;
36765 rmode = V4DImode;
36766 mask_pos = 2;
36767 nargs_constant = 1;
36768 break;
36769 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36770 nargs = 5;
36771 rmode = V2DImode;
36772 mask_pos = 2;
36773 nargs_constant = 1;
36774 break;
36775 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36776 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36777 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36778 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36779 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36780 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36781 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36782 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36783 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36784 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36785 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36786 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36787 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36788 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36789 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36790 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36791 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36792 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36793 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36794 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36795 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36796 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36797 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36798 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36799 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36800 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36801 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
36802 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
36803 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
36804 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
36805 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
36806 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
36807 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
36808 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
36809 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
36810 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
36811 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
36812 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
36813 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
36814 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
36815 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
36816 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
36817 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
36818 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
36819 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
36820 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
36821 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
36822 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
36823 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
36824 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
36825 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
36826 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
36827 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
36828 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
36829 nargs = 4;
36830 break;
36831 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
36832 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
36833 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
36834 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
36835 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
36836 nargs = 4;
36837 nargs_constant = 1;
36838 break;
36839 case QI_FTYPE_V4DI_V4DI_INT_QI:
36840 case QI_FTYPE_V8SI_V8SI_INT_QI:
36841 case QI_FTYPE_V4DF_V4DF_INT_QI:
36842 case QI_FTYPE_V8SF_V8SF_INT_QI:
36843 case QI_FTYPE_V2DI_V2DI_INT_QI:
36844 case QI_FTYPE_V4SI_V4SI_INT_QI:
36845 case QI_FTYPE_V2DF_V2DF_INT_QI:
36846 case QI_FTYPE_V4SF_V4SF_INT_QI:
36847 case DI_FTYPE_V64QI_V64QI_INT_DI:
36848 case SI_FTYPE_V32QI_V32QI_INT_SI:
36849 case HI_FTYPE_V16QI_V16QI_INT_HI:
36850 case SI_FTYPE_V32HI_V32HI_INT_SI:
36851 case HI_FTYPE_V16HI_V16HI_INT_HI:
36852 case QI_FTYPE_V8HI_V8HI_INT_QI:
36853 nargs = 4;
36854 mask_pos = 1;
36855 nargs_constant = 1;
36856 break;
36857 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
36858 nargs = 4;
36859 nargs_constant = 2;
36860 break;
36861 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
36862 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
36863 nargs = 4;
36864 break;
36865 case QI_FTYPE_V8DI_V8DI_INT_QI:
36866 case HI_FTYPE_V16SI_V16SI_INT_HI:
36867 case QI_FTYPE_V8DF_V8DF_INT_QI:
36868 case HI_FTYPE_V16SF_V16SF_INT_HI:
36869 mask_pos = 1;
36870 nargs = 4;
36871 nargs_constant = 1;
36872 break;
36873 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
36874 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
36875 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
36876 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
36877 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
36878 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
36879 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
36880 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
36881 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
36882 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
36883 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
36884 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
36885 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
36886 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
36887 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
36888 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
36889 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
36890 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
36891 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
36892 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
36893 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
36894 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
36895 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
36896 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
36897 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
36898 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
36899 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
36900 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
36901 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
36902 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
36903 nargs = 4;
36904 mask_pos = 2;
36905 nargs_constant = 1;
36906 break;
36907 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
36908 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
36909 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
36910 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
36911 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
36912 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
36913 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
36914 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
36915 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
36916 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
36917 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
36918 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
36919 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
36920 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
36921 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
36922 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
36923 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
36924 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
36925 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
36926 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
36927 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
36928 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
36929 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
36930 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
36931 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
36932 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
36933 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
36934 nargs = 5;
36935 mask_pos = 2;
36936 nargs_constant = 1;
36937 break;
36938 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
36939 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
36940 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
36941 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
36942 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
36943 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
36944 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
36945 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
36946 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
36947 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
36948 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
36949 nargs = 5;
36951 mask_pos = 1;
36952 nargs_constant = 1;
36953 break;
36955 default:
36956 gcc_unreachable ();
36959 gcc_assert (nargs <= ARRAY_SIZE (args));
36961 if (comparison != UNKNOWN)
36963 gcc_assert (nargs == 2);
36964 return ix86_expand_sse_compare (d, exp, target, swap);
36967 if (rmode == VOIDmode || rmode == tmode)
36969 if (optimize
36970 || target == 0
36971 || GET_MODE (target) != tmode
36972 || !insn_p->operand[0].predicate (target, tmode))
36973 target = gen_reg_rtx (tmode);
36974 real_target = target;
36976 else
36978 real_target = gen_reg_rtx (tmode);
36979 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
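  /* Expand each call argument and coerce it into a form accepted by the
     insn's operand predicate: shift counts may be narrowed to SImode, the
     immediate operands are range-checked below, and anything else is
     copied into a register of the expected mode when it does not match.  */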
36982 for (i = 0; i < nargs; i++)
36984 tree arg = CALL_EXPR_ARG (exp, i);
36985 rtx op = expand_normal (arg);
36986 machine_mode mode = insn_p->operand[i + 1].mode;
36987 bool match = insn_p->operand[i + 1].predicate (op, mode);
36989 if (last_arg_count && (i + 1) == nargs)
36991 /* SIMD shift insns take either an 8-bit immediate or a
36992 register as the count.  But builtin functions take int as
36993 the count.  If the count doesn't match, we put it in a register.  */
36994 if (!match)
36996 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
36997 if (!insn_p->operand[i + 1].predicate (op, mode))
36998 op = copy_to_reg (op);
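  /* The immediate operands come at the tail of the argument list: the last
     NARGS_CONSTANT arguments, or, when MASK_POS is nonzero, the argument
     just before the trailing MASK_POS mask operands.  The switch below
     diagnoses values that do not fit the immediate width of the insn.  */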
37001 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37002 (!mask_pos && (nargs - i) <= nargs_constant))
37004 if (!match)
37005 switch (icode)
37007 case CODE_FOR_avx_vinsertf128v4di:
37008 case CODE_FOR_avx_vextractf128v4di:
37009 error ("the last argument must be a 1-bit immediate");
37010 return const0_rtx;
37012 case CODE_FOR_avx512f_cmpv8di3_mask:
37013 case CODE_FOR_avx512f_cmpv16si3_mask:
37014 case CODE_FOR_avx512f_ucmpv8di3_mask:
37015 case CODE_FOR_avx512f_ucmpv16si3_mask:
37016 case CODE_FOR_avx512vl_cmpv4di3_mask:
37017 case CODE_FOR_avx512vl_cmpv8si3_mask:
37018 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37019 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37020 case CODE_FOR_avx512vl_cmpv2di3_mask:
37021 case CODE_FOR_avx512vl_cmpv4si3_mask:
37022 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37023 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37024 error ("the last argument must be a 3-bit immediate");
37025 return const0_rtx;
37027 case CODE_FOR_sse4_1_roundsd:
37028 case CODE_FOR_sse4_1_roundss:
37030 case CODE_FOR_sse4_1_roundpd:
37031 case CODE_FOR_sse4_1_roundps:
37032 case CODE_FOR_avx_roundpd256:
37033 case CODE_FOR_avx_roundps256:
37035 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37036 case CODE_FOR_sse4_1_roundps_sfix:
37037 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37038 case CODE_FOR_avx_roundps_sfix256:
37040 case CODE_FOR_sse4_1_blendps:
37041 case CODE_FOR_avx_blendpd256:
37042 case CODE_FOR_avx_vpermilv4df:
37043 case CODE_FOR_avx_vpermilv4df_mask:
37044 case CODE_FOR_avx512f_getmantv8df_mask:
37045 case CODE_FOR_avx512f_getmantv16sf_mask:
37046 case CODE_FOR_avx512vl_getmantv8sf_mask:
37047 case CODE_FOR_avx512vl_getmantv4df_mask:
37048 case CODE_FOR_avx512vl_getmantv4sf_mask:
37049 case CODE_FOR_avx512vl_getmantv2df_mask:
37050 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37051 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37052 case CODE_FOR_avx512dq_rangepv4df_mask:
37053 case CODE_FOR_avx512dq_rangepv8sf_mask:
37054 case CODE_FOR_avx512dq_rangepv2df_mask:
37055 case CODE_FOR_avx512dq_rangepv4sf_mask:
37056 case CODE_FOR_avx_shufpd256_mask:
37057 error ("the last argument must be a 4-bit immediate");
37058 return const0_rtx;
37060 case CODE_FOR_sha1rnds4:
37061 case CODE_FOR_sse4_1_blendpd:
37062 case CODE_FOR_avx_vpermilv2df:
37063 case CODE_FOR_avx_vpermilv2df_mask:
37064 case CODE_FOR_xop_vpermil2v2df3:
37065 case CODE_FOR_xop_vpermil2v4sf3:
37066 case CODE_FOR_xop_vpermil2v4df3:
37067 case CODE_FOR_xop_vpermil2v8sf3:
37068 case CODE_FOR_avx512f_vinsertf32x4_mask:
37069 case CODE_FOR_avx512f_vinserti32x4_mask:
37070 case CODE_FOR_avx512f_vextractf32x4_mask:
37071 case CODE_FOR_avx512f_vextracti32x4_mask:
37072 case CODE_FOR_sse2_shufpd:
37073 case CODE_FOR_sse2_shufpd_mask:
37074 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37075 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37076 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37077 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37078 error ("the last argument must be a 2-bit immediate");
37079 return const0_rtx;
37081 case CODE_FOR_avx_vextractf128v4df:
37082 case CODE_FOR_avx_vextractf128v8sf:
37083 case CODE_FOR_avx_vextractf128v8si:
37084 case CODE_FOR_avx_vinsertf128v4df:
37085 case CODE_FOR_avx_vinsertf128v8sf:
37086 case CODE_FOR_avx_vinsertf128v8si:
37087 case CODE_FOR_avx512f_vinsertf64x4_mask:
37088 case CODE_FOR_avx512f_vinserti64x4_mask:
37089 case CODE_FOR_avx512f_vextractf64x4_mask:
37090 case CODE_FOR_avx512f_vextracti64x4_mask:
37091 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37092 case CODE_FOR_avx512dq_vinserti32x8_mask:
37093 case CODE_FOR_avx512vl_vinsertv4df:
37094 case CODE_FOR_avx512vl_vinsertv4di:
37095 case CODE_FOR_avx512vl_vinsertv8sf:
37096 case CODE_FOR_avx512vl_vinsertv8si:
37097 error ("the last argument must be a 1-bit immediate");
37098 return const0_rtx;
37100 case CODE_FOR_avx_vmcmpv2df3:
37101 case CODE_FOR_avx_vmcmpv4sf3:
37102 case CODE_FOR_avx_cmpv2df3:
37103 case CODE_FOR_avx_cmpv4sf3:
37104 case CODE_FOR_avx_cmpv4df3:
37105 case CODE_FOR_avx_cmpv8sf3:
37106 case CODE_FOR_avx512f_cmpv8df3_mask:
37107 case CODE_FOR_avx512f_cmpv16sf3_mask:
37108 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37109 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37110 error ("the last argument must be a 5-bit immediate");
37111 return const0_rtx;
37113 default:
37114 switch (nargs_constant)
37116 case 2:
37117 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37118 (!mask_pos && (nargs - i) == nargs_constant))
37120 error ("the next to last argument must be an 8-bit immediate");
37121 break;
37123 case 1:
37124 error ("the last argument must be an 8-bit immediate");
37125 break;
37126 default:
37127 gcc_unreachable ();
37129 return const0_rtx;
37132 else
37134 if (VECTOR_MODE_P (mode))
37135 op = safe_vector_operand (op, mode);
37137 /* If we aren't optimizing, only allow one memory operand to
37138 be generated. */
37139 if (memory_operand (op, mode))
37140 num_memory++;
37142 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37144 if (optimize || !match || num_memory > 1)
37145 op = copy_to_mode_reg (mode, op);
37147 else
37149 op = copy_to_reg (op);
37150 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37154 args[i].op = op;
37155 args[i].mode = mode;
37158 switch (nargs)
37160 case 1:
37161 pat = GEN_FCN (icode) (real_target, args[0].op);
37162 break;
37163 case 2:
37164 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37165 break;
37166 case 3:
37167 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37168 args[2].op);
37169 break;
37170 case 4:
37171 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37172 args[2].op, args[3].op);
37173 break;
37174 case 5:
37175 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37176 args[2].op, args[3].op, args[4].op);
      break;
37177 case 6:
37178 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37179 args[2].op, args[3].op, args[4].op,
37180 args[5].op);
37181 break;
37182 default:
37183 gcc_unreachable ();
37186 if (! pat)
37187 return 0;
37189 emit_insn (pat);
37190 return target;
37193 /* Transform pattern of following layout:
37194      (parallel [
37195 	set (A B)
37196 	(unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37198    into:
37199      (set (A B))
   Or:
37202      (parallel [ A B
	...
37204 	(unspec [C] UNSPEC_EMBEDDED_ROUNDING)
	...
	])
37207    into:
37208      (parallel [ A B ... ])  */
37210 static rtx
37211 ix86_erase_embedded_rounding (rtx pat)
37213 if (GET_CODE (pat) == INSN)
37214 pat = PATTERN (pat);
37216 gcc_assert (GET_CODE (pat) == PARALLEL);
37218 if (XVECLEN (pat, 0) == 2)
37220 rtx p0 = XVECEXP (pat, 0, 0);
37221 rtx p1 = XVECEXP (pat, 0, 1);
37223 gcc_assert (GET_CODE (p0) == SET
37224 && GET_CODE (p1) == UNSPEC
37225 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37227 return p0;
37229 else
37231 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37232 int i = 0;
37233 int j = 0;
37235 for (; i < XVECLEN (pat, 0); ++i)
37237 rtx elem = XVECEXP (pat, 0, i);
37238 if (GET_CODE (elem) != UNSPEC
37239 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37240 res [j++] = elem;
37243 /* No more than 1 occurrence was removed.  */
37244 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37246 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37250 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37251 with rounding. */
37252 static rtx
37253 ix86_expand_sse_comi_round (const struct builtin_description *d,
37254 tree exp, rtx target)
37256 rtx pat, set_dst;
37257 tree arg0 = CALL_EXPR_ARG (exp, 0);
37258 tree arg1 = CALL_EXPR_ARG (exp, 1);
37259 tree arg2 = CALL_EXPR_ARG (exp, 2);
37260 tree arg3 = CALL_EXPR_ARG (exp, 3);
37261 rtx op0 = expand_normal (arg0);
37262 rtx op1 = expand_normal (arg1);
37263 rtx op2 = expand_normal (arg2);
37264 rtx op3 = expand_normal (arg3);
37265 enum insn_code icode = d->icode;
37266 const struct insn_data_d *insn_p = &insn_data[icode];
37267 machine_mode mode0 = insn_p->operand[0].mode;
37268 machine_mode mode1 = insn_p->operand[1].mode;
37269 enum rtx_code comparison = UNEQ;
37270 bool need_ucomi = false;
37272 /* See avxintrin.h for values. */
37273 enum rtx_code comi_comparisons[32] =
37275 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37276 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37277 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37279 bool need_ucomi_values[32] =
37281 true, false, false, true, true, false, false, true,
37282 true, false, false, true, true, false, false, true,
37283 false, true, true, false, false, true, true, false,
37284 false, true, true, false, false, true, true, false
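  /* The tables above map the 0..31 comparison predicate immediates (see
     avxintrin.h) to RTX comparison codes and record whether the unordered
     (ucomi) form of the instruction has to be used.  */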
37287 if (!CONST_INT_P (op2))
37289 error ("the third argument must be a comparison constant");
37290 return const0_rtx;
37292 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37294 error ("incorrect comparison mode");
37295 return const0_rtx;
37298 if (!insn_p->operand[2].predicate (op3, SImode))
37300 error ("incorrect rounding operand");
37301 return const0_rtx;
37304 comparison = comi_comparisons[INTVAL (op2)];
37305 need_ucomi = need_ucomi_values[INTVAL (op2)];
37307 if (VECTOR_MODE_P (mode0))
37308 op0 = safe_vector_operand (op0, mode0);
37309 if (VECTOR_MODE_P (mode1))
37310 op1 = safe_vector_operand (op1, mode1);
37312 target = gen_reg_rtx (SImode);
37313 emit_move_insn (target, const0_rtx);
37314 target = gen_rtx_SUBREG (QImode, target, 0);
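  /* The result is built in the low byte of an SImode pseudo that has just
     been zeroed, so the STRICT_LOW_PART store of the comparison flag below
     leaves the upper bits clear; SUBREG_REG of it is returned at the end.  */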
37316 if ((optimize && !register_operand (op0, mode0))
37317 || !insn_p->operand[0].predicate (op0, mode0))
37318 op0 = copy_to_mode_reg (mode0, op0);
37319 if ((optimize && !register_operand (op1, mode1))
37320 || !insn_p->operand[1].predicate (op1, mode1))
37321 op1 = copy_to_mode_reg (mode1, op1);
37323 if (need_ucomi)
37324 icode = icode == CODE_FOR_sse_comi_round
37325 ? CODE_FOR_sse_ucomi_round
37326 : CODE_FOR_sse2_ucomi_round;
37328 pat = GEN_FCN (icode) (op0, op1, op3);
37329 if (! pat)
37330 return 0;
37332 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37333 if (INTVAL (op3) == NO_ROUND)
37335 pat = ix86_erase_embedded_rounding (pat);
37336 if (! pat)
37337 return 0;
37339 set_dst = SET_DEST (pat);
37341 else
37343 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37344 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37347 emit_insn (pat);
37348 emit_insn (gen_rtx_SET (VOIDmode,
37349 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37350 gen_rtx_fmt_ee (comparison, QImode,
37351 set_dst,
37352 const0_rtx)));
37354 return SUBREG_REG (target);
37357 static rtx
37358 ix86_expand_round_builtin (const struct builtin_description *d,
37359 tree exp, rtx target)
37361 rtx pat;
37362 unsigned int i, nargs;
37363 struct
37365 rtx op;
37366 machine_mode mode;
37367 } args[6];
37368 enum insn_code icode = d->icode;
37369 const struct insn_data_d *insn_p = &insn_data[icode];
37370 machine_mode tmode = insn_p->operand[0].mode;
37371 unsigned int nargs_constant = 0;
37372 unsigned int redundant_embed_rnd = 0;
37374 switch ((enum ix86_builtin_func_type) d->flag)
37376 case UINT64_FTYPE_V2DF_INT:
37377 case UINT64_FTYPE_V4SF_INT:
37378 case UINT_FTYPE_V2DF_INT:
37379 case UINT_FTYPE_V4SF_INT:
37380 case INT64_FTYPE_V2DF_INT:
37381 case INT64_FTYPE_V4SF_INT:
37382 case INT_FTYPE_V2DF_INT:
37383 case INT_FTYPE_V4SF_INT:
37384 nargs = 2;
37385 break;
37386 case V4SF_FTYPE_V4SF_UINT_INT:
37387 case V4SF_FTYPE_V4SF_UINT64_INT:
37388 case V2DF_FTYPE_V2DF_UINT64_INT:
37389 case V4SF_FTYPE_V4SF_INT_INT:
37390 case V4SF_FTYPE_V4SF_INT64_INT:
37391 case V2DF_FTYPE_V2DF_INT64_INT:
37392 case V4SF_FTYPE_V4SF_V4SF_INT:
37393 case V2DF_FTYPE_V2DF_V2DF_INT:
37394 case V4SF_FTYPE_V4SF_V2DF_INT:
37395 case V2DF_FTYPE_V2DF_V4SF_INT:
37396 nargs = 3;
37397 break;
37398 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37399 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37400 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37401 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37402 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37403 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37404 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37405 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37406 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37407 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37408 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37409 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37410 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37411 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37412 nargs = 4;
37413 break;
37414 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37415 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37416 nargs_constant = 2;
37417 nargs = 4;
37418 break;
37419 case INT_FTYPE_V4SF_V4SF_INT_INT:
37420 case INT_FTYPE_V2DF_V2DF_INT_INT:
37421 return ix86_expand_sse_comi_round (d, exp, target);
37422 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37423 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37424 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37425 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37426 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37427 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37428 nargs = 5;
37429 break;
37430 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37431 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37432 nargs_constant = 4;
37433 nargs = 5;
37434 break;
37435 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37436 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37437 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37438 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37439 nargs_constant = 3;
37440 nargs = 5;
37441 break;
37442 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37443 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37444 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37445 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37446 nargs = 6;
37447 nargs_constant = 4;
37448 break;
37449 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37450 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37451 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37452 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37453 nargs = 6;
37454 nargs_constant = 3;
37455 break;
37456 default:
37457 gcc_unreachable ();
37459 gcc_assert (nargs <= ARRAY_SIZE (args));
37461 if (optimize
37462 || target == 0
37463 || GET_MODE (target) != tmode
37464 || !insn_p->operand[0].predicate (target, tmode))
37465 target = gen_reg_rtx (tmode);
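  /* Expand the operands.  The operand NARGS_CONSTANT positions from the end
     must be an immediate of the width checked below, and the last operand is
     always the rounding-mode selector.  */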
37467 for (i = 0; i < nargs; i++)
37469 tree arg = CALL_EXPR_ARG (exp, i);
37470 rtx op = expand_normal (arg);
37471 machine_mode mode = insn_p->operand[i + 1].mode;
37472 bool match = insn_p->operand[i + 1].predicate (op, mode);
37474 if (i == nargs - nargs_constant)
37476 if (!match)
37478 switch (icode)
37480 case CODE_FOR_avx512f_getmantv8df_mask_round:
37481 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37482 case CODE_FOR_avx512f_vgetmantv2df_round:
37483 case CODE_FOR_avx512f_vgetmantv4sf_round:
37484 error ("the immediate argument must be a 4-bit immediate");
37485 return const0_rtx;
37486 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37487 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37488 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37489 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37490 error ("the immediate argument must be a 5-bit immediate");
37491 return const0_rtx;
37492 default:
37493 error ("the immediate argument must be an 8-bit immediate");
37494 return const0_rtx;
37498 else if (i == nargs-1)
37500 if (!insn_p->operand[nargs].predicate (op, SImode))
37502 error ("incorrect rounding operand");
37503 return const0_rtx;
37506 /* If there is no rounding, use the normal version of the pattern.  */
37507 if (INTVAL (op) == NO_ROUND)
37508 redundant_embed_rnd = 1;
37510 else
37512 if (VECTOR_MODE_P (mode))
37513 op = safe_vector_operand (op, mode);
37515 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37517 if (optimize || !match)
37518 op = copy_to_mode_reg (mode, op);
37520 else
37522 op = copy_to_reg (op);
37523 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37527 args[i].op = op;
37528 args[i].mode = mode;
37531 switch (nargs)
37533 case 1:
37534 pat = GEN_FCN (icode) (target, args[0].op);
37535 break;
37536 case 2:
37537 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37538 break;
37539 case 3:
37540 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37541 args[2].op);
37542 break;
37543 case 4:
37544 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37545 args[2].op, args[3].op);
37546 break;
37547 case 5:
37548 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37549 args[2].op, args[3].op, args[4].op);
      break;
37550 case 6:
37551 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37552 args[2].op, args[3].op, args[4].op,
37553 args[5].op);
37554 break;
37555 default:
37556 gcc_unreachable ();
37559 if (!pat)
37560 return 0;
37562 if (redundant_embed_rnd)
37563 pat = ix86_erase_embedded_rounding (pat);
37565 emit_insn (pat);
37566 return target;
37569 /* Subroutine of ix86_expand_builtin to take care of special insns
37570 with variable number of operands. */
37572 static rtx
37573 ix86_expand_special_args_builtin (const struct builtin_description *d,
37574 tree exp, rtx target)
37576 tree arg;
37577 rtx pat, op;
37578 unsigned int i, nargs, arg_adjust, memory;
37579 bool aligned_mem = false;
37580 struct
37582 rtx op;
37583 machine_mode mode;
37584 } args[3];
37585 enum insn_code icode = d->icode;
37586 bool last_arg_constant = false;
37587 const struct insn_data_d *insn_p = &insn_data[icode];
37588 machine_mode tmode = insn_p->operand[0].mode;
37589 enum { load, store } klass;
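  /* KLASS records whether the builtin loads into TARGET or stores through
     its first argument, MEMORY is the index of the memory operand among the
     expanded arguments (ARRAY_SIZE (args) when the memory operand is the
     store destination or there is none), and ALIGNED_MEM marks insns that
     require the memory to be aligned to its mode.  */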
37591 switch ((enum ix86_builtin_func_type) d->flag)
37593 case VOID_FTYPE_VOID:
37594 emit_insn (GEN_FCN (icode) (target));
37595 return 0;
37596 case VOID_FTYPE_UINT64:
37597 case VOID_FTYPE_UNSIGNED:
37598 nargs = 0;
37599 klass = store;
37600 memory = 0;
37601 break;
37603 case INT_FTYPE_VOID:
37604 case USHORT_FTYPE_VOID:
37605 case UINT64_FTYPE_VOID:
37606 case UNSIGNED_FTYPE_VOID:
37607 nargs = 0;
37608 klass = load;
37609 memory = 0;
37610 break;
37611 case UINT64_FTYPE_PUNSIGNED:
37612 case V2DI_FTYPE_PV2DI:
37613 case V4DI_FTYPE_PV4DI:
37614 case V32QI_FTYPE_PCCHAR:
37615 case V16QI_FTYPE_PCCHAR:
37616 case V8SF_FTYPE_PCV4SF:
37617 case V8SF_FTYPE_PCFLOAT:
37618 case V4SF_FTYPE_PCFLOAT:
37619 case V4DF_FTYPE_PCV2DF:
37620 case V4DF_FTYPE_PCDOUBLE:
37621 case V2DF_FTYPE_PCDOUBLE:
37622 case VOID_FTYPE_PVOID:
37623 case V16SI_FTYPE_PV4SI:
37624 case V16SF_FTYPE_PV4SF:
37625 case V8DI_FTYPE_PV4DI:
37626 case V8DI_FTYPE_PV8DI:
37627 case V8DF_FTYPE_PV4DF:
37628 nargs = 1;
37629 klass = load;
37630 memory = 0;
37631 switch (icode)
37633 case CODE_FOR_sse4_1_movntdqa:
37634 case CODE_FOR_avx2_movntdqa:
37635 case CODE_FOR_avx512f_movntdqa:
37636 aligned_mem = true;
37637 break;
37638 default:
37639 break;
37641 break;
37642 case VOID_FTYPE_PV2SF_V4SF:
37643 case VOID_FTYPE_PV8DI_V8DI:
37644 case VOID_FTYPE_PV4DI_V4DI:
37645 case VOID_FTYPE_PV2DI_V2DI:
37646 case VOID_FTYPE_PCHAR_V32QI:
37647 case VOID_FTYPE_PCHAR_V16QI:
37648 case VOID_FTYPE_PFLOAT_V16SF:
37649 case VOID_FTYPE_PFLOAT_V8SF:
37650 case VOID_FTYPE_PFLOAT_V4SF:
37651 case VOID_FTYPE_PDOUBLE_V8DF:
37652 case VOID_FTYPE_PDOUBLE_V4DF:
37653 case VOID_FTYPE_PDOUBLE_V2DF:
37654 case VOID_FTYPE_PLONGLONG_LONGLONG:
37655 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37656 case VOID_FTYPE_PINT_INT:
37657 nargs = 1;
37658 klass = store;
37659 /* Reserve memory operand for target. */
37660 memory = ARRAY_SIZE (args);
37661 switch (icode)
37663 /* These builtins and instructions require the memory
37664 to be properly aligned. */
37665 case CODE_FOR_avx_movntv4di:
37666 case CODE_FOR_sse2_movntv2di:
37667 case CODE_FOR_avx_movntv8sf:
37668 case CODE_FOR_sse_movntv4sf:
37669 case CODE_FOR_sse4a_vmmovntv4sf:
37670 case CODE_FOR_avx_movntv4df:
37671 case CODE_FOR_sse2_movntv2df:
37672 case CODE_FOR_sse4a_vmmovntv2df:
37673 case CODE_FOR_sse2_movntidi:
37674 case CODE_FOR_sse_movntq:
37675 case CODE_FOR_sse2_movntisi:
37676 case CODE_FOR_avx512f_movntv16sf:
37677 case CODE_FOR_avx512f_movntv8df:
37678 case CODE_FOR_avx512f_movntv8di:
37679 aligned_mem = true;
37680 break;
37681 default:
37682 break;
37684 break;
37685 case V4SF_FTYPE_V4SF_PCV2SF:
37686 case V2DF_FTYPE_V2DF_PCDOUBLE:
37687 nargs = 2;
37688 klass = load;
37689 memory = 1;
37690 break;
37691 case V8SF_FTYPE_PCV8SF_V8SI:
37692 case V4DF_FTYPE_PCV4DF_V4DI:
37693 case V4SF_FTYPE_PCV4SF_V4SI:
37694 case V2DF_FTYPE_PCV2DF_V2DI:
37695 case V8SI_FTYPE_PCV8SI_V8SI:
37696 case V4DI_FTYPE_PCV4DI_V4DI:
37697 case V4SI_FTYPE_PCV4SI_V4SI:
37698 case V2DI_FTYPE_PCV2DI_V2DI:
37699 nargs = 2;
37700 klass = load;
37701 memory = 0;
37702 break;
37703 case VOID_FTYPE_PV8DF_V8DF_QI:
37704 case VOID_FTYPE_PV16SF_V16SF_HI:
37705 case VOID_FTYPE_PV8DI_V8DI_QI:
37706 case VOID_FTYPE_PV4DI_V4DI_QI:
37707 case VOID_FTYPE_PV2DI_V2DI_QI:
37708 case VOID_FTYPE_PV16SI_V16SI_HI:
37709 case VOID_FTYPE_PV8SI_V8SI_QI:
37710 case VOID_FTYPE_PV4SI_V4SI_QI:
37711 switch (icode)
37713 /* These builtins and instructions require the memory
37714 to be properly aligned. */
37715 case CODE_FOR_avx512f_storev16sf_mask:
37716 case CODE_FOR_avx512f_storev16si_mask:
37717 case CODE_FOR_avx512f_storev8df_mask:
37718 case CODE_FOR_avx512f_storev8di_mask:
37719 case CODE_FOR_avx512vl_storev8sf_mask:
37720 case CODE_FOR_avx512vl_storev8si_mask:
37721 case CODE_FOR_avx512vl_storev4df_mask:
37722 case CODE_FOR_avx512vl_storev4di_mask:
37723 case CODE_FOR_avx512vl_storev4sf_mask:
37724 case CODE_FOR_avx512vl_storev4si_mask:
37725 case CODE_FOR_avx512vl_storev2df_mask:
37726 case CODE_FOR_avx512vl_storev2di_mask:
37727 aligned_mem = true;
37728 break;
37729 default:
37730 break;
37732 /* FALLTHRU */
37733 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37734 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37735 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37736 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37737 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37738 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37739 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37740 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37741 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37742 case VOID_FTYPE_PFLOAT_V4SF_QI:
37743 case VOID_FTYPE_PV8SI_V8DI_QI:
37744 case VOID_FTYPE_PV8HI_V8DI_QI:
37745 case VOID_FTYPE_PV16HI_V16SI_HI:
37746 case VOID_FTYPE_PV16QI_V8DI_QI:
37747 case VOID_FTYPE_PV16QI_V16SI_HI:
37748 case VOID_FTYPE_PV4SI_V4DI_QI:
37749 case VOID_FTYPE_PV4SI_V2DI_QI:
37750 case VOID_FTYPE_PV8HI_V4DI_QI:
37751 case VOID_FTYPE_PV8HI_V2DI_QI:
37752 case VOID_FTYPE_PV8HI_V8SI_QI:
37753 case VOID_FTYPE_PV8HI_V4SI_QI:
37754 case VOID_FTYPE_PV16QI_V4DI_QI:
37755 case VOID_FTYPE_PV16QI_V2DI_QI:
37756 case VOID_FTYPE_PV16QI_V8SI_QI:
37757 case VOID_FTYPE_PV16QI_V4SI_QI:
37758 case VOID_FTYPE_PV8HI_V8HI_QI:
37759 case VOID_FTYPE_PV16HI_V16HI_HI:
37760 case VOID_FTYPE_PV32HI_V32HI_SI:
37761 case VOID_FTYPE_PV16QI_V16QI_HI:
37762 case VOID_FTYPE_PV32QI_V32QI_SI:
37763 case VOID_FTYPE_PV64QI_V64QI_DI:
37764 case VOID_FTYPE_PV4DF_V4DF_QI:
37765 case VOID_FTYPE_PV2DF_V2DF_QI:
37766 case VOID_FTYPE_PV8SF_V8SF_QI:
37767 case VOID_FTYPE_PV4SF_V4SF_QI:
37768 nargs = 2;
37769 klass = store;
37770 /* Reserve memory operand for target. */
37771 memory = ARRAY_SIZE (args);
37772 break;
37773 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37774 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37775 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37776 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37777 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37778 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37779 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37780 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37781 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37782 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37783 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37784 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37785 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37786 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37787 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37788 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37789 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37790 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37791 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37792 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37793 nargs = 3;
37794 klass = load;
37795 memory = 0;
37796 switch (icode)
37798 /* These builtins and instructions require the memory
37799 to be properly aligned. */
37800 case CODE_FOR_avx512f_loadv16sf_mask:
37801 case CODE_FOR_avx512f_loadv16si_mask:
37802 case CODE_FOR_avx512f_loadv8df_mask:
37803 case CODE_FOR_avx512f_loadv8di_mask:
37804 case CODE_FOR_avx512vl_loadv8sf_mask:
37805 case CODE_FOR_avx512vl_loadv8si_mask:
37806 case CODE_FOR_avx512vl_loadv4df_mask:
37807 case CODE_FOR_avx512vl_loadv4di_mask:
37808 case CODE_FOR_avx512vl_loadv4sf_mask:
37809 case CODE_FOR_avx512vl_loadv4si_mask:
37810 case CODE_FOR_avx512vl_loadv2df_mask:
37811 case CODE_FOR_avx512vl_loadv2di_mask:
37812 case CODE_FOR_avx512bw_loadv64qi_mask:
37813 case CODE_FOR_avx512vl_loadv32qi_mask:
37814 case CODE_FOR_avx512vl_loadv16qi_mask:
37815 case CODE_FOR_avx512bw_loadv32hi_mask:
37816 case CODE_FOR_avx512vl_loadv16hi_mask:
37817 case CODE_FOR_avx512vl_loadv8hi_mask:
37818 aligned_mem = true;
37819 break;
37820 default:
37821 break;
37823 break;
37824 case VOID_FTYPE_UINT_UINT_UINT:
37825 case VOID_FTYPE_UINT64_UINT_UINT:
37826 case UCHAR_FTYPE_UINT_UINT_UINT:
37827 case UCHAR_FTYPE_UINT64_UINT_UINT:
37828 nargs = 3;
37829 klass = load;
37830 memory = ARRAY_SIZE (args);
37831 last_arg_constant = true;
37832 break;
37833 default:
37834 gcc_unreachable ();
37837 gcc_assert (nargs <= ARRAY_SIZE (args));
37839 if (klass == store)
37841 arg = CALL_EXPR_ARG (exp, 0);
37842 op = expand_normal (arg);
37843 gcc_assert (target == 0);
37844 if (memory)
37846 op = ix86_zero_extend_to_Pmode (op);
37847 target = gen_rtx_MEM (tmode, op);
37848 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
37849 on it. Try to improve it using get_pointer_alignment,
37850 and if the special builtin is one that requires strict
37851 mode alignment, also from its GET_MODE_ALIGNMENT.
37852 Failure to do so could lead to ix86_legitimate_combined_insn
37853 rejecting all changes to such insns. */
37854 unsigned int align = get_pointer_alignment (arg);
37855 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
37856 align = GET_MODE_ALIGNMENT (tmode);
37857 if (MEM_ALIGN (target) < align)
37858 set_mem_align (target, align);
37860 else
37861 target = force_reg (tmode, op);
37862 arg_adjust = 1;
37864 else
37866 arg_adjust = 0;
37867 if (optimize
37868 || target == 0
37869 || !register_operand (target, tmode)
37870 || GET_MODE (target) != tmode)
37871 target = gen_reg_rtx (tmode);
37874 for (i = 0; i < nargs; i++)
37876 machine_mode mode = insn_p->operand[i + 1].mode;
37877 bool match;
37879 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
37880 op = expand_normal (arg);
37881 match = insn_p->operand[i + 1].predicate (op, mode);
37883 if (last_arg_constant && (i + 1) == nargs)
37885 if (!match)
37887 if (icode == CODE_FOR_lwp_lwpvalsi3
37888 || icode == CODE_FOR_lwp_lwpinssi3
37889 || icode == CODE_FOR_lwp_lwpvaldi3
37890 || icode == CODE_FOR_lwp_lwpinsdi3)
37891 error ("the last argument must be a 32-bit immediate");
37892 else
37893 error ("the last argument must be an 8-bit immediate");
37894 return const0_rtx;
37897 else
37899 if (i == memory)
37901 /* This must be the memory operand. */
37902 op = ix86_zero_extend_to_Pmode (op);
37903 op = gen_rtx_MEM (mode, op);
37904 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
37905 on it. Try to improve it using get_pointer_alignment,
37906 and if the special builtin is one that requires strict
37907 mode alignment, also from its GET_MODE_ALIGNMENT.
37908 Failure to do so could lead to ix86_legitimate_combined_insn
37909 rejecting all changes to such insns. */
37910 unsigned int align = get_pointer_alignment (arg);
37911 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
37912 align = GET_MODE_ALIGNMENT (mode);
37913 if (MEM_ALIGN (op) < align)
37914 set_mem_align (op, align);
37916 else
37918 /* This must be a register.  */
37919 if (VECTOR_MODE_P (mode))
37920 op = safe_vector_operand (op, mode);
37922 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37923 op = copy_to_mode_reg (mode, op);
37924 else
37926 op = copy_to_reg (op);
37927 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37932 args[i].op = op;
37933 args[i].mode = mode;
37936 switch (nargs)
37938 case 0:
37939 pat = GEN_FCN (icode) (target);
37940 break;
37941 case 1:
37942 pat = GEN_FCN (icode) (target, args[0].op);
37943 break;
37944 case 2:
37945 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37946 break;
37947 case 3:
37948 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37949 break;
37950 default:
37951 gcc_unreachable ();
37954 if (! pat)
37955 return 0;
37956 emit_insn (pat);
37957 return klass == store ? 0 : target;
37960 /* Return the integer constant in ARG. Constrain it to be in the range
37961 of the subparts of VEC_TYPE; issue an error if not. */
37963 static int
37964 get_element_number (tree vec_type, tree arg)
37966 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
37968 if (!tree_fits_uhwi_p (arg)
37969 || (elt = tree_to_uhwi (arg), elt > max))
37971 error ("selector must be an integer constant in the range 0..%wi", max);
37972 return 0;
37975 return elt;
37978 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
37979 ix86_expand_vector_init. We DO have language-level syntax for this, in
37980 the form of (type){ init-list }. Except that since we can't place emms
37981 instructions from inside the compiler, we can't allow the use of MMX
37982 registers unless the user explicitly asks for it. So we do *not* define
37983 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
37984 we have builtins invoked by mmintrin.h that give us license to emit
37985 these sorts of instructions. */
37987 static rtx
37988 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
37990 machine_mode tmode = TYPE_MODE (type);
37991 machine_mode inner_mode = GET_MODE_INNER (tmode);
37992 int i, n_elt = GET_MODE_NUNITS (tmode);
37993 rtvec v = rtvec_alloc (n_elt);
37995 gcc_assert (VECTOR_MODE_P (tmode));
37996 gcc_assert (call_expr_nargs (exp) == n_elt);
37998 for (i = 0; i < n_elt; ++i)
38000 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38001 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38004 if (!target || !register_operand (target, tmode))
38005 target = gen_reg_rtx (tmode);
38007 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38008 return target;
38011 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38012 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38013 had a language-level syntax for referencing vector elements. */
38015 static rtx
38016 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38018 machine_mode tmode, mode0;
38019 tree arg0, arg1;
38020 int elt;
38021 rtx op0;
38023 arg0 = CALL_EXPR_ARG (exp, 0);
38024 arg1 = CALL_EXPR_ARG (exp, 1);
38026 op0 = expand_normal (arg0);
38027 elt = get_element_number (TREE_TYPE (arg0), arg1);
38029 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38030 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38031 gcc_assert (VECTOR_MODE_P (mode0));
38033 op0 = force_reg (mode0, op0);
38035 if (optimize || !target || !register_operand (target, tmode))
38036 target = gen_reg_rtx (tmode);
38038 ix86_expand_vector_extract (true, target, op0, elt);
38040 return target;
38043 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38044 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38045 a language-level syntax for referencing vector elements. */
38047 static rtx
38048 ix86_expand_vec_set_builtin (tree exp)
38050 machine_mode tmode, mode1;
38051 tree arg0, arg1, arg2;
38052 int elt;
38053 rtx op0, op1, target;
38055 arg0 = CALL_EXPR_ARG (exp, 0);
38056 arg1 = CALL_EXPR_ARG (exp, 1);
38057 arg2 = CALL_EXPR_ARG (exp, 2);
38059 tmode = TYPE_MODE (TREE_TYPE (arg0));
38060 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38061 gcc_assert (VECTOR_MODE_P (tmode));
38063 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38064 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38065 elt = get_element_number (TREE_TYPE (arg0), arg2);
38067 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38068 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38070 op0 = force_reg (tmode, op0);
38071 op1 = force_reg (mode1, op1);
38073 /* OP0 is the source of these builtin functions and shouldn't be
38074 modified. Create a copy, use it and return it as target. */
38075 target = gen_reg_rtx (tmode);
38076 emit_move_insn (target, op0);
38077 ix86_expand_vector_set (true, target, op1, elt);
38079 return target;
38082 /* Emit conditional move of SRC to DST with condition
38083 OP1 CODE OP2. */
38084 static void
38085 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38087 rtx t;
38089 if (TARGET_CMOVE)
38091 t = ix86_expand_compare (code, op1, op2);
38092 emit_insn (gen_rtx_SET (VOIDmode, dst,
38093 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38094 src, dst)));
38096 else
38098 rtx nomove = gen_label_rtx ();
38099 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38100 const0_rtx, GET_MODE (op1), 1, nomove);
38101 emit_move_insn (dst, src);
38102 emit_label (nomove);
38106 /* Choose the max of DST and SRC and put it in DST.  */
38107 static void
38108 ix86_emit_move_max (rtx dst, rtx src)
38110 ix86_emit_cmove (dst, src, LTU, dst, src);
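/* The LTU comparison makes this an unsigned maximum; the bounds expansion
   below (IX86_BUILTIN_BNDNARROW and IX86_BUILTIN_BNDINT) relies on that for
   both the lower bound and the one's complement upper bound.  */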
38113 /* Expand an expression EXP that calls a built-in function,
38114 with result going to TARGET if that's convenient
38115 (and in mode MODE if that's convenient).
38116 SUBTARGET may be used as the target for computing one of EXP's operands.
38117 IGNORE is nonzero if the value is to be ignored. */
38119 static rtx
38120 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38121 machine_mode mode, int ignore)
38123 const struct builtin_description *d;
38124 size_t i;
38125 enum insn_code icode;
38126 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38127 tree arg0, arg1, arg2, arg3, arg4;
38128 rtx op0, op1, op2, op3, op4, pat, insn;
38129 machine_mode mode0, mode1, mode2, mode3, mode4;
38130 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38132 /* For CPU builtins that can be folded, fold first and expand the fold. */
38133 switch (fcode)
38135 case IX86_BUILTIN_CPU_INIT:
38137 /* Make it call __cpu_indicator_init in libgcc. */
38138 tree call_expr, fndecl, type;
38139 type = build_function_type_list (integer_type_node, NULL_TREE);
38140 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38141 call_expr = build_call_expr (fndecl, 0);
38142 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38144 case IX86_BUILTIN_CPU_IS:
38145 case IX86_BUILTIN_CPU_SUPPORTS:
38147 tree arg0 = CALL_EXPR_ARG (exp, 0);
38148 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38149 gcc_assert (fold_expr != NULL_TREE);
38150 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38154 /* Determine whether the builtin function is available under the current ISA.
38155 Originally the builtin was not created if it wasn't applicable to the
38156 current ISA based on the command line switches. With function specific
38157 options, we need to check in the context of the function making the call
38158 whether it is supported. */
38159 if (ix86_builtins_isa[fcode].isa
38160 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38162 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38163 NULL, (enum fpmath_unit) 0, false);
38165 if (!opts)
38166 error ("%qE needs unknown isa option", fndecl);
38167 else
38169 gcc_assert (opts != NULL);
38170 error ("%qE needs isa option %s", fndecl, opts);
38171 free (opts);
38173 return const0_rtx;
38176 switch (fcode)
38178 case IX86_BUILTIN_BNDMK:
38179 if (!target
38180 || GET_MODE (target) != BNDmode
38181 || !register_operand (target, BNDmode))
38182 target = gen_reg_rtx (BNDmode);
38184 arg0 = CALL_EXPR_ARG (exp, 0);
38185 arg1 = CALL_EXPR_ARG (exp, 1);
38187 op0 = expand_normal (arg0);
38188 op1 = expand_normal (arg1);
38190 if (!register_operand (op0, Pmode))
38191 op0 = ix86_zero_extend_to_Pmode (op0);
38192 if (!register_operand (op1, Pmode))
38193 op1 = ix86_zero_extend_to_Pmode (op1);
38195 /* Builtin arg1 is the size of the block, but instruction op1
38196 should be (size - 1).  */
38197 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38198 NULL_RTX, 1, OPTAB_DIRECT);
38200 emit_insn (BNDmode == BND64mode
38201 ? gen_bnd64_mk (target, op0, op1)
38202 : gen_bnd32_mk (target, op0, op1));
38203 return target;
38205 case IX86_BUILTIN_BNDSTX:
38206 arg0 = CALL_EXPR_ARG (exp, 0);
38207 arg1 = CALL_EXPR_ARG (exp, 1);
38208 arg2 = CALL_EXPR_ARG (exp, 2);
38210 op0 = expand_normal (arg0);
38211 op1 = expand_normal (arg1);
38212 op2 = expand_normal (arg2);
38214 if (!register_operand (op0, Pmode))
38215 op0 = ix86_zero_extend_to_Pmode (op0);
38216 if (!register_operand (op1, BNDmode))
38217 op1 = copy_to_mode_reg (BNDmode, op1);
38218 if (!register_operand (op2, Pmode))
38219 op2 = ix86_zero_extend_to_Pmode (op2);
38221 emit_insn (BNDmode == BND64mode
38222 ? gen_bnd64_stx (op2, op0, op1)
38223 : gen_bnd32_stx (op2, op0, op1));
38224 return 0;
38226 case IX86_BUILTIN_BNDLDX:
38227 if (!target
38228 || GET_MODE (target) != BNDmode
38229 || !register_operand (target, BNDmode))
38230 target = gen_reg_rtx (BNDmode);
38232 arg0 = CALL_EXPR_ARG (exp, 0);
38233 arg1 = CALL_EXPR_ARG (exp, 1);
38235 op0 = expand_normal (arg0);
38236 op1 = expand_normal (arg1);
38238 if (!register_operand (op0, Pmode))
38239 op0 = ix86_zero_extend_to_Pmode (op0);
38240 if (!register_operand (op1, Pmode))
38241 op1 = ix86_zero_extend_to_Pmode (op1);
38243 emit_insn (BNDmode == BND64mode
38244 ? gen_bnd64_ldx (target, op0, op1)
38245 : gen_bnd32_ldx (target, op0, op1));
38246 return target;
38248 case IX86_BUILTIN_BNDCL:
38249 arg0 = CALL_EXPR_ARG (exp, 0);
38250 arg1 = CALL_EXPR_ARG (exp, 1);
38252 op0 = expand_normal (arg0);
38253 op1 = expand_normal (arg1);
38255 if (!register_operand (op0, Pmode))
38256 op0 = ix86_zero_extend_to_Pmode (op0);
38257 if (!register_operand (op1, BNDmode))
38258 op1 = copy_to_mode_reg (BNDmode, op1);
38260 emit_insn (BNDmode == BND64mode
38261 ? gen_bnd64_cl (op1, op0)
38262 : gen_bnd32_cl (op1, op0));
38263 return 0;
38265 case IX86_BUILTIN_BNDCU:
38266 arg0 = CALL_EXPR_ARG (exp, 0);
38267 arg1 = CALL_EXPR_ARG (exp, 1);
38269 op0 = expand_normal (arg0);
38270 op1 = expand_normal (arg1);
38272 if (!register_operand (op0, Pmode))
38273 op0 = ix86_zero_extend_to_Pmode (op0);
38274 if (!register_operand (op1, BNDmode))
38275 op1 = copy_to_mode_reg (BNDmode, op1);
38277 emit_insn (BNDmode == BND64mode
38278 ? gen_bnd64_cu (op1, op0)
38279 : gen_bnd32_cu (op1, op0));
38280 return 0;
38282 case IX86_BUILTIN_BNDRET:
38283 arg0 = CALL_EXPR_ARG (exp, 0);
38284 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38285 target = chkp_get_rtl_bounds (arg0);
38287 /* If no bounds were specified for returned value,
38288 then use INIT bounds. It usually happens when
38289 some built-in function is expanded. */
38290 if (!target)
38292 rtx t1 = gen_reg_rtx (Pmode);
38293 rtx t2 = gen_reg_rtx (Pmode);
38294 target = gen_reg_rtx (BNDmode);
38295 emit_move_insn (t1, const0_rtx);
38296 emit_move_insn (t2, constm1_rtx);
38297 emit_insn (BNDmode == BND64mode
38298 ? gen_bnd64_mk (target, t1, t2)
38299 : gen_bnd32_mk (target, t1, t2));
38302 gcc_assert (target && REG_P (target));
38303 return target;
38305 case IX86_BUILTIN_BNDNARROW:
38307 rtx m1, m1h1, m1h2, lb, ub, t1;
38309 /* Return value and lb. */
38310 arg0 = CALL_EXPR_ARG (exp, 0);
38311 /* Bounds. */
38312 arg1 = CALL_EXPR_ARG (exp, 1);
38313 /* Size. */
38314 arg2 = CALL_EXPR_ARG (exp, 2);
38316 lb = expand_normal (arg0);
38317 op1 = expand_normal (arg1);
38318 op2 = expand_normal (arg2);
38320 /* Size was passed but we need to use (size - 1) as for bndmk. */
38321 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38322 NULL_RTX, 1, OPTAB_DIRECT);
38324 /* Add LB to size and invert to get UB.  */
38325 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38326 op2, 1, OPTAB_DIRECT);
38327 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38329 if (!register_operand (lb, Pmode))
38330 lb = ix86_zero_extend_to_Pmode (lb);
38331 if (!register_operand (ub, Pmode))
38332 ub = ix86_zero_extend_to_Pmode (ub);
38334 /* We need to move bounds to memory before any computations. */
38335 if (MEM_P (op1))
38336 m1 = op1;
38337 else
38339 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38340 emit_move_insn (m1, op1);
38343 /* Generate mem expression to be used for access to LB and UB. */
38344 m1h1 = adjust_address (m1, Pmode, 0);
38345 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38347 t1 = gen_reg_rtx (Pmode);
38349 /* Compute LB. */
38350 emit_move_insn (t1, m1h1);
38351 ix86_emit_move_max (t1, lb);
38352 emit_move_insn (m1h1, t1);
38354 /* Compute UB. UB is stored in 1's complement form. Therefore
38355 we also use max here. */
38356 emit_move_insn (t1, m1h2);
38357 ix86_emit_move_max (t1, ub);
38358 emit_move_insn (m1h2, t1);
38360 op2 = gen_reg_rtx (BNDmode);
38361 emit_move_insn (op2, m1);
38363 return chkp_join_splitted_slot (lb, op2);
38366 case IX86_BUILTIN_BNDINT:
38368 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38370 if (!target
38371 || GET_MODE (target) != BNDmode
38372 || !register_operand (target, BNDmode))
38373 target = gen_reg_rtx (BNDmode);
38375 arg0 = CALL_EXPR_ARG (exp, 0);
38376 arg1 = CALL_EXPR_ARG (exp, 1);
38378 op0 = expand_normal (arg0);
38379 op1 = expand_normal (arg1);
38381 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38382 rh1 = adjust_address (res, Pmode, 0);
38383 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38385 /* Put the first bounds into temporaries. */
38386 lb1 = gen_reg_rtx (Pmode);
38387 ub1 = gen_reg_rtx (Pmode);
38388 if (MEM_P (op0))
38390 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38391 emit_move_insn (ub1, adjust_address (op0, Pmode,
38392 GET_MODE_SIZE (Pmode)));
38394 else
38396 emit_move_insn (res, op0);
38397 emit_move_insn (lb1, rh1);
38398 emit_move_insn (ub1, rh2);
38401 /* Put the second bounds into temporaries. */
38402 lb2 = gen_reg_rtx (Pmode);
38403 ub2 = gen_reg_rtx (Pmode);
38404 if (MEM_P (op1))
38406 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38407 emit_move_insn (ub2, adjust_address (op1, Pmode,
38408 GET_MODE_SIZE (Pmode)));
38410 else
38412 emit_move_insn (res, op1);
38413 emit_move_insn (lb2, rh1);
38414 emit_move_insn (ub2, rh2);
38417 /* Compute LB. */
38418 ix86_emit_move_max (lb1, lb2);
38419 emit_move_insn (rh1, lb1);
38421 /* Compute UB. UB is stored in 1's complement form. Therefore
38422 we also use max here. */
38423 ix86_emit_move_max (ub1, ub2);
38424 emit_move_insn (rh2, ub1);
38426 emit_move_insn (target, res);
38428 return target;
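/* Illustrative plain-C model of the intersection above -- a sketch added for
   clarity, not part of the original expander.  Because the upper bound is
   kept in one's-complement form, both halves of the result are simple
   maxima (a larger ~UB means a smaller real UB).  64-bit bounds are assumed
   purely for the sake of the sketch.  */
#if 0
#include <stdint.h>

struct mpx_bnd_model { uint64_t lb; uint64_t ub_compl; }; /* ub_compl == ~UB */

static struct mpx_bnd_model
mpx_bnd_intersect_model (struct mpx_bnd_model a, struct mpx_bnd_model b)
{
  struct mpx_bnd_model r;
  r.lb = a.lb > b.lb ? a.lb : b.lb;                       /* MAX of lower bounds.  */
  r.ub_compl = a.ub_compl > b.ub_compl                    /* MAX of ~UB, i.e. MIN of UB.  */
               ? a.ub_compl : b.ub_compl;
  return r;                                               /* Real UB of R is ~r.ub_compl.  */
}
#endif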
38431 case IX86_BUILTIN_SIZEOF:
38433 tree name;
38434 rtx symbol;
38436 if (!target
38437 || GET_MODE (target) != Pmode
38438 || !register_operand (target, Pmode))
38439 target = gen_reg_rtx (Pmode);
38441 arg0 = CALL_EXPR_ARG (exp, 0);
38442 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38444 name = DECL_ASSEMBLER_NAME (arg0);
38445 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38447 emit_insn (Pmode == SImode
38448 ? gen_move_size_reloc_si (target, symbol)
38449 : gen_move_size_reloc_di (target, symbol));
38451 return target;
38454 case IX86_BUILTIN_BNDLOWER:
38456 rtx mem, hmem;
38458 if (!target
38459 || GET_MODE (target) != Pmode
38460 || !register_operand (target, Pmode))
38461 target = gen_reg_rtx (Pmode);
38463 arg0 = CALL_EXPR_ARG (exp, 0);
38464 op0 = expand_normal (arg0);
38466 /* We need to move bounds to memory first. */
38467 if (MEM_P (op0))
38468 mem = op0;
38469 else
38471 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38472 emit_move_insn (mem, op0);
38475 /* Generate mem expression to access LB and load it. */
38476 hmem = adjust_address (mem, Pmode, 0);
38477 emit_move_insn (target, hmem);
38479 return target;
38482 case IX86_BUILTIN_BNDUPPER:
38484 rtx mem, hmem, res;
38486 if (!target
38487 || GET_MODE (target) != Pmode
38488 || !register_operand (target, Pmode))
38489 target = gen_reg_rtx (Pmode);
38491 arg0 = CALL_EXPR_ARG (exp, 0);
38492 op0 = expand_normal (arg0);
38494 /* We need to move bounds to memory first. */
38495 if (MEM_P (op0))
38496 mem = op0;
38497 else
38499 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38500 emit_move_insn (mem, op0);
38503 /* Generate mem expression to access UB. */
38504 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38506 /* We need to invert all bits of UB. */
38507 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38509 if (res != target)
38510 emit_move_insn (target, res);
38512 return target;
38515 case IX86_BUILTIN_MASKMOVQ:
38516 case IX86_BUILTIN_MASKMOVDQU:
38517 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38518 ? CODE_FOR_mmx_maskmovq
38519 : CODE_FOR_sse2_maskmovdqu);
38520 /* Note the arg order is different from the operand order. */
38521 arg1 = CALL_EXPR_ARG (exp, 0);
38522 arg2 = CALL_EXPR_ARG (exp, 1);
38523 arg0 = CALL_EXPR_ARG (exp, 2);
38524 op0 = expand_normal (arg0);
38525 op1 = expand_normal (arg1);
38526 op2 = expand_normal (arg2);
38527 mode0 = insn_data[icode].operand[0].mode;
38528 mode1 = insn_data[icode].operand[1].mode;
38529 mode2 = insn_data[icode].operand[2].mode;
38531 op0 = ix86_zero_extend_to_Pmode (op0);
38532 op0 = gen_rtx_MEM (mode1, op0);
38534 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38535 op0 = copy_to_mode_reg (mode0, op0);
38536 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38537 op1 = copy_to_mode_reg (mode1, op1);
38538 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38539 op2 = copy_to_mode_reg (mode2, op2);
38540 pat = GEN_FCN (icode) (op0, op1, op2);
38541 if (! pat)
38542 return 0;
38543 emit_insn (pat);
38544 return 0;
38546 case IX86_BUILTIN_LDMXCSR:
38547 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38548 target = assign_386_stack_local (SImode, SLOT_TEMP);
38549 emit_move_insn (target, op0);
38550 emit_insn (gen_sse_ldmxcsr (target));
38551 return 0;
38553 case IX86_BUILTIN_STMXCSR:
38554 target = assign_386_stack_local (SImode, SLOT_TEMP);
38555 emit_insn (gen_sse_stmxcsr (target));
38556 return copy_to_mode_reg (SImode, target);
38558 case IX86_BUILTIN_CLFLUSH:
38559 arg0 = CALL_EXPR_ARG (exp, 0);
38560 op0 = expand_normal (arg0);
38561 icode = CODE_FOR_sse2_clflush;
38562 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38563 op0 = ix86_zero_extend_to_Pmode (op0);
38565 emit_insn (gen_sse2_clflush (op0));
38566 return 0;
38568 case IX86_BUILTIN_CLFLUSHOPT:
38569 arg0 = CALL_EXPR_ARG (exp, 0);
38570 op0 = expand_normal (arg0);
38571 icode = CODE_FOR_clflushopt;
38572 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38573 op0 = ix86_zero_extend_to_Pmode (op0);
38575 emit_insn (gen_clflushopt (op0));
38576 return 0;
38578 case IX86_BUILTIN_MONITOR:
38579 arg0 = CALL_EXPR_ARG (exp, 0);
38580 arg1 = CALL_EXPR_ARG (exp, 1);
38581 arg2 = CALL_EXPR_ARG (exp, 2);
38582 op0 = expand_normal (arg0);
38583 op1 = expand_normal (arg1);
38584 op2 = expand_normal (arg2);
38585 if (!REG_P (op0))
38586 op0 = ix86_zero_extend_to_Pmode (op0);
38587 if (!REG_P (op1))
38588 op1 = copy_to_mode_reg (SImode, op1);
38589 if (!REG_P (op2))
38590 op2 = copy_to_mode_reg (SImode, op2);
38591 emit_insn (ix86_gen_monitor (op0, op1, op2));
38592 return 0;
38594 case IX86_BUILTIN_MWAIT:
38595 arg0 = CALL_EXPR_ARG (exp, 0);
38596 arg1 = CALL_EXPR_ARG (exp, 1);
38597 op0 = expand_normal (arg0);
38598 op1 = expand_normal (arg1);
38599 if (!REG_P (op0))
38600 op0 = copy_to_mode_reg (SImode, op0);
38601 if (!REG_P (op1))
38602 op1 = copy_to_mode_reg (SImode, op1);
38603 emit_insn (gen_sse3_mwait (op0, op1));
38604 return 0;
38606 case IX86_BUILTIN_VEC_INIT_V2SI:
38607 case IX86_BUILTIN_VEC_INIT_V4HI:
38608 case IX86_BUILTIN_VEC_INIT_V8QI:
38609 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38611 case IX86_BUILTIN_VEC_EXT_V2DF:
38612 case IX86_BUILTIN_VEC_EXT_V2DI:
38613 case IX86_BUILTIN_VEC_EXT_V4SF:
38614 case IX86_BUILTIN_VEC_EXT_V4SI:
38615 case IX86_BUILTIN_VEC_EXT_V8HI:
38616 case IX86_BUILTIN_VEC_EXT_V2SI:
38617 case IX86_BUILTIN_VEC_EXT_V4HI:
38618 case IX86_BUILTIN_VEC_EXT_V16QI:
38619 return ix86_expand_vec_ext_builtin (exp, target);
38621 case IX86_BUILTIN_VEC_SET_V2DI:
38622 case IX86_BUILTIN_VEC_SET_V4SF:
38623 case IX86_BUILTIN_VEC_SET_V4SI:
38624 case IX86_BUILTIN_VEC_SET_V8HI:
38625 case IX86_BUILTIN_VEC_SET_V4HI:
38626 case IX86_BUILTIN_VEC_SET_V16QI:
38627 return ix86_expand_vec_set_builtin (exp);
38629 case IX86_BUILTIN_INFQ:
38630 case IX86_BUILTIN_HUGE_VALQ:
38632 REAL_VALUE_TYPE inf;
38633 rtx tmp;
38635 real_inf (&inf);
38636 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38638 tmp = validize_mem (force_const_mem (mode, tmp));
38640 if (target == 0)
38641 target = gen_reg_rtx (mode);
38643 emit_move_insn (target, tmp);
38644 return target;
38647 case IX86_BUILTIN_RDPMC:
38648 case IX86_BUILTIN_RDTSC:
38649 case IX86_BUILTIN_RDTSCP:
38651 op0 = gen_reg_rtx (DImode);
38652 op1 = gen_reg_rtx (DImode);
38654 if (fcode == IX86_BUILTIN_RDPMC)
38656 arg0 = CALL_EXPR_ARG (exp, 0);
38657 op2 = expand_normal (arg0);
38658 if (!register_operand (op2, SImode))
38659 op2 = copy_to_mode_reg (SImode, op2);
38661 insn = (TARGET_64BIT
38662 ? gen_rdpmc_rex64 (op0, op1, op2)
38663 : gen_rdpmc (op0, op2));
38664 emit_insn (insn);
38666 else if (fcode == IX86_BUILTIN_RDTSC)
38668 insn = (TARGET_64BIT
38669 ? gen_rdtsc_rex64 (op0, op1)
38670 : gen_rdtsc (op0));
38671 emit_insn (insn);
38673 else
38675 op2 = gen_reg_rtx (SImode);
38677 insn = (TARGET_64BIT
38678 ? gen_rdtscp_rex64 (op0, op1, op2)
38679 : gen_rdtscp (op0, op2));
38680 emit_insn (insn);
38682 arg0 = CALL_EXPR_ARG (exp, 0);
38683 op4 = expand_normal (arg0);
38684 if (!address_operand (op4, VOIDmode))
38686 op4 = convert_memory_address (Pmode, op4);
38687 op4 = copy_addr_to_reg (op4);
38689 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38692 if (target == 0)
38694 /* mode is VOIDmode if __builtin_rd* has been called
38695 without an lhs. */
38696 if (mode == VOIDmode)
38697 return target;
38698 target = gen_reg_rtx (mode);
38701 if (TARGET_64BIT)
38703 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38704 op1, 1, OPTAB_DIRECT);
38705 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38706 op0, 1, OPTAB_DIRECT);
38709 emit_move_insn (target, op0);
38710 return target;
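/* Illustrative sketch, not part of the original expander: the ASHIFT/IOR
   pair emitted above for TARGET_64BIT simply recombines the EDX:EAX halves
   returned by rdtsc/rdtscp/rdpmc into one 64-bit value, i.e.  */
#if 0
static unsigned long long
combine_edx_eax_model (unsigned int lo, unsigned int hi)
{
  return ((unsigned long long) hi << 32) | lo;
}
#endif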
38712 case IX86_BUILTIN_FXSAVE:
38713 case IX86_BUILTIN_FXRSTOR:
38714 case IX86_BUILTIN_FXSAVE64:
38715 case IX86_BUILTIN_FXRSTOR64:
38716 case IX86_BUILTIN_FNSTENV:
38717 case IX86_BUILTIN_FLDENV:
38718 mode0 = BLKmode;
38719 switch (fcode)
38721 case IX86_BUILTIN_FXSAVE:
38722 icode = CODE_FOR_fxsave;
38723 break;
38724 case IX86_BUILTIN_FXRSTOR:
38725 icode = CODE_FOR_fxrstor;
38726 break;
38727 case IX86_BUILTIN_FXSAVE64:
38728 icode = CODE_FOR_fxsave64;
38729 break;
38730 case IX86_BUILTIN_FXRSTOR64:
38731 icode = CODE_FOR_fxrstor64;
38732 break;
38733 case IX86_BUILTIN_FNSTENV:
38734 icode = CODE_FOR_fnstenv;
38735 break;
38736 case IX86_BUILTIN_FLDENV:
38737 icode = CODE_FOR_fldenv;
38738 break;
38739 default:
38740 gcc_unreachable ();
38743 arg0 = CALL_EXPR_ARG (exp, 0);
38744 op0 = expand_normal (arg0);
38746 if (!address_operand (op0, VOIDmode))
38748 op0 = convert_memory_address (Pmode, op0);
38749 op0 = copy_addr_to_reg (op0);
38751 op0 = gen_rtx_MEM (mode0, op0);
38753 pat = GEN_FCN (icode) (op0);
38754 if (pat)
38755 emit_insn (pat);
38756 return 0;
38758 case IX86_BUILTIN_XSAVE:
38759 case IX86_BUILTIN_XRSTOR:
38760 case IX86_BUILTIN_XSAVE64:
38761 case IX86_BUILTIN_XRSTOR64:
38762 case IX86_BUILTIN_XSAVEOPT:
38763 case IX86_BUILTIN_XSAVEOPT64:
38764 case IX86_BUILTIN_XSAVES:
38765 case IX86_BUILTIN_XRSTORS:
38766 case IX86_BUILTIN_XSAVES64:
38767 case IX86_BUILTIN_XRSTORS64:
38768 case IX86_BUILTIN_XSAVEC:
38769 case IX86_BUILTIN_XSAVEC64:
38770 arg0 = CALL_EXPR_ARG (exp, 0);
38771 arg1 = CALL_EXPR_ARG (exp, 1);
38772 op0 = expand_normal (arg0);
38773 op1 = expand_normal (arg1);
38775 if (!address_operand (op0, VOIDmode))
38777 op0 = convert_memory_address (Pmode, op0);
38778 op0 = copy_addr_to_reg (op0);
38780 op0 = gen_rtx_MEM (BLKmode, op0);
38782 op1 = force_reg (DImode, op1);
38784 if (TARGET_64BIT)
38786 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
38787 NULL, 1, OPTAB_DIRECT);
38788 switch (fcode)
38790 case IX86_BUILTIN_XSAVE:
38791 icode = CODE_FOR_xsave_rex64;
38792 break;
38793 case IX86_BUILTIN_XRSTOR:
38794 icode = CODE_FOR_xrstor_rex64;
38795 break;
38796 case IX86_BUILTIN_XSAVE64:
38797 icode = CODE_FOR_xsave64;
38798 break;
38799 case IX86_BUILTIN_XRSTOR64:
38800 icode = CODE_FOR_xrstor64;
38801 break;
38802 case IX86_BUILTIN_XSAVEOPT:
38803 icode = CODE_FOR_xsaveopt_rex64;
38804 break;
38805 case IX86_BUILTIN_XSAVEOPT64:
38806 icode = CODE_FOR_xsaveopt64;
38807 break;
38808 case IX86_BUILTIN_XSAVES:
38809 icode = CODE_FOR_xsaves_rex64;
38810 break;
38811 case IX86_BUILTIN_XRSTORS:
38812 icode = CODE_FOR_xrstors_rex64;
38813 break;
38814 case IX86_BUILTIN_XSAVES64:
38815 icode = CODE_FOR_xsaves64;
38816 break;
38817 case IX86_BUILTIN_XRSTORS64:
38818 icode = CODE_FOR_xrstors64;
38819 break;
38820 case IX86_BUILTIN_XSAVEC:
38821 icode = CODE_FOR_xsavec_rex64;
38822 break;
38823 case IX86_BUILTIN_XSAVEC64:
38824 icode = CODE_FOR_xsavec64;
38825 break;
38826 default:
38827 gcc_unreachable ();
38830 op2 = gen_lowpart (SImode, op2);
38831 op1 = gen_lowpart (SImode, op1);
38832 pat = GEN_FCN (icode) (op0, op1, op2);
38834 else
38836 switch (fcode)
38838 case IX86_BUILTIN_XSAVE:
38839 icode = CODE_FOR_xsave;
38840 break;
38841 case IX86_BUILTIN_XRSTOR:
38842 icode = CODE_FOR_xrstor;
38843 break;
38844 case IX86_BUILTIN_XSAVEOPT:
38845 icode = CODE_FOR_xsaveopt;
38846 break;
38847 case IX86_BUILTIN_XSAVES:
38848 icode = CODE_FOR_xsaves;
38849 break;
38850 case IX86_BUILTIN_XRSTORS:
38851 icode = CODE_FOR_xrstors;
38852 break;
38853 case IX86_BUILTIN_XSAVEC:
38854 icode = CODE_FOR_xsavec;
38855 break;
38856 default:
38857 gcc_unreachable ();
38859 pat = GEN_FCN (icode) (op0, op1);
38862 if (pat)
38863 emit_insn (pat);
38864 return 0;
38866 case IX86_BUILTIN_LLWPCB:
38867 arg0 = CALL_EXPR_ARG (exp, 0);
38868 op0 = expand_normal (arg0);
38869 icode = CODE_FOR_lwp_llwpcb;
38870 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38871 op0 = ix86_zero_extend_to_Pmode (op0);
38872 emit_insn (gen_lwp_llwpcb (op0));
38873 return 0;
38875 case IX86_BUILTIN_SLWPCB:
38876 icode = CODE_FOR_lwp_slwpcb;
38877 if (!target
38878 || !insn_data[icode].operand[0].predicate (target, Pmode))
38879 target = gen_reg_rtx (Pmode);
38880 emit_insn (gen_lwp_slwpcb (target));
38881 return target;
38883 case IX86_BUILTIN_BEXTRI32:
38884 case IX86_BUILTIN_BEXTRI64:
38885 arg0 = CALL_EXPR_ARG (exp, 0);
38886 arg1 = CALL_EXPR_ARG (exp, 1);
38887 op0 = expand_normal (arg0);
38888 op1 = expand_normal (arg1);
38889 icode = (fcode == IX86_BUILTIN_BEXTRI32
38890 ? CODE_FOR_tbm_bextri_si
38891 : CODE_FOR_tbm_bextri_di);
38892 if (!CONST_INT_P (op1))
38894 error ("last argument must be an immediate");
38895 return const0_rtx;
38897 else
38899 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
38900 unsigned char lsb_index = INTVAL (op1) & 0xFF;
38901 op1 = GEN_INT (length);
38902 op2 = GEN_INT (lsb_index);
38903 pat = GEN_FCN (icode) (target, op0, op1, op2);
38904 if (pat)
38905 emit_insn (pat);
38906 return target;
38909 case IX86_BUILTIN_RDRAND16_STEP:
38910 icode = CODE_FOR_rdrandhi_1;
38911 mode0 = HImode;
38912 goto rdrand_step;
38914 case IX86_BUILTIN_RDRAND32_STEP:
38915 icode = CODE_FOR_rdrandsi_1;
38916 mode0 = SImode;
38917 goto rdrand_step;
38919 case IX86_BUILTIN_RDRAND64_STEP:
38920 icode = CODE_FOR_rdranddi_1;
38921 mode0 = DImode;
38923 rdrand_step:
38924 op0 = gen_reg_rtx (mode0);
38925 emit_insn (GEN_FCN (icode) (op0));
38927 arg0 = CALL_EXPR_ARG (exp, 0);
38928 op1 = expand_normal (arg0);
38929 if (!address_operand (op1, VOIDmode))
38931 op1 = convert_memory_address (Pmode, op1);
38932 op1 = copy_addr_to_reg (op1);
38934 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
38936 op1 = gen_reg_rtx (SImode);
38937 emit_move_insn (op1, CONST1_RTX (SImode));
38939 /* Emit SImode conditional move. */
38940 if (mode0 == HImode)
38942 op2 = gen_reg_rtx (SImode);
38943 emit_insn (gen_zero_extendhisi2 (op2, op0));
38945 else if (mode0 == SImode)
38946 op2 = op0;
38947 else
38948 op2 = gen_rtx_SUBREG (SImode, op0, 0);
38950 if (target == 0
38951 || !register_operand (target, SImode))
38952 target = gen_reg_rtx (SImode);
38954 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
38955 const0_rtx);
38956 emit_insn (gen_rtx_SET (VOIDmode, target,
38957 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
38958 return target;
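/* Illustrative usage sketch (an assumption about typical use, not part of
   the original expander): the *_step expansion above stores the random value
   through the pointer argument and yields a nonzero result only when the
   hardware reported success via the carry flag, so callers typically retry.  */
#if 0
static unsigned int
get_hw_random_model (void)
{
  unsigned int r;
  while (!__builtin_ia32_rdrand32_step (&r))
    ;                           /* CF was clear: no entropy yet, retry.  */
  return r;
}
#endif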
38960 case IX86_BUILTIN_RDSEED16_STEP:
38961 icode = CODE_FOR_rdseedhi_1;
38962 mode0 = HImode;
38963 goto rdseed_step;
38965 case IX86_BUILTIN_RDSEED32_STEP:
38966 icode = CODE_FOR_rdseedsi_1;
38967 mode0 = SImode;
38968 goto rdseed_step;
38970 case IX86_BUILTIN_RDSEED64_STEP:
38971 icode = CODE_FOR_rdseeddi_1;
38972 mode0 = DImode;
38974 rdseed_step:
38975 op0 = gen_reg_rtx (mode0);
38976 emit_insn (GEN_FCN (icode) (op0));
38978 arg0 = CALL_EXPR_ARG (exp, 0);
38979 op1 = expand_normal (arg0);
38980 if (!address_operand (op1, VOIDmode))
38982 op1 = convert_memory_address (Pmode, op1);
38983 op1 = copy_addr_to_reg (op1);
38985 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
38987 op2 = gen_reg_rtx (QImode);
38989 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
38990 const0_rtx);
38991 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
38993 if (target == 0
38994 || !register_operand (target, SImode))
38995 target = gen_reg_rtx (SImode);
38997 emit_insn (gen_zero_extendqisi2 (target, op2));
38998 return target;
39000 case IX86_BUILTIN_SBB32:
39001 icode = CODE_FOR_subsi3_carry;
39002 mode0 = SImode;
39003 goto addcarryx;
39005 case IX86_BUILTIN_SBB64:
39006 icode = CODE_FOR_subdi3_carry;
39007 mode0 = DImode;
39008 goto addcarryx;
39010 case IX86_BUILTIN_ADDCARRYX32:
39011 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39012 mode0 = SImode;
39013 goto addcarryx;
39015 case IX86_BUILTIN_ADDCARRYX64:
39016 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39017 mode0 = DImode;
39019 addcarryx:
39020 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39021 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39022 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39023 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39025 op0 = gen_reg_rtx (QImode);
39027 /* Generate CF from input operand. */
39028 op1 = expand_normal (arg0);
39029 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39030 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39032 /* Generate an ADCX/ADC (or SBB) instruction to compute X + Y + CF (or X - Y - CF). */
39033 op2 = expand_normal (arg1);
39034 op3 = expand_normal (arg2);
39036 if (!REG_P (op2))
39037 op2 = copy_to_mode_reg (mode0, op2);
39038 if (!REG_P (op3))
39039 op3 = copy_to_mode_reg (mode0, op3);
39041 op0 = gen_reg_rtx (mode0);
39043 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39044 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39045 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39047 /* Store the result. */
39048 op4 = expand_normal (arg3);
39049 if (!address_operand (op4, VOIDmode))
39051 op4 = convert_memory_address (Pmode, op4);
39052 op4 = copy_addr_to_reg (op4);
39054 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39056 /* Return current CF value. */
39057 if (target == 0)
39058 target = gen_reg_rtx (QImode);
39060 PUT_MODE (pat, QImode);
39061 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39062 return target;
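/* Illustrative plain-C model, not part of the original expander: for the
   ADDCARRYX32 flavour, regenerating CF from the incoming carry byte and then
   emitting ADCX/ADC amounts to the following (the SBB variants subtract with
   borrow instead).  */
#if 0
static unsigned char
addcarry_u32_model (unsigned char c_in, unsigned int src1, unsigned int src2,
                    unsigned int *sum_out)
{
  unsigned long long sum = (unsigned long long) src1 + src2 + (c_in != 0);
  *sum_out = (unsigned int) sum;
  return (unsigned char) (sum >> 32);   /* New carry flag.  */
}
#endif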
39064 case IX86_BUILTIN_READ_FLAGS:
39065 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39067 if (optimize
39068 || target == NULL_RTX
39069 || !nonimmediate_operand (target, word_mode)
39070 || GET_MODE (target) != word_mode)
39071 target = gen_reg_rtx (word_mode);
39073 emit_insn (gen_pop (target));
39074 return target;
39076 case IX86_BUILTIN_WRITE_FLAGS:
39078 arg0 = CALL_EXPR_ARG (exp, 0);
39079 op0 = expand_normal (arg0);
39080 if (!general_no_elim_operand (op0, word_mode))
39081 op0 = copy_to_mode_reg (word_mode, op0);
39083 emit_insn (gen_push (op0));
39084 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39085 return 0;
39087 case IX86_BUILTIN_KORTESTC16:
39088 icode = CODE_FOR_kortestchi;
39089 mode0 = HImode;
39090 mode1 = CCCmode;
39091 goto kortest;
39093 case IX86_BUILTIN_KORTESTZ16:
39094 icode = CODE_FOR_kortestzhi;
39095 mode0 = HImode;
39096 mode1 = CCZmode;
39098 kortest:
39099 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39100 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39101 op0 = expand_normal (arg0);
39102 op1 = expand_normal (arg1);
39104 op0 = copy_to_reg (op0);
39105 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39106 op1 = copy_to_reg (op1);
39107 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39109 target = gen_reg_rtx (QImode);
39110 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39112 /* Emit kortest. */
39113 emit_insn (GEN_FCN (icode) (op0, op1));
39114 /* And use setcc to return result from flags. */
39115 ix86_expand_setcc (target, EQ,
39116 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39117 return target;
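/* Illustrative plain-C model of the 16-bit kortest semantics tested above --
   a sketch, not part of the original expander: KORTESTZ reports whether the
   OR of the two masks is all zeros, KORTESTC whether it is all ones.  */
#if 0
static int
kortestz_hi_model (unsigned short m1, unsigned short m2)
{
  return (unsigned short) (m1 | m2) == 0;       /* ZF-based variant.  */
}

static int
kortestc_hi_model (unsigned short m1, unsigned short m2)
{
  return (unsigned short) (m1 | m2) == 0xffff;  /* CF-based variant.  */
}
#endif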
39119 case IX86_BUILTIN_GATHERSIV2DF:
39120 icode = CODE_FOR_avx2_gathersiv2df;
39121 goto gather_gen;
39122 case IX86_BUILTIN_GATHERSIV4DF:
39123 icode = CODE_FOR_avx2_gathersiv4df;
39124 goto gather_gen;
39125 case IX86_BUILTIN_GATHERDIV2DF:
39126 icode = CODE_FOR_avx2_gatherdiv2df;
39127 goto gather_gen;
39128 case IX86_BUILTIN_GATHERDIV4DF:
39129 icode = CODE_FOR_avx2_gatherdiv4df;
39130 goto gather_gen;
39131 case IX86_BUILTIN_GATHERSIV4SF:
39132 icode = CODE_FOR_avx2_gathersiv4sf;
39133 goto gather_gen;
39134 case IX86_BUILTIN_GATHERSIV8SF:
39135 icode = CODE_FOR_avx2_gathersiv8sf;
39136 goto gather_gen;
39137 case IX86_BUILTIN_GATHERDIV4SF:
39138 icode = CODE_FOR_avx2_gatherdiv4sf;
39139 goto gather_gen;
39140 case IX86_BUILTIN_GATHERDIV8SF:
39141 icode = CODE_FOR_avx2_gatherdiv8sf;
39142 goto gather_gen;
39143 case IX86_BUILTIN_GATHERSIV2DI:
39144 icode = CODE_FOR_avx2_gathersiv2di;
39145 goto gather_gen;
39146 case IX86_BUILTIN_GATHERSIV4DI:
39147 icode = CODE_FOR_avx2_gathersiv4di;
39148 goto gather_gen;
39149 case IX86_BUILTIN_GATHERDIV2DI:
39150 icode = CODE_FOR_avx2_gatherdiv2di;
39151 goto gather_gen;
39152 case IX86_BUILTIN_GATHERDIV4DI:
39153 icode = CODE_FOR_avx2_gatherdiv4di;
39154 goto gather_gen;
39155 case IX86_BUILTIN_GATHERSIV4SI:
39156 icode = CODE_FOR_avx2_gathersiv4si;
39157 goto gather_gen;
39158 case IX86_BUILTIN_GATHERSIV8SI:
39159 icode = CODE_FOR_avx2_gathersiv8si;
39160 goto gather_gen;
39161 case IX86_BUILTIN_GATHERDIV4SI:
39162 icode = CODE_FOR_avx2_gatherdiv4si;
39163 goto gather_gen;
39164 case IX86_BUILTIN_GATHERDIV8SI:
39165 icode = CODE_FOR_avx2_gatherdiv8si;
39166 goto gather_gen;
39167 case IX86_BUILTIN_GATHERALTSIV4DF:
39168 icode = CODE_FOR_avx2_gathersiv4df;
39169 goto gather_gen;
39170 case IX86_BUILTIN_GATHERALTDIV8SF:
39171 icode = CODE_FOR_avx2_gatherdiv8sf;
39172 goto gather_gen;
39173 case IX86_BUILTIN_GATHERALTSIV4DI:
39174 icode = CODE_FOR_avx2_gathersiv4di;
39175 goto gather_gen;
39176 case IX86_BUILTIN_GATHERALTDIV8SI:
39177 icode = CODE_FOR_avx2_gatherdiv8si;
39178 goto gather_gen;
39179 case IX86_BUILTIN_GATHER3SIV16SF:
39180 icode = CODE_FOR_avx512f_gathersiv16sf;
39181 goto gather_gen;
39182 case IX86_BUILTIN_GATHER3SIV8DF:
39183 icode = CODE_FOR_avx512f_gathersiv8df;
39184 goto gather_gen;
39185 case IX86_BUILTIN_GATHER3DIV16SF:
39186 icode = CODE_FOR_avx512f_gatherdiv16sf;
39187 goto gather_gen;
39188 case IX86_BUILTIN_GATHER3DIV8DF:
39189 icode = CODE_FOR_avx512f_gatherdiv8df;
39190 goto gather_gen;
39191 case IX86_BUILTIN_GATHER3SIV16SI:
39192 icode = CODE_FOR_avx512f_gathersiv16si;
39193 goto gather_gen;
39194 case IX86_BUILTIN_GATHER3SIV8DI:
39195 icode = CODE_FOR_avx512f_gathersiv8di;
39196 goto gather_gen;
39197 case IX86_BUILTIN_GATHER3DIV16SI:
39198 icode = CODE_FOR_avx512f_gatherdiv16si;
39199 goto gather_gen;
39200 case IX86_BUILTIN_GATHER3DIV8DI:
39201 icode = CODE_FOR_avx512f_gatherdiv8di;
39202 goto gather_gen;
39203 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39204 icode = CODE_FOR_avx512f_gathersiv8df;
39205 goto gather_gen;
39206 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39207 icode = CODE_FOR_avx512f_gatherdiv16sf;
39208 goto gather_gen;
39209 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39210 icode = CODE_FOR_avx512f_gathersiv8di;
39211 goto gather_gen;
39212 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39213 icode = CODE_FOR_avx512f_gatherdiv16si;
39214 goto gather_gen;
39215 case IX86_BUILTIN_GATHER3SIV2DF:
39216 icode = CODE_FOR_avx512vl_gathersiv2df;
39217 goto gather_gen;
39218 case IX86_BUILTIN_GATHER3SIV4DF:
39219 icode = CODE_FOR_avx512vl_gathersiv4df;
39220 goto gather_gen;
39221 case IX86_BUILTIN_GATHER3DIV2DF:
39222 icode = CODE_FOR_avx512vl_gatherdiv2df;
39223 goto gather_gen;
39224 case IX86_BUILTIN_GATHER3DIV4DF:
39225 icode = CODE_FOR_avx512vl_gatherdiv4df;
39226 goto gather_gen;
39227 case IX86_BUILTIN_GATHER3SIV4SF:
39228 icode = CODE_FOR_avx512vl_gathersiv4sf;
39229 goto gather_gen;
39230 case IX86_BUILTIN_GATHER3SIV8SF:
39231 icode = CODE_FOR_avx512vl_gathersiv8sf;
39232 goto gather_gen;
39233 case IX86_BUILTIN_GATHER3DIV4SF:
39234 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39235 goto gather_gen;
39236 case IX86_BUILTIN_GATHER3DIV8SF:
39237 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39238 goto gather_gen;
39239 case IX86_BUILTIN_GATHER3SIV2DI:
39240 icode = CODE_FOR_avx512vl_gathersiv2di;
39241 goto gather_gen;
39242 case IX86_BUILTIN_GATHER3SIV4DI:
39243 icode = CODE_FOR_avx512vl_gathersiv4di;
39244 goto gather_gen;
39245 case IX86_BUILTIN_GATHER3DIV2DI:
39246 icode = CODE_FOR_avx512vl_gatherdiv2di;
39247 goto gather_gen;
39248 case IX86_BUILTIN_GATHER3DIV4DI:
39249 icode = CODE_FOR_avx512vl_gatherdiv4di;
39250 goto gather_gen;
39251 case IX86_BUILTIN_GATHER3SIV4SI:
39252 icode = CODE_FOR_avx512vl_gathersiv4si;
39253 goto gather_gen;
39254 case IX86_BUILTIN_GATHER3SIV8SI:
39255 icode = CODE_FOR_avx512vl_gathersiv8si;
39256 goto gather_gen;
39257 case IX86_BUILTIN_GATHER3DIV4SI:
39258 icode = CODE_FOR_avx512vl_gatherdiv4si;
39259 goto gather_gen;
39260 case IX86_BUILTIN_GATHER3DIV8SI:
39261 icode = CODE_FOR_avx512vl_gatherdiv8si;
39262 goto gather_gen;
39263 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39264 icode = CODE_FOR_avx512vl_gathersiv4df;
39265 goto gather_gen;
39266 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39267 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39268 goto gather_gen;
39269 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39270 icode = CODE_FOR_avx512vl_gathersiv4di;
39271 goto gather_gen;
39272 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39273 icode = CODE_FOR_avx512vl_gatherdiv8si;
39274 goto gather_gen;
39275 case IX86_BUILTIN_SCATTERSIV16SF:
39276 icode = CODE_FOR_avx512f_scattersiv16sf;
39277 goto scatter_gen;
39278 case IX86_BUILTIN_SCATTERSIV8DF:
39279 icode = CODE_FOR_avx512f_scattersiv8df;
39280 goto scatter_gen;
39281 case IX86_BUILTIN_SCATTERDIV16SF:
39282 icode = CODE_FOR_avx512f_scatterdiv16sf;
39283 goto scatter_gen;
39284 case IX86_BUILTIN_SCATTERDIV8DF:
39285 icode = CODE_FOR_avx512f_scatterdiv8df;
39286 goto scatter_gen;
39287 case IX86_BUILTIN_SCATTERSIV16SI:
39288 icode = CODE_FOR_avx512f_scattersiv16si;
39289 goto scatter_gen;
39290 case IX86_BUILTIN_SCATTERSIV8DI:
39291 icode = CODE_FOR_avx512f_scattersiv8di;
39292 goto scatter_gen;
39293 case IX86_BUILTIN_SCATTERDIV16SI:
39294 icode = CODE_FOR_avx512f_scatterdiv16si;
39295 goto scatter_gen;
39296 case IX86_BUILTIN_SCATTERDIV8DI:
39297 icode = CODE_FOR_avx512f_scatterdiv8di;
39298 goto scatter_gen;
39299 case IX86_BUILTIN_SCATTERSIV8SF:
39300 icode = CODE_FOR_avx512vl_scattersiv8sf;
39301 goto scatter_gen;
39302 case IX86_BUILTIN_SCATTERSIV4SF:
39303 icode = CODE_FOR_avx512vl_scattersiv4sf;
39304 goto scatter_gen;
39305 case IX86_BUILTIN_SCATTERSIV4DF:
39306 icode = CODE_FOR_avx512vl_scattersiv4df;
39307 goto scatter_gen;
39308 case IX86_BUILTIN_SCATTERSIV2DF:
39309 icode = CODE_FOR_avx512vl_scattersiv2df;
39310 goto scatter_gen;
39311 case IX86_BUILTIN_SCATTERDIV8SF:
39312 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39313 goto scatter_gen;
39314 case IX86_BUILTIN_SCATTERDIV4SF:
39315 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39316 goto scatter_gen;
39317 case IX86_BUILTIN_SCATTERDIV4DF:
39318 icode = CODE_FOR_avx512vl_scatterdiv4df;
39319 goto scatter_gen;
39320 case IX86_BUILTIN_SCATTERDIV2DF:
39321 icode = CODE_FOR_avx512vl_scatterdiv2df;
39322 goto scatter_gen;
39323 case IX86_BUILTIN_SCATTERSIV8SI:
39324 icode = CODE_FOR_avx512vl_scattersiv8si;
39325 goto scatter_gen;
39326 case IX86_BUILTIN_SCATTERSIV4SI:
39327 icode = CODE_FOR_avx512vl_scattersiv4si;
39328 goto scatter_gen;
39329 case IX86_BUILTIN_SCATTERSIV4DI:
39330 icode = CODE_FOR_avx512vl_scattersiv4di;
39331 goto scatter_gen;
39332 case IX86_BUILTIN_SCATTERSIV2DI:
39333 icode = CODE_FOR_avx512vl_scattersiv2di;
39334 goto scatter_gen;
39335 case IX86_BUILTIN_SCATTERDIV8SI:
39336 icode = CODE_FOR_avx512vl_scatterdiv8si;
39337 goto scatter_gen;
39338 case IX86_BUILTIN_SCATTERDIV4SI:
39339 icode = CODE_FOR_avx512vl_scatterdiv4si;
39340 goto scatter_gen;
39341 case IX86_BUILTIN_SCATTERDIV4DI:
39342 icode = CODE_FOR_avx512vl_scatterdiv4di;
39343 goto scatter_gen;
39344 case IX86_BUILTIN_SCATTERDIV2DI:
39345 icode = CODE_FOR_avx512vl_scatterdiv2di;
39346 goto scatter_gen;
39347 case IX86_BUILTIN_GATHERPFDPD:
39348 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39349 goto vec_prefetch_gen;
39350 case IX86_BUILTIN_GATHERPFDPS:
39351 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39352 goto vec_prefetch_gen;
39353 case IX86_BUILTIN_GATHERPFQPD:
39354 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39355 goto vec_prefetch_gen;
39356 case IX86_BUILTIN_GATHERPFQPS:
39357 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39358 goto vec_prefetch_gen;
39359 case IX86_BUILTIN_SCATTERPFDPD:
39360 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39361 goto vec_prefetch_gen;
39362 case IX86_BUILTIN_SCATTERPFDPS:
39363 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39364 goto vec_prefetch_gen;
39365 case IX86_BUILTIN_SCATTERPFQPD:
39366 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39367 goto vec_prefetch_gen;
39368 case IX86_BUILTIN_SCATTERPFQPS:
39369 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39370 goto vec_prefetch_gen;
39372 gather_gen:
39373 rtx half;
39374 rtx (*gen) (rtx, rtx);
39376 arg0 = CALL_EXPR_ARG (exp, 0);
39377 arg1 = CALL_EXPR_ARG (exp, 1);
39378 arg2 = CALL_EXPR_ARG (exp, 2);
39379 arg3 = CALL_EXPR_ARG (exp, 3);
39380 arg4 = CALL_EXPR_ARG (exp, 4);
39381 op0 = expand_normal (arg0);
39382 op1 = expand_normal (arg1);
39383 op2 = expand_normal (arg2);
39384 op3 = expand_normal (arg3);
39385 op4 = expand_normal (arg4);
39386 /* Note the arg order is different from the operand order. */
39387 mode0 = insn_data[icode].operand[1].mode;
39388 mode2 = insn_data[icode].operand[3].mode;
39389 mode3 = insn_data[icode].operand[4].mode;
39390 mode4 = insn_data[icode].operand[5].mode;
39392 if (target == NULL_RTX
39393 || GET_MODE (target) != insn_data[icode].operand[0].mode
39394 || !insn_data[icode].operand[0].predicate (target,
39395 GET_MODE (target)))
39396 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39397 else
39398 subtarget = target;
39400 switch (fcode)
39402 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39403 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39404 half = gen_reg_rtx (V8SImode);
39405 if (!nonimmediate_operand (op2, V16SImode))
39406 op2 = copy_to_mode_reg (V16SImode, op2);
39407 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39408 op2 = half;
39409 break;
39410 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39411 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39412 case IX86_BUILTIN_GATHERALTSIV4DF:
39413 case IX86_BUILTIN_GATHERALTSIV4DI:
39414 half = gen_reg_rtx (V4SImode);
39415 if (!nonimmediate_operand (op2, V8SImode))
39416 op2 = copy_to_mode_reg (V8SImode, op2);
39417 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39418 op2 = half;
39419 break;
39420 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39421 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39422 half = gen_reg_rtx (mode0);
39423 if (mode0 == V8SFmode)
39424 gen = gen_vec_extract_lo_v16sf;
39425 else
39426 gen = gen_vec_extract_lo_v16si;
39427 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39428 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39429 emit_insn (gen (half, op0));
39430 op0 = half;
39431 if (GET_MODE (op3) != VOIDmode)
39433 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39434 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39435 emit_insn (gen (half, op3));
39436 op3 = half;
39438 break;
39439 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39440 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39441 case IX86_BUILTIN_GATHERALTDIV8SF:
39442 case IX86_BUILTIN_GATHERALTDIV8SI:
39443 half = gen_reg_rtx (mode0);
39444 if (mode0 == V4SFmode)
39445 gen = gen_vec_extract_lo_v8sf;
39446 else
39447 gen = gen_vec_extract_lo_v8si;
39448 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39449 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39450 emit_insn (gen (half, op0));
39451 op0 = half;
39452 if (GET_MODE (op3) != VOIDmode)
39454 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39455 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39456 emit_insn (gen (half, op3));
39457 op3 = half;
39459 break;
39460 default:
39461 break;
39464 /* Force the memory operand to be built from a base register only.
39465 We don't want to do this for the memory operands of other
39466 builtin functions. */
39467 op1 = ix86_zero_extend_to_Pmode (op1);
39469 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39470 op0 = copy_to_mode_reg (mode0, op0);
39471 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39472 op1 = copy_to_mode_reg (Pmode, op1);
39473 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39474 op2 = copy_to_mode_reg (mode2, op2);
39475 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39477 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39478 op3 = copy_to_mode_reg (mode3, op3);
39480 else
39482 op3 = copy_to_reg (op3);
39483 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39485 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39487 error ("the last argument must be scale 1, 2, 4, 8");
39488 return const0_rtx;
39491 /* Optimize. If mask is known to have all high bits set,
39492 replace op0 with pc_rtx to signal that the instruction
39493 overwrites the whole destination and doesn't use its
39494 previous contents. */
39495 if (optimize)
39497 if (TREE_CODE (arg3) == INTEGER_CST)
39499 if (integer_all_onesp (arg3))
39500 op0 = pc_rtx;
39502 else if (TREE_CODE (arg3) == VECTOR_CST)
39504 unsigned int negative = 0;
39505 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39507 tree cst = VECTOR_CST_ELT (arg3, i);
39508 if (TREE_CODE (cst) == INTEGER_CST
39509 && tree_int_cst_sign_bit (cst))
39510 negative++;
39511 else if (TREE_CODE (cst) == REAL_CST
39512 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39513 negative++;
39515 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39516 op0 = pc_rtx;
39518 else if (TREE_CODE (arg3) == SSA_NAME
39519 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39521 /* Recognize also when mask is like:
39522 __v2df src = _mm_setzero_pd ();
39523 __v2df mask = _mm_cmpeq_pd (src, src);
39525 __v8sf src = _mm256_setzero_ps ();
39526 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39527 as that is a cheaper way to load all ones into
39528 a register than having to load a constant from
39529 memory. */
39530 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39531 if (is_gimple_call (def_stmt))
39533 tree fndecl = gimple_call_fndecl (def_stmt);
39534 if (fndecl
39535 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39536 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39538 case IX86_BUILTIN_CMPPD:
39539 case IX86_BUILTIN_CMPPS:
39540 case IX86_BUILTIN_CMPPD256:
39541 case IX86_BUILTIN_CMPPS256:
39542 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39543 break;
39544 /* FALLTHRU */
39545 case IX86_BUILTIN_CMPEQPD:
39546 case IX86_BUILTIN_CMPEQPS:
39547 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39548 && initializer_zerop (gimple_call_arg (def_stmt,
39549 1)))
39550 op0 = pc_rtx;
39551 break;
39552 default:
39553 break;
39559 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39560 if (! pat)
39561 return const0_rtx;
39562 emit_insn (pat);
39564 switch (fcode)
39566 case IX86_BUILTIN_GATHER3DIV16SF:
39567 if (target == NULL_RTX)
39568 target = gen_reg_rtx (V8SFmode);
39569 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39570 break;
39571 case IX86_BUILTIN_GATHER3DIV16SI:
39572 if (target == NULL_RTX)
39573 target = gen_reg_rtx (V8SImode);
39574 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39575 break;
39576 case IX86_BUILTIN_GATHER3DIV8SF:
39577 case IX86_BUILTIN_GATHERDIV8SF:
39578 if (target == NULL_RTX)
39579 target = gen_reg_rtx (V4SFmode);
39580 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39581 break;
39582 case IX86_BUILTIN_GATHER3DIV8SI:
39583 case IX86_BUILTIN_GATHERDIV8SI:
39584 if (target == NULL_RTX)
39585 target = gen_reg_rtx (V4SImode);
39586 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39587 break;
39588 default:
39589 target = subtarget;
39590 break;
39592 return target;
39594 scatter_gen:
39595 arg0 = CALL_EXPR_ARG (exp, 0);
39596 arg1 = CALL_EXPR_ARG (exp, 1);
39597 arg2 = CALL_EXPR_ARG (exp, 2);
39598 arg3 = CALL_EXPR_ARG (exp, 3);
39599 arg4 = CALL_EXPR_ARG (exp, 4);
39600 op0 = expand_normal (arg0);
39601 op1 = expand_normal (arg1);
39602 op2 = expand_normal (arg2);
39603 op3 = expand_normal (arg3);
39604 op4 = expand_normal (arg4);
39605 mode1 = insn_data[icode].operand[1].mode;
39606 mode2 = insn_data[icode].operand[2].mode;
39607 mode3 = insn_data[icode].operand[3].mode;
39608 mode4 = insn_data[icode].operand[4].mode;
39610 /* Force the memory operand to be built from a base register only.
39611 We don't want to do this for the memory operands of other
39612 builtin functions. */
39613 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39615 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39616 op0 = copy_to_mode_reg (Pmode, op0);
39618 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39620 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39621 op1 = copy_to_mode_reg (mode1, op1);
39623 else
39625 op1 = copy_to_reg (op1);
39626 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39629 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39630 op2 = copy_to_mode_reg (mode2, op2);
39632 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39633 op3 = copy_to_mode_reg (mode3, op3);
39635 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39637 error ("the last argument must be scale 1, 2, 4, 8");
39638 return const0_rtx;
39641 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39642 if (! pat)
39643 return const0_rtx;
39645 emit_insn (pat);
39646 return 0;
39648 vec_prefetch_gen:
39649 arg0 = CALL_EXPR_ARG (exp, 0);
39650 arg1 = CALL_EXPR_ARG (exp, 1);
39651 arg2 = CALL_EXPR_ARG (exp, 2);
39652 arg3 = CALL_EXPR_ARG (exp, 3);
39653 arg4 = CALL_EXPR_ARG (exp, 4);
39654 op0 = expand_normal (arg0);
39655 op1 = expand_normal (arg1);
39656 op2 = expand_normal (arg2);
39657 op3 = expand_normal (arg3);
39658 op4 = expand_normal (arg4);
39659 mode0 = insn_data[icode].operand[0].mode;
39660 mode1 = insn_data[icode].operand[1].mode;
39661 mode3 = insn_data[icode].operand[3].mode;
39662 mode4 = insn_data[icode].operand[4].mode;
39664 if (GET_MODE (op0) == mode0
39665 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39667 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39668 op0 = copy_to_mode_reg (mode0, op0);
39670 else if (op0 != constm1_rtx)
39672 op0 = copy_to_reg (op0);
39673 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39676 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39677 op1 = copy_to_mode_reg (mode1, op1);
39679 /* Force the memory operand to be built from a base register only.
39680 We don't want to do this for the memory operands of other
39681 builtin functions. */
39682 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39684 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39685 op2 = copy_to_mode_reg (Pmode, op2);
39687 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39689 error ("the forth argument must be scale 1, 2, 4, 8");
39690 return const0_rtx;
39693 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39695 error ("incorrect hint operand");
39696 return const0_rtx;
39699 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39700 if (! pat)
39701 return const0_rtx;
39703 emit_insn (pat);
39705 return 0;
39707 case IX86_BUILTIN_XABORT:
39708 icode = CODE_FOR_xabort;
39709 arg0 = CALL_EXPR_ARG (exp, 0);
39710 op0 = expand_normal (arg0);
39711 mode0 = insn_data[icode].operand[0].mode;
39712 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39714 error ("the xabort's argument must be an 8-bit immediate");
39715 return const0_rtx;
39717 emit_insn (gen_xabort (op0));
39718 return 0;
39720 default:
39721 break;
39724 for (i = 0, d = bdesc_special_args;
39725 i < ARRAY_SIZE (bdesc_special_args);
39726 i++, d++)
39727 if (d->code == fcode)
39728 return ix86_expand_special_args_builtin (d, exp, target);
39730 for (i = 0, d = bdesc_args;
39731 i < ARRAY_SIZE (bdesc_args);
39732 i++, d++)
39733 if (d->code == fcode)
39734 switch (fcode)
39736 case IX86_BUILTIN_FABSQ:
39737 case IX86_BUILTIN_COPYSIGNQ:
39738 if (!TARGET_SSE)
39739 /* Emit a normal call if SSE isn't available. */
39740 return expand_call (exp, target, ignore);
39741 default:
39742 return ix86_expand_args_builtin (d, exp, target);
39745 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
39746 if (d->code == fcode)
39747 return ix86_expand_sse_comi (d, exp, target);
39749 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
39750 if (d->code == fcode)
39751 return ix86_expand_round_builtin (d, exp, target);
39753 for (i = 0, d = bdesc_pcmpestr;
39754 i < ARRAY_SIZE (bdesc_pcmpestr);
39755 i++, d++)
39756 if (d->code == fcode)
39757 return ix86_expand_sse_pcmpestr (d, exp, target);
39759 for (i = 0, d = bdesc_pcmpistr;
39760 i < ARRAY_SIZE (bdesc_pcmpistr);
39761 i++, d++)
39762 if (d->code == fcode)
39763 return ix86_expand_sse_pcmpistr (d, exp, target);
39765 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
39766 if (d->code == fcode)
39767 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
39768 (enum ix86_builtin_func_type)
39769 d->flag, d->comparison);
39771 gcc_unreachable ();
39774 /* This returns the target-specific builtin with code CODE if
39775 current_function_decl has visibility on this builtin, which is checked
39776 using isa flags. Returns NULL_TREE otherwise. */
39778 static tree ix86_get_builtin (enum ix86_builtins code)
39780 struct cl_target_option *opts;
39781 tree target_tree = NULL_TREE;
39783 /* Determine the isa flags of current_function_decl. */
39785 if (current_function_decl)
39786 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
39788 if (target_tree == NULL)
39789 target_tree = target_option_default_node;
39791 opts = TREE_TARGET_OPTION (target_tree);
39793 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
39794 return ix86_builtin_decl (code, true);
39795 else
39796 return NULL_TREE;
39799 /* Return the function decl of the target-specific builtin
39800 for the MPX builtin passed in FCODE. */
39801 static tree
39802 ix86_builtin_mpx_function (unsigned fcode)
39804 switch (fcode)
39806 case BUILT_IN_CHKP_BNDMK:
39807 return ix86_builtins[IX86_BUILTIN_BNDMK];
39809 case BUILT_IN_CHKP_BNDSTX:
39810 return ix86_builtins[IX86_BUILTIN_BNDSTX];
39812 case BUILT_IN_CHKP_BNDLDX:
39813 return ix86_builtins[IX86_BUILTIN_BNDLDX];
39815 case BUILT_IN_CHKP_BNDCL:
39816 return ix86_builtins[IX86_BUILTIN_BNDCL];
39818 case BUILT_IN_CHKP_BNDCU:
39819 return ix86_builtins[IX86_BUILTIN_BNDCU];
39821 case BUILT_IN_CHKP_BNDRET:
39822 return ix86_builtins[IX86_BUILTIN_BNDRET];
39824 case BUILT_IN_CHKP_INTERSECT:
39825 return ix86_builtins[IX86_BUILTIN_BNDINT];
39827 case BUILT_IN_CHKP_NARROW:
39828 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
39830 case BUILT_IN_CHKP_SIZEOF:
39831 return ix86_builtins[IX86_BUILTIN_SIZEOF];
39833 case BUILT_IN_CHKP_EXTRACT_LOWER:
39834 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
39836 case BUILT_IN_CHKP_EXTRACT_UPPER:
39837 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
39839 default:
39840 return NULL_TREE;
39843 gcc_unreachable ();
39846 /* Helper function for ix86_load_bounds and ix86_store_bounds.
39848 Return an address to be used to load/store bounds for the pointer
39849 passed in SLOT.
39851 SLOT_NO is an integer constant holding the number of a
39852 target-dependent special slot to be used in case SLOT is not a memory.
39854 SPECIAL_BASE is a pointer to be used as the base of the fake addresses
39855 used to access special slots in the Bounds Table. SPECIAL_BASE[-1],
39856 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
39858 static rtx
39859 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
39861 rtx addr = NULL;
39863 /* A NULL slot means we pass bounds for a pointer that is not passed
39864 to the function at all. A register slot means the pointer is
39865 passed in a register. In both cases bounds are passed via the
39866 Bounds Table. Since we do not have the actual pointer stored in
39867 memory, we have to use fake addresses to access the Bounds Table.
39868 We start with (special_base - sizeof (void *)) and decrease this
39869 address by the pointer size to get addresses for the other slots. */
39870 if (!slot || REG_P (slot))
39872 gcc_assert (CONST_INT_P (slot_no));
39873 addr = plus_constant (Pmode, special_base,
39874 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
39876 /* If the pointer is passed in memory, then its address is used to
39877 access the Bounds Table. */
39878 else if (MEM_P (slot))
39880 addr = XEXP (slot, 0);
39881 if (!register_operand (addr, Pmode))
39882 addr = copy_addr_to_reg (addr);
39884 else
39885 gcc_unreachable ();
39887 return addr;
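/* Illustrative sketch, not part of the original helper: for a NULL or
   register SLOT the address computed above is simply SPECIAL_BASE minus
   (SLOT_NO + 1) pointer-sized words, so slot 0 maps to SPECIAL_BASE[-1],
   slot 1 to SPECIAL_BASE[-2], and so on.  */
#if 0
static void *
bt_special_slot_addr_model (void *special_base, long slot_no)
{
  return (char *) special_base - (slot_no + 1) * (long) sizeof (void *);
}
#endif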
39890 /* The expand pass uses this hook to load bounds for function parameter
39891 PTR passed in SLOT in case its bounds are not passed in a register.
39893 If SLOT is a memory, then bounds are loaded as for a regular pointer
39894 loaded from memory. PTR may be NULL in case SLOT is a memory.
39895 In that case the value of PTR (if required) may be loaded from SLOT.
39897 If SLOT is NULL or a register, then SLOT_NO is an integer constant
39898 holding the number of the target-dependent special slot which should
39899 be used to obtain the bounds.
39901 Return the loaded bounds. */
39903 static rtx
39904 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
39906 rtx reg = gen_reg_rtx (BNDmode);
39907 rtx addr;
39909 /* Get the address to be used to access the Bounds Table. Special slots
39910 start at the location of the return address of the current function. */
39911 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
39913 /* Load pointer value from a memory if we don't have it. */
39914 if (!ptr)
39916 gcc_assert (MEM_P (slot));
39917 ptr = copy_addr_to_reg (slot);
39920 emit_insn (BNDmode == BND64mode
39921 ? gen_bnd64_ldx (reg, addr, ptr)
39922 : gen_bnd32_ldx (reg, addr, ptr));
39924 return reg;
39927 /* The expand pass uses this hook to store BOUNDS for call argument PTR
39928 passed in SLOT in case BOUNDS are not passed in a register.
39930 If SLOT is a memory, then BOUNDS are stored as for a regular pointer
39931 stored in memory. PTR may be NULL in case SLOT is a memory.
39932 In that case the value of PTR (if required) may be loaded from SLOT.
39934 If SLOT is NULL or a register, then SLOT_NO is an integer constant
39935 holding the number of the target-dependent special slot which should
39936 be used to store BOUNDS. */
39938 static void
39939 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
39941 rtx addr;
39943 /* Get the address to be used to access the Bounds Table. Special slots
39944 start at the location of the return address of the called function. */
39945 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
39947 /* Load pointer value from a memory if we don't have it. */
39948 if (!ptr)
39950 gcc_assert (MEM_P (slot));
39951 ptr = copy_addr_to_reg (slot);
39954 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
39955 if (!register_operand (bounds, BNDmode))
39956 bounds = copy_to_mode_reg (BNDmode, bounds);
39958 emit_insn (BNDmode == BND64mode
39959 ? gen_bnd64_stx (addr, ptr, bounds)
39960 : gen_bnd32_stx (addr, ptr, bounds));
39963 /* Load and return bounds returned by function in SLOT. */
39965 static rtx
39966 ix86_load_returned_bounds (rtx slot)
39968 rtx res;
39970 gcc_assert (REG_P (slot));
39971 res = gen_reg_rtx (BNDmode);
39972 emit_move_insn (res, slot);
39974 return res;
39977 /* Store BOUNDS returned by function into SLOT. */
39979 static void
39980 ix86_store_returned_bounds (rtx slot, rtx bounds)
39982 gcc_assert (REG_P (slot));
39983 emit_move_insn (slot, bounds);
39986 /* Returns a function decl for a vectorized version of the builtin function
39987 with builtin function code FN and the result vector type TYPE_OUT, or
39988 NULL_TREE if it is not available. */
39990 static tree
39991 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
39992 tree type_in)
39994 machine_mode in_mode, out_mode;
39995 int in_n, out_n;
39996 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
39998 if (TREE_CODE (type_out) != VECTOR_TYPE
39999 || TREE_CODE (type_in) != VECTOR_TYPE
40000 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40001 return NULL_TREE;
40003 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40004 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40005 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40006 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40008 switch (fn)
40010 case BUILT_IN_SQRT:
40011 if (out_mode == DFmode && in_mode == DFmode)
40013 if (out_n == 2 && in_n == 2)
40014 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40015 else if (out_n == 4 && in_n == 4)
40016 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40017 else if (out_n == 8 && in_n == 8)
40018 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40020 break;
40022 case BUILT_IN_EXP2F:
40023 if (out_mode == SFmode && in_mode == SFmode)
40025 if (out_n == 16 && in_n == 16)
40026 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40028 break;
40030 case BUILT_IN_SQRTF:
40031 if (out_mode == SFmode && in_mode == SFmode)
40033 if (out_n == 4 && in_n == 4)
40034 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40035 else if (out_n == 8 && in_n == 8)
40036 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40037 else if (out_n == 16 && in_n == 16)
40038 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40040 break;
40042 case BUILT_IN_IFLOOR:
40043 case BUILT_IN_LFLOOR:
40044 case BUILT_IN_LLFLOOR:
40045 /* The round insn does not trap on denormals. */
40046 if (flag_trapping_math || !TARGET_ROUND)
40047 break;
40049 if (out_mode == SImode && in_mode == DFmode)
40051 if (out_n == 4 && in_n == 2)
40052 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40053 else if (out_n == 8 && in_n == 4)
40054 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40055 else if (out_n == 16 && in_n == 8)
40056 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40058 break;
40060 case BUILT_IN_IFLOORF:
40061 case BUILT_IN_LFLOORF:
40062 case BUILT_IN_LLFLOORF:
40063 /* The round insn does not trap on denormals. */
40064 if (flag_trapping_math || !TARGET_ROUND)
40065 break;
40067 if (out_mode == SImode && in_mode == SFmode)
40069 if (out_n == 4 && in_n == 4)
40070 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40071 else if (out_n == 8 && in_n == 8)
40072 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40074 break;
40076 case BUILT_IN_ICEIL:
40077 case BUILT_IN_LCEIL:
40078 case BUILT_IN_LLCEIL:
40079 /* The round insn does not trap on denormals. */
40080 if (flag_trapping_math || !TARGET_ROUND)
40081 break;
40083 if (out_mode == SImode && in_mode == DFmode)
40085 if (out_n == 4 && in_n == 2)
40086 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40087 else if (out_n == 8 && in_n == 4)
40088 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40089 else if (out_n == 16 && in_n == 8)
40090 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40092 break;
40094 case BUILT_IN_ICEILF:
40095 case BUILT_IN_LCEILF:
40096 case BUILT_IN_LLCEILF:
40097 /* The round insn does not trap on denormals. */
40098 if (flag_trapping_math || !TARGET_ROUND)
40099 break;
40101 if (out_mode == SImode && in_mode == SFmode)
40103 if (out_n == 4 && in_n == 4)
40104 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40105 else if (out_n == 8 && in_n == 8)
40106 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40108 break;
40110 case BUILT_IN_IRINT:
40111 case BUILT_IN_LRINT:
40112 case BUILT_IN_LLRINT:
40113 if (out_mode == SImode && in_mode == DFmode)
40115 if (out_n == 4 && in_n == 2)
40116 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40117 else if (out_n == 8 && in_n == 4)
40118 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40120 break;
40122 case BUILT_IN_IRINTF:
40123 case BUILT_IN_LRINTF:
40124 case BUILT_IN_LLRINTF:
40125 if (out_mode == SImode && in_mode == SFmode)
40127 if (out_n == 4 && in_n == 4)
40128 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40129 else if (out_n == 8 && in_n == 8)
40130 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40132 break;
40134 case BUILT_IN_IROUND:
40135 case BUILT_IN_LROUND:
40136 case BUILT_IN_LLROUND:
40137 /* The round insn does not trap on denormals. */
40138 if (flag_trapping_math || !TARGET_ROUND)
40139 break;
40141 if (out_mode == SImode && in_mode == DFmode)
40143 if (out_n == 4 && in_n == 2)
40144 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40145 else if (out_n == 8 && in_n == 4)
40146 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40147 else if (out_n == 16 && in_n == 8)
40148 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40150 break;
40152 case BUILT_IN_IROUNDF:
40153 case BUILT_IN_LROUNDF:
40154 case BUILT_IN_LLROUNDF:
40155 /* The round insn does not trap on denormals. */
40156 if (flag_trapping_math || !TARGET_ROUND)
40157 break;
40159 if (out_mode == SImode && in_mode == SFmode)
40161 if (out_n == 4 && in_n == 4)
40162 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40163 else if (out_n == 8 && in_n == 8)
40164 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40166 break;
40168 case BUILT_IN_COPYSIGN:
40169 if (out_mode == DFmode && in_mode == DFmode)
40171 if (out_n == 2 && in_n == 2)
40172 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40173 else if (out_n == 4 && in_n == 4)
40174 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40175 else if (out_n == 8 && in_n == 8)
40176 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40178 break;
40180 case BUILT_IN_COPYSIGNF:
40181 if (out_mode == SFmode && in_mode == SFmode)
40183 if (out_n == 4 && in_n == 4)
40184 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40185 else if (out_n == 8 && in_n == 8)
40186 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40187 else if (out_n == 16 && in_n == 16)
40188 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40190 break;
40192 case BUILT_IN_FLOOR:
40193 /* The round insn does not trap on denormals. */
40194 if (flag_trapping_math || !TARGET_ROUND)
40195 break;
40197 if (out_mode == DFmode && in_mode == DFmode)
40199 if (out_n == 2 && in_n == 2)
40200 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40201 else if (out_n == 4 && in_n == 4)
40202 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40204 break;
40206 case BUILT_IN_FLOORF:
40207 /* The round insn does not trap on denormals. */
40208 if (flag_trapping_math || !TARGET_ROUND)
40209 break;
40211 if (out_mode == SFmode && in_mode == SFmode)
40213 if (out_n == 4 && in_n == 4)
40214 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40215 else if (out_n == 8 && in_n == 8)
40216 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40218 break;
40220 case BUILT_IN_CEIL:
40221 /* The round insn does not trap on denormals. */
40222 if (flag_trapping_math || !TARGET_ROUND)
40223 break;
40225 if (out_mode == DFmode && in_mode == DFmode)
40227 if (out_n == 2 && in_n == 2)
40228 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40229 else if (out_n == 4 && in_n == 4)
40230 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40232 break;
40234 case BUILT_IN_CEILF:
40235 /* The round insn does not trap on denormals. */
40236 if (flag_trapping_math || !TARGET_ROUND)
40237 break;
40239 if (out_mode == SFmode && in_mode == SFmode)
40241 if (out_n == 4 && in_n == 4)
40242 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40243 else if (out_n == 8 && in_n == 8)
40244 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40246 break;
40248 case BUILT_IN_TRUNC:
40249 /* The round insn does not trap on denormals. */
40250 if (flag_trapping_math || !TARGET_ROUND)
40251 break;
40253 if (out_mode == DFmode && in_mode == DFmode)
40255 if (out_n == 2 && in_n == 2)
40256 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40257 else if (out_n == 4 && in_n == 4)
40258 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40260 break;
40262 case BUILT_IN_TRUNCF:
40263 /* The round insn does not trap on denormals. */
40264 if (flag_trapping_math || !TARGET_ROUND)
40265 break;
40267 if (out_mode == SFmode && in_mode == SFmode)
40269 if (out_n == 4 && in_n == 4)
40270 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40271 else if (out_n == 8 && in_n == 8)
40272 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40274 break;
40276 case BUILT_IN_RINT:
40277 /* The round insn does not trap on denormals. */
40278 if (flag_trapping_math || !TARGET_ROUND)
40279 break;
40281 if (out_mode == DFmode && in_mode == DFmode)
40283 if (out_n == 2 && in_n == 2)
40284 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40285 else if (out_n == 4 && in_n == 4)
40286 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40288 break;
40290 case BUILT_IN_RINTF:
40291 /* The round insn does not trap on denormals. */
40292 if (flag_trapping_math || !TARGET_ROUND)
40293 break;
40295 if (out_mode == SFmode && in_mode == SFmode)
40297 if (out_n == 4 && in_n == 4)
40298 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40299 else if (out_n == 8 && in_n == 8)
40300 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40302 break;
40304 case BUILT_IN_ROUND:
40305 /* The round insn does not trap on denormals. */
40306 if (flag_trapping_math || !TARGET_ROUND)
40307 break;
40309 if (out_mode == DFmode && in_mode == DFmode)
40311 if (out_n == 2 && in_n == 2)
40312 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40313 else if (out_n == 4 && in_n == 4)
40314 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40316 break;
40318 case BUILT_IN_ROUNDF:
40319 /* The round insn does not trap on denormals. */
40320 if (flag_trapping_math || !TARGET_ROUND)
40321 break;
40323 if (out_mode == SFmode && in_mode == SFmode)
40325 if (out_n == 4 && in_n == 4)
40326 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40327 else if (out_n == 8 && in_n == 8)
40328 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40330 break;
40332 case BUILT_IN_FMA:
40333 if (out_mode == DFmode && in_mode == DFmode)
40335 if (out_n == 2 && in_n == 2)
40336 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40337 if (out_n == 4 && in_n == 4)
40338 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40340 break;
40342 case BUILT_IN_FMAF:
40343 if (out_mode == SFmode && in_mode == SFmode)
40345 if (out_n == 4 && in_n == 4)
40346 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40347 if (out_n == 8 && in_n == 8)
40348 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40350 break;
40352 default:
40353 break;
40356 /* Dispatch to a handler for a vectorization library. */
40357 if (ix86_veclib_handler)
40358 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40359 type_in);
40361 return NULL_TREE;
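/* As an illustration (derived from the cases above): with
   !flag_trapping_math and TARGET_ROUND, a loop such as

       for (i = 0; i < n; i++)
         out[i] = __builtin_floor (in[i]);      where out/in are double

   asks this hook for a V2DF-to-V2DF floor and gets IX86_BUILTIN_FLOORPD
   (IX86_BUILTIN_FLOORPD256 for the 4-element AVX case).  */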
40364 /* Handler for an SVML-style interface to
40365 a library with vectorized intrinsics. */
40367 static tree
40368 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40370 char name[20];
40371 tree fntype, new_fndecl, args;
40372 unsigned arity;
40373 const char *bname;
40374 machine_mode el_mode, in_mode;
40375 int n, in_n;
40377 /* SVML is suitable for unsafe math optimizations only. */
40378 if (!flag_unsafe_math_optimizations)
40379 return NULL_TREE;
40381 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40382 n = TYPE_VECTOR_SUBPARTS (type_out);
40383 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40384 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40385 if (el_mode != in_mode
40386 || n != in_n)
40387 return NULL_TREE;
40389 switch (fn)
40391 case BUILT_IN_EXP:
40392 case BUILT_IN_LOG:
40393 case BUILT_IN_LOG10:
40394 case BUILT_IN_POW:
40395 case BUILT_IN_TANH:
40396 case BUILT_IN_TAN:
40397 case BUILT_IN_ATAN:
40398 case BUILT_IN_ATAN2:
40399 case BUILT_IN_ATANH:
40400 case BUILT_IN_CBRT:
40401 case BUILT_IN_SINH:
40402 case BUILT_IN_SIN:
40403 case BUILT_IN_ASINH:
40404 case BUILT_IN_ASIN:
40405 case BUILT_IN_COSH:
40406 case BUILT_IN_COS:
40407 case BUILT_IN_ACOSH:
40408 case BUILT_IN_ACOS:
40409 if (el_mode != DFmode || n != 2)
40410 return NULL_TREE;
40411 break;
40413 case BUILT_IN_EXPF:
40414 case BUILT_IN_LOGF:
40415 case BUILT_IN_LOG10F:
40416 case BUILT_IN_POWF:
40417 case BUILT_IN_TANHF:
40418 case BUILT_IN_TANF:
40419 case BUILT_IN_ATANF:
40420 case BUILT_IN_ATAN2F:
40421 case BUILT_IN_ATANHF:
40422 case BUILT_IN_CBRTF:
40423 case BUILT_IN_SINHF:
40424 case BUILT_IN_SINF:
40425 case BUILT_IN_ASINHF:
40426 case BUILT_IN_ASINF:
40427 case BUILT_IN_COSHF:
40428 case BUILT_IN_COSF:
40429 case BUILT_IN_ACOSHF:
40430 case BUILT_IN_ACOSF:
40431 if (el_mode != SFmode || n != 4)
40432 return NULL_TREE;
40433 break;
40435 default:
40436 return NULL_TREE;
40439 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40441 if (fn == BUILT_IN_LOGF)
40442 strcpy (name, "vmlsLn4");
40443 else if (fn == BUILT_IN_LOG)
40444 strcpy (name, "vmldLn2");
40445 else if (n == 4)
40447 sprintf (name, "vmls%s", bname+10);
40448 name[strlen (name)-1] = '4';
40450 else
40451 sprintf (name, "vmld%s2", bname+10);
40453 /* Convert to uppercase. */
40454 name[4] &= ~0x20;
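/* A worked example of the mangling above: for BUILT_IN_SINF, bname is
   "__builtin_sinf", so bname+10 is "sinf"; "vmls%s" gives "vmlssinf",
   the last character is overwritten with '4' ("vmlssin4") and name[4]
   is uppercased, yielding "vmlsSin4".  The DFmode BUILT_IN_SIN case
   yields "vmldSin2", and the log cases are special-cased to
   "vmlsLn4"/"vmldLn2" above.  */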
40456 arity = 0;
40457 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40458 args;
40459 args = TREE_CHAIN (args))
40460 arity++;
40462 if (arity == 1)
40463 fntype = build_function_type_list (type_out, type_in, NULL);
40464 else
40465 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40467 /* Build a function declaration for the vectorized function. */
40468 new_fndecl = build_decl (BUILTINS_LOCATION,
40469 FUNCTION_DECL, get_identifier (name), fntype);
40470 TREE_PUBLIC (new_fndecl) = 1;
40471 DECL_EXTERNAL (new_fndecl) = 1;
40472 DECL_IS_NOVOPS (new_fndecl) = 1;
40473 TREE_READONLY (new_fndecl) = 1;
40475 return new_fndecl;
40478 /* Handler for an ACML-style interface to
40479 a library with vectorized intrinsics. */
40481 static tree
40482 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40484 char name[20] = "__vr.._";
40485 tree fntype, new_fndecl, args;
40486 unsigned arity;
40487 const char *bname;
40488 machine_mode el_mode, in_mode;
40489 int n, in_n;
40491 /* The ACML is 64-bit only and suitable for unsafe math only, as
40492 it does not correctly support parts of IEEE arithmetic with the
40493 required precision, such as denormals. */
40494 if (!TARGET_64BIT
40495 || !flag_unsafe_math_optimizations)
40496 return NULL_TREE;
40498 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40499 n = TYPE_VECTOR_SUBPARTS (type_out);
40500 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40501 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40502 if (el_mode != in_mode
40503 || n != in_n)
40504 return NULL_TREE;
40506 switch (fn)
40508 case BUILT_IN_SIN:
40509 case BUILT_IN_COS:
40510 case BUILT_IN_EXP:
40511 case BUILT_IN_LOG:
40512 case BUILT_IN_LOG2:
40513 case BUILT_IN_LOG10:
40514 name[4] = 'd';
40515 name[5] = '2';
40516 if (el_mode != DFmode
40517 || n != 2)
40518 return NULL_TREE;
40519 break;
40521 case BUILT_IN_SINF:
40522 case BUILT_IN_COSF:
40523 case BUILT_IN_EXPF:
40524 case BUILT_IN_POWF:
40525 case BUILT_IN_LOGF:
40526 case BUILT_IN_LOG2F:
40527 case BUILT_IN_LOG10F:
40528 name[4] = 's';
40529 name[5] = '4';
40530 if (el_mode != SFmode
40531 || n != 4)
40532 return NULL_TREE;
40533 break;
40535 default:
40536 return NULL_TREE;
40539 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40540 sprintf (name + 7, "%s", bname+10);
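/* A worked example: name starts out as "__vr.._" and the switch above
   patches the two dots, so BUILT_IN_SINF becomes "__vrs4_" and appending
   bname+10 ("sinf") yields "__vrs4_sinf"; the DFmode BUILT_IN_SIN case
   yields "__vrd2_sin".  */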
40542 arity = 0;
40543 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40544 args;
40545 args = TREE_CHAIN (args))
40546 arity++;
40548 if (arity == 1)
40549 fntype = build_function_type_list (type_out, type_in, NULL);
40550 else
40551 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40553 /* Build a function declaration for the vectorized function. */
40554 new_fndecl = build_decl (BUILTINS_LOCATION,
40555 FUNCTION_DECL, get_identifier (name), fntype);
40556 TREE_PUBLIC (new_fndecl) = 1;
40557 DECL_EXTERNAL (new_fndecl) = 1;
40558 DECL_IS_NOVOPS (new_fndecl) = 1;
40559 TREE_READONLY (new_fndecl) = 1;
40561 return new_fndecl;
40564 /* Returns a decl of a function that implements gather load with
40565 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40566 Return NULL_TREE if it is not available. */
40568 static tree
40569 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40570 const_tree index_type, int scale)
40572 bool si;
40573 enum ix86_builtins code;
40575 if (! TARGET_AVX2)
40576 return NULL_TREE;
40578 if ((TREE_CODE (index_type) != INTEGER_TYPE
40579 && !POINTER_TYPE_P (index_type))
40580 || (TYPE_MODE (index_type) != SImode
40581 && TYPE_MODE (index_type) != DImode))
40582 return NULL_TREE;
40584 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40585 return NULL_TREE;
40587 /* The v*gather* insns sign-extend the index to pointer mode. */
40588 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40589 && TYPE_UNSIGNED (index_type))
40590 return NULL_TREE;
40592 if (scale <= 0
40593 || scale > 8
40594 || (scale & (scale - 1)) != 0)
40595 return NULL_TREE;
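/* I.e. SCALE must be 1, 2, 4 or 8, the only scale factors the v*gather*
   addressing form can encode.  */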
40597 si = TYPE_MODE (index_type) == SImode;
40598 switch (TYPE_MODE (mem_vectype))
40600 case V2DFmode:
40601 if (TARGET_AVX512VL)
40602 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40603 else
40604 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40605 break;
40606 case V4DFmode:
40607 if (TARGET_AVX512VL)
40608 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40609 else
40610 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40611 break;
40612 case V2DImode:
40613 if (TARGET_AVX512VL)
40614 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40615 else
40616 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40617 break;
40618 case V4DImode:
40619 if (TARGET_AVX512VL)
40620 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40621 else
40622 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40623 break;
40624 case V4SFmode:
40625 if (TARGET_AVX512VL)
40626 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40627 else
40628 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40629 break;
40630 case V8SFmode:
40631 if (TARGET_AVX512VL)
40632 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40633 else
40634 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40635 break;
40636 case V4SImode:
40637 if (TARGET_AVX512VL)
40638 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40639 else
40640 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40641 break;
40642 case V8SImode:
40643 if (TARGET_AVX512VL)
40644 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40645 else
40646 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40647 break;
40648 case V8DFmode:
40649 if (TARGET_AVX512F)
40650 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40651 else
40652 return NULL_TREE;
40653 break;
40654 case V8DImode:
40655 if (TARGET_AVX512F)
40656 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40657 else
40658 return NULL_TREE;
40659 break;
40660 case V16SFmode:
40661 if (TARGET_AVX512F)
40662 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40663 else
40664 return NULL_TREE;
40665 break;
40666 case V16SImode:
40667 if (TARGET_AVX512F)
40668 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40669 else
40670 return NULL_TREE;
40671 break;
40672 default:
40673 return NULL_TREE;
40676 return ix86_get_builtin (code);
40679 /* Returns a decl of a target-specific builtin that implements the
40680 reciprocal of the function, or NULL_TREE if not available. */
40682 static tree
40683 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40685 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40686 && flag_finite_math_only && !flag_trapping_math
40687 && flag_unsafe_math_optimizations))
40688 return NULL_TREE;
40690 if (md_fn)
40691 /* Machine dependent builtins. */
40692 switch (fn)
40694 /* Vectorized version of sqrt to rsqrt conversion. */
40695 case IX86_BUILTIN_SQRTPS_NR:
40696 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40698 case IX86_BUILTIN_SQRTPS_NR256:
40699 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40701 default:
40702 return NULL_TREE;
40704 else
40705 /* Normal builtins. */
40706 switch (fn)
40708 /* Sqrt to rsqrt conversion. */
40709 case BUILT_IN_SQRTF:
40710 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40712 default:
40713 return NULL_TREE;
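/* For example, with -ffast-math (which enables the math flags tested
   above) and SSE math, a scalar 1.0f / __builtin_sqrtf (x) can be
   expanded via IX86_BUILTIN_RSQRTF, and the vectorized
   IX86_BUILTIN_SQRTPS_NR forms map to their RSQRTPS_NR counterparts;
   the _NR suffix denotes the Newton-Raphson refined variants.  */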
40717 /* Helper for avx_vpermilps256_operand et al. This is also used by
40718 the expansion functions to turn the parallel back into a mask.
40719 The return value is 0 for no match and the imm8+1 for a match. */
40722 avx_vpermilp_parallel (rtx par, machine_mode mode)
40724 unsigned i, nelt = GET_MODE_NUNITS (mode);
40725 unsigned mask = 0;
40726 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
40728 if (XVECLEN (par, 0) != (int) nelt)
40729 return 0;
40731 /* Validate that all of the elements are constants, and not totally
40732 out of range. Copy the data into an integral array to make the
40733 subsequent checks easier. */
40734 for (i = 0; i < nelt; ++i)
40736 rtx er = XVECEXP (par, 0, i);
40737 unsigned HOST_WIDE_INT ei;
40739 if (!CONST_INT_P (er))
40740 return 0;
40741 ei = INTVAL (er);
40742 if (ei >= nelt)
40743 return 0;
40744 ipar[i] = ei;
40747 switch (mode)
40749 case V8DFmode:
40750 /* In the 512-bit DFmode case, we can only move elements within
40751 a 128-bit lane. First fill the second part of the mask,
40752 then fallthru. */
40753 for (i = 4; i < 6; ++i)
40755 if (ipar[i] < 4 || ipar[i] >= 6)
40756 return 0;
40757 mask |= (ipar[i] - 4) << i;
40759 for (i = 6; i < 8; ++i)
40761 if (ipar[i] < 6)
40762 return 0;
40763 mask |= (ipar[i] - 6) << i;
40765 /* FALLTHRU */
40767 case V4DFmode:
40768 /* In the 256-bit DFmode case, we can only move elements within
40769 a 128-bit lane. */
40770 for (i = 0; i < 2; ++i)
40772 if (ipar[i] >= 2)
40773 return 0;
40774 mask |= ipar[i] << i;
40776 for (i = 2; i < 4; ++i)
40778 if (ipar[i] < 2)
40779 return 0;
40780 mask |= (ipar[i] - 2) << i;
40782 break;
40784 case V16SFmode:
40785 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
40786 must mirror the permutation in the lower 256 bits. */
40787 for (i = 0; i < 8; ++i)
40788 if (ipar[i] + 8 != ipar[i + 8])
40789 return 0;
40790 /* FALLTHRU */
40792 case V8SFmode:
40793 /* In the 256-bit SFmode case, we have full freedom of
40794 movement within the low 128-bit lane, but the high 128-bit
40795 lane must mirror the exact same pattern. */
40796 for (i = 0; i < 4; ++i)
40797 if (ipar[i] + 4 != ipar[i + 4])
40798 return 0;
40799 nelt = 4;
40800 /* FALLTHRU */
40802 case V2DFmode:
40803 case V4SFmode:
40804 /* In the 128-bit case, we have full freedom in the placement of
40805 the elements from the source operand. */
40806 for (i = 0; i < nelt; ++i)
40807 mask |= ipar[i] << (i * (nelt / 2));
40808 break;
40810 default:
40811 gcc_unreachable ();
40814 /* Make sure success has a non-zero value by adding one. */
40815 return mask + 1;
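/* Worked example: for V4SFmode, a (parallel [2 3 0 1]) gives
   mask = 2<<0 | 3<<2 | 0<<4 | 1<<6 = 0x4e, so 0x4f is returned and the
   caller recovers the vpermilps immediate by subtracting one.  */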
40818 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
40819 the expansion functions to turn the parallel back into a mask.
40820 The return value is 0 for no match and the imm8+1 for a match. */
40823 avx_vperm2f128_parallel (rtx par, machine_mode mode)
40825 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
40826 unsigned mask = 0;
40827 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
40829 if (XVECLEN (par, 0) != (int) nelt)
40830 return 0;
40832 /* Validate that all of the elements are constants, and not totally
40833 out of range. Copy the data into an integral array to make the
40834 subsequent checks easier. */
40835 for (i = 0; i < nelt; ++i)
40837 rtx er = XVECEXP (par, 0, i);
40838 unsigned HOST_WIDE_INT ei;
40840 if (!CONST_INT_P (er))
40841 return 0;
40842 ei = INTVAL (er);
40843 if (ei >= 2 * nelt)
40844 return 0;
40845 ipar[i] = ei;
40848 /* Validate that each half of the permute consists of consecutive elements. */
40849 for (i = 0; i < nelt2 - 1; ++i)
40850 if (ipar[i] + 1 != ipar[i + 1])
40851 return 0;
40852 for (i = nelt2; i < nelt - 1; ++i)
40853 if (ipar[i] + 1 != ipar[i + 1])
40854 return 0;
40856 /* Reconstruct the mask. */
40857 for (i = 0; i < 2; ++i)
40859 unsigned e = ipar[i * nelt2];
40860 if (e % nelt2)
40861 return 0;
40862 e /= nelt2;
40863 mask |= e << (i * 4);
40866 /* Make sure success has a non-zero value by adding one. */
40867 return mask + 1;
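/* Worked example: for V4DFmode, a (parallel [2 3 4 5]) selects the high
   128-bit lane of the first operand and the low lane of the second; the
   loop above builds mask = 1<<0 | 2<<4 = 0x21 and returns 0x22, i.e. the
   vperm2f128 immediate plus one.  */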
40870 /* Return a register priority for hard reg REGNO. */
40871 static int
40872 ix86_register_priority (int hard_regno)
40874 /* ebp and r13 as the base always want a displacement, and r12 as the
40875 base always wants an index. So discourage their use in an
40876 address. */
40877 if (hard_regno == R12_REG || hard_regno == R13_REG)
40878 return 0;
40879 if (hard_regno == BP_REG)
40880 return 1;
40881 /* New x86-64 int registers result in bigger code size. Discourage
40882 them. */
40883 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
40884 return 2;
40885 /* New x86-64 SSE registers result in bigger code size. Discourage
40886 them. */
40887 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
40888 return 2;
40889 /* Use of the AX register results in smaller code. Prefer it. */
40890 if (hard_regno == 0)
40891 return 4;
40892 return 3;
40895 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
40897 Put float CONST_DOUBLE in the constant pool instead of fp regs.
40898 QImode must go into class Q_REGS.
40899 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
40900 movdf to do mem-to-mem moves through integer regs. */
40902 static reg_class_t
40903 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
40905 machine_mode mode = GET_MODE (x);
40907 /* We're only allowed to return a subclass of CLASS. Many of the
40908 following checks fail for NO_REGS, so eliminate that early. */
40909 if (regclass == NO_REGS)
40910 return NO_REGS;
40912 /* All classes can load zeros. */
40913 if (x == CONST0_RTX (mode))
40914 return regclass;
40916 /* Force constants into memory if we are loading a (nonzero) constant into
40917 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
40918 instructions to load from a constant. */
40919 if (CONSTANT_P (x)
40920 && (MAYBE_MMX_CLASS_P (regclass)
40921 || MAYBE_SSE_CLASS_P (regclass)
40922 || MAYBE_MASK_CLASS_P (regclass)))
40923 return NO_REGS;
40925 /* Prefer SSE regs only, if we can use them for math. */
40926 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
40927 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
40929 /* Floating-point constants need more complex checks. */
40930 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
40932 /* General regs can load everything. */
40933 if (reg_class_subset_p (regclass, GENERAL_REGS))
40934 return regclass;
40936 /* Floats can load 0 and 1 plus some others. Note that we eliminated
40937 zero above. We only want to wind up preferring 80387 registers if
40938 we plan on doing computation with them. */
40939 if (TARGET_80387
40940 && standard_80387_constant_p (x) > 0)
40942 /* Limit class to non-sse. */
40943 if (regclass == FLOAT_SSE_REGS)
40944 return FLOAT_REGS;
40945 if (regclass == FP_TOP_SSE_REGS)
40946 return FP_TOP_REG;
40947 if (regclass == FP_SECOND_SSE_REGS)
40948 return FP_SECOND_REG;
40949 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
40950 return regclass;
40953 return NO_REGS;
40956 /* Generally when we see PLUS here, it's the function invariant
40957 (plus soft-fp const_int), which can only be computed into general
40958 regs. */
40959 if (GET_CODE (x) == PLUS)
40960 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
40962 /* QImode constants are easy to load, but non-constant QImode data
40963 must go into Q_REGS. */
40964 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
40966 if (reg_class_subset_p (regclass, Q_REGS))
40967 return regclass;
40968 if (reg_class_subset_p (Q_REGS, regclass))
40969 return Q_REGS;
40970 return NO_REGS;
40973 return regclass;
40976 /* Discourage putting floating-point values in SSE registers unless
40977 SSE math is being used, and likewise for the 387 registers. */
40978 static reg_class_t
40979 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
40981 machine_mode mode = GET_MODE (x);
40983 /* Restrict the output reload class to the register bank that we are doing
40984 math on. If we would like not to return a subset of CLASS, reject this
40985 alternative: if reload cannot do this, it will still use its choice. */
40986 mode = GET_MODE (x);
40987 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
40988 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
40990 if (X87_FLOAT_MODE_P (mode))
40992 if (regclass == FP_TOP_SSE_REGS)
40993 return FP_TOP_REG;
40994 else if (regclass == FP_SECOND_SSE_REGS)
40995 return FP_SECOND_REG;
40996 else
40997 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41000 return regclass;
41003 static reg_class_t
41004 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41005 machine_mode mode, secondary_reload_info *sri)
41007 /* Double-word spills from general registers to non-offsettable memory
41008 references (zero-extended addresses) require special handling. */
41009 if (TARGET_64BIT
41010 && MEM_P (x)
41011 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41012 && INTEGER_CLASS_P (rclass)
41013 && !offsettable_memref_p (x))
41015 sri->icode = (in_p
41016 ? CODE_FOR_reload_noff_load
41017 : CODE_FOR_reload_noff_store);
41018 /* Add the cost of moving address to a temporary. */
41019 sri->extra_cost = 1;
41021 return NO_REGS;
41024 /* QImode spills from non-QI registers require an
41025 intermediate register on 32-bit targets. */
41026 if (mode == QImode
41027 && (MAYBE_MASK_CLASS_P (rclass)
41028 || (!TARGET_64BIT && !in_p
41029 && INTEGER_CLASS_P (rclass)
41030 && MAYBE_NON_Q_CLASS_P (rclass))))
41032 int regno;
41034 if (REG_P (x))
41035 regno = REGNO (x);
41036 else
41037 regno = -1;
41039 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41040 regno = true_regnum (x);
41042 /* Return Q_REGS if the operand is in memory. */
41043 if (regno == -1)
41044 return Q_REGS;
41047 /* This condition handles the corner case where an expression involving
41048 pointers gets vectorized. We're trying to use the address of a
41049 stack slot as a vector initializer.
41051 (set (reg:V2DI 74 [ vect_cst_.2 ])
41052 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41054 Eventually frame gets turned into sp+offset like this:
41056 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41057 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41058 (const_int 392 [0x188]))))
41060 That later gets turned into:
41062 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41063 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41064 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41066 We'll have the following reload recorded:
41068 Reload 0: reload_in (DI) =
41069 (plus:DI (reg/f:DI 7 sp)
41070 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41071 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41072 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41073 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41074 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41075 reload_reg_rtx: (reg:V2DI 22 xmm1)
41077 Which isn't going to work since SSE instructions can't handle scalar
41078 additions. Returning GENERAL_REGS forces the addition into an integer
41079 register, and reload can handle subsequent reloads without problems. */
41081 if (in_p && GET_CODE (x) == PLUS
41082 && SSE_CLASS_P (rclass)
41083 && SCALAR_INT_MODE_P (mode))
41084 return GENERAL_REGS;
41086 return NO_REGS;
41089 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41091 static bool
41092 ix86_class_likely_spilled_p (reg_class_t rclass)
41094 switch (rclass)
41096 case AREG:
41097 case DREG:
41098 case CREG:
41099 case BREG:
41100 case AD_REGS:
41101 case SIREG:
41102 case DIREG:
41103 case SSE_FIRST_REG:
41104 case FP_TOP_REG:
41105 case FP_SECOND_REG:
41106 case BND_REGS:
41107 return true;
41109 default:
41110 break;
41113 return false;
41116 /* If we are copying between general and FP registers, we need a memory
41117 location. The same is true for SSE and MMX registers.
41119 To optimize register_move_cost performance, allow inline variant.
41121 The macro can't work reliably when one of the CLASSES is a class containing
41122 registers from multiple units (SSE, MMX, integer). We avoid this by never
41123 combining those units in a single alternative in the machine description.
41124 Ensure that this constraint holds to avoid unexpected surprises.
41126 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41127 enforce these sanity checks. */
41129 static inline bool
41130 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41131 machine_mode mode, int strict)
41133 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41134 return false;
41135 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41136 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41137 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41138 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41139 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41140 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41142 gcc_assert (!strict || lra_in_progress);
41143 return true;
41146 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41147 return true;
41149 /* Between mask and general registers, we have moves no larger than word size. */
41150 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41151 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41152 return true;
41154 /* ??? This is a lie. We do have moves between mmx/general and between
41155 mmx/sse2. But by saying we need secondary memory we discourage the
41156 register allocator from using the mmx registers unless needed. */
41157 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41158 return true;
41160 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41162 /* SSE1 doesn't have any direct moves from other classes. */
41163 if (!TARGET_SSE2)
41164 return true;
41166 /* If the target says that inter-unit moves are more expensive
41167 than moving through memory, then don't generate them. */
41168 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41169 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41170 return true;
41172 /* Between SSE and general, we have moves no larger than word size. */
41173 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41174 return true;
41177 return false;
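/* For example, on a 32-bit target a DImode move between SSE_REGS and
   GENERAL_REGS exceeds UNITS_PER_WORD, so the size check above forces
   the value through a memory slot instead of a direct inter-unit move.  */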
41180 bool
41181 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41182 machine_mode mode, int strict)
41184 return inline_secondary_memory_needed (class1, class2, mode, strict);
41187 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41189 On the 80386, this is the size of MODE in words,
41190 except in the FP regs, where a single reg is always enough. */
41192 static unsigned char
41193 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41195 if (MAYBE_INTEGER_CLASS_P (rclass))
41197 if (mode == XFmode)
41198 return (TARGET_64BIT ? 2 : 3);
41199 else if (mode == XCmode)
41200 return (TARGET_64BIT ? 4 : 6);
41201 else
41202 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41204 else
41206 if (COMPLEX_MODE_P (mode))
41207 return 2;
41208 else
41209 return 1;
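/* E.g. an XFmode value needs three registers in an integer class on a
   32-bit target but only two on a 64-bit one, while in the FP registers
   a single register is always enough (two for the complex modes).  */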
41213 /* Return true if the registers in CLASS cannot represent the change from
41214 modes FROM to TO. */
41216 bool
41217 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41218 enum reg_class regclass)
41220 if (from == to)
41221 return false;
41223 /* x87 registers can't do subreg at all, as all values are reformatted
41224 to extended precision. */
41225 if (MAYBE_FLOAT_CLASS_P (regclass))
41226 return true;
41228 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41230 /* Vector registers do not support QI or HImode loads. If we don't
41231 disallow a change to these modes, reload will assume it's ok to
41232 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41233 the vec_dupv4hi pattern. */
41234 if (GET_MODE_SIZE (from) < 4)
41235 return true;
41238 return false;
41241 /* Return the cost of moving data of mode M between a
41242 register and memory. A value of 2 is the default; this cost is
41243 relative to those in `REGISTER_MOVE_COST'.
41245 This function is used extensively by register_move_cost that is used to
41246 build tables at startup. Make it inline in this case.
41247 When IN is 2, return the maximum of the in and out move costs.
41249 If moving between registers and memory is more expensive than
41250 between two registers, you should define this macro to express the
41251 relative cost.
41253 Also model the increased cost of moving QImode values in non-Q_REGS
41254 classes.
41256 static inline int
41257 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41258 int in)
41260 int cost;
41261 if (FLOAT_CLASS_P (regclass))
41263 int index;
41264 switch (mode)
41266 case SFmode:
41267 index = 0;
41268 break;
41269 case DFmode:
41270 index = 1;
41271 break;
41272 case XFmode:
41273 index = 2;
41274 break;
41275 default:
41276 return 100;
41278 if (in == 2)
41279 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41280 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41282 if (SSE_CLASS_P (regclass))
41284 int index;
41285 switch (GET_MODE_SIZE (mode))
41287 case 4:
41288 index = 0;
41289 break;
41290 case 8:
41291 index = 1;
41292 break;
41293 case 16:
41294 index = 2;
41295 break;
41296 default:
41297 return 100;
41299 if (in == 2)
41300 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41301 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41303 if (MMX_CLASS_P (regclass))
41305 int index;
41306 switch (GET_MODE_SIZE (mode))
41308 case 4:
41309 index = 0;
41310 break;
41311 case 8:
41312 index = 1;
41313 break;
41314 default:
41315 return 100;
41317 if (in == 2)
41318 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41319 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41321 switch (GET_MODE_SIZE (mode))
41323 case 1:
41324 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41326 if (!in)
41327 return ix86_cost->int_store[0];
41328 if (TARGET_PARTIAL_REG_DEPENDENCY
41329 && optimize_function_for_speed_p (cfun))
41330 cost = ix86_cost->movzbl_load;
41331 else
41332 cost = ix86_cost->int_load[0];
41333 if (in == 2)
41334 return MAX (cost, ix86_cost->int_store[0]);
41335 return cost;
41337 else
41339 if (in == 2)
41340 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41341 if (in)
41342 return ix86_cost->movzbl_load;
41343 else
41344 return ix86_cost->int_store[0] + 4;
41346 break;
41347 case 2:
41348 if (in == 2)
41349 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41350 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41351 default:
41352 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
41353 if (mode == TFmode)
41354 mode = XFmode;
41355 if (in == 2)
41356 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41357 else if (in)
41358 cost = ix86_cost->int_load[2];
41359 else
41360 cost = ix86_cost->int_store[2];
41361 return (cost * (((int) GET_MODE_SIZE (mode)
41362 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41366 static int
41367 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41368 bool in)
41370 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41374 /* Return the cost of moving data from a register in class CLASS1 to
41375 one in class CLASS2.
41377 It is not required that the cost always equal 2 when FROM is the same as TO;
41378 on some machines it is expensive to move between registers if they are not
41379 general registers. */
41381 static int
41382 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41383 reg_class_t class2_i)
41385 enum reg_class class1 = (enum reg_class) class1_i;
41386 enum reg_class class2 = (enum reg_class) class2_i;
41388 /* In case we require secondary memory, compute cost of the store followed
41389 by load. In order to avoid bad register allocation choices, we need
41390 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41392 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41394 int cost = 1;
41396 cost += inline_memory_move_cost (mode, class1, 2);
41397 cost += inline_memory_move_cost (mode, class2, 2);
41399 /* In case of copying from a general purpose register we may emit multiple
41400 stores followed by a single load, causing a memory size mismatch stall.
41401 Count this as an arbitrarily high cost of 20.
41402 if (targetm.class_max_nregs (class1, mode)
41403 > targetm.class_max_nregs (class2, mode))
41404 cost += 20;
41406 /* In the case of FP/MMX moves, the registers actually overlap, and we
41407 have to switch modes in order to treat them differently. */
41408 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41409 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41410 cost += 20;
41412 return cost;
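/* As an illustration of the branch above: a DFmode move between
   FLOAT_REGS and SSE_REGS needs secondary memory, so its cost is
   1 plus the max of the load and store costs for each of the two classes,
   plus the extra 20s when those penalties apply.  */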
41415 /* Moves between SSE/MMX and integer unit are expensive. */
41416 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41417 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41419 /* ??? By keeping the returned value relatively high, we limit the number
41420 of moves between integer and MMX/SSE registers for all targets.
41421 Additionally, a high value prevents problems with x86_modes_tieable_p(),
41422 where integer modes in MMX/SSE registers are not tieable
41423 because of missing QImode and HImode moves to, from or between
41424 MMX/SSE registers. */
41425 return MAX (8, ix86_cost->mmxsse_to_integer);
41427 if (MAYBE_FLOAT_CLASS_P (class1))
41428 return ix86_cost->fp_move;
41429 if (MAYBE_SSE_CLASS_P (class1))
41430 return ix86_cost->sse_move;
41431 if (MAYBE_MMX_CLASS_P (class1))
41432 return ix86_cost->mmx_move;
41433 return 2;
41436 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41437 MODE. */
41439 bool
41440 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41442 /* The flags register, and only the flags register, can hold CCmode values. */
41443 if (CC_REGNO_P (regno))
41444 return GET_MODE_CLASS (mode) == MODE_CC;
41445 if (GET_MODE_CLASS (mode) == MODE_CC
41446 || GET_MODE_CLASS (mode) == MODE_RANDOM
41447 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41448 return false;
41449 if (STACK_REGNO_P (regno))
41450 return VALID_FP_MODE_P (mode);
41451 if (MASK_REGNO_P (regno))
41452 return (VALID_MASK_REG_MODE (mode)
41453 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
41454 if (BND_REGNO_P (regno))
41455 return VALID_BND_REG_MODE (mode);
41456 if (SSE_REGNO_P (regno))
41458 /* We implement the move patterns for all vector modes into and
41459 out of SSE registers, even when no operation instructions
41460 are available. */
41462 /* For AVX-512 we allow, regardless of regno:
41463 - XI mode
41464 - any 512-bit wide vector mode
41465 - any scalar mode. */
41466 if (TARGET_AVX512F
41467 && (mode == XImode
41468 || VALID_AVX512F_REG_MODE (mode)
41469 || VALID_AVX512F_SCALAR_MODE (mode)))
41470 return true;
41472 /* TODO check for QI/HI scalars. */
41473 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41474 if (TARGET_AVX512VL
41475 && (mode == OImode
41476 || mode == TImode
41477 || VALID_AVX256_REG_MODE (mode)
41478 || VALID_AVX512VL_128_REG_MODE (mode)))
41479 return true;
41481 /* xmm16-xmm31 are only available for AVX-512. */
41482 if (EXT_REX_SSE_REGNO_P (regno))
41483 return false;
41485 /* OImode and AVX modes are available only when AVX is enabled. */
41486 return ((TARGET_AVX
41487 && VALID_AVX256_REG_OR_OI_MODE (mode))
41488 || VALID_SSE_REG_MODE (mode)
41489 || VALID_SSE2_REG_MODE (mode)
41490 || VALID_MMX_REG_MODE (mode)
41491 || VALID_MMX_REG_MODE_3DNOW (mode));
41493 if (MMX_REGNO_P (regno))
41495 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41496 so if the register is available at all, then we can move data of
41497 the given mode into or out of it. */
41498 return (VALID_MMX_REG_MODE (mode)
41499 || VALID_MMX_REG_MODE_3DNOW (mode));
41502 if (mode == QImode)
41504 /* Take care with QImode values - they can live in non-QI regs,
41505 but then they do cause partial register stalls. */
41506 if (ANY_QI_REGNO_P (regno))
41507 return true;
41508 if (!TARGET_PARTIAL_REG_STALL)
41509 return true;
41510 /* LRA checks if the hard register is OK for the given mode.
41511 QImode values can live in non-QI regs, so we allow all
41512 registers here. */
41513 if (lra_in_progress)
41514 return true;
41515 return !can_create_pseudo_p ();
41517 /* We handle both integer and floats in the general purpose registers. */
41518 else if (VALID_INT_MODE_P (mode))
41519 return true;
41520 else if (VALID_FP_MODE_P (mode))
41521 return true;
41522 else if (VALID_DFP_MODE_P (mode))
41523 return true;
41524 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41525 on to use that value in smaller contexts, this can easily force a
41526 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41527 supporting DImode, allow it. */
41528 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41529 return true;
41531 return false;
41534 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41535 tieable integer mode. */
41537 static bool
41538 ix86_tieable_integer_mode_p (machine_mode mode)
41540 switch (mode)
41542 case HImode:
41543 case SImode:
41544 return true;
41546 case QImode:
41547 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41549 case DImode:
41550 return TARGET_64BIT;
41552 default:
41553 return false;
41557 /* Return true if MODE1 is accessible in a register that can hold MODE2
41558 without copying. That is, all register classes that can hold MODE2
41559 can also hold MODE1. */
41561 bool
41562 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41564 if (mode1 == mode2)
41565 return true;
41567 if (ix86_tieable_integer_mode_p (mode1)
41568 && ix86_tieable_integer_mode_p (mode2))
41569 return true;
41571 /* MODE2 being XFmode implies fp stack or general regs, which means we
41572 can tie any smaller floating point modes to it. Note that we do not
41573 tie this with TFmode. */
41574 if (mode2 == XFmode)
41575 return mode1 == SFmode || mode1 == DFmode;
41577 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41578 that we can tie it with SFmode. */
41579 if (mode2 == DFmode)
41580 return mode1 == SFmode;
41582 /* If MODE2 is only appropriate for an SSE register, then tie with
41583 any other mode acceptable to SSE registers. */
41584 if (GET_MODE_SIZE (mode2) == 32
41585 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41586 return (GET_MODE_SIZE (mode1) == 32
41587 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41588 if (GET_MODE_SIZE (mode2) == 16
41589 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41590 return (GET_MODE_SIZE (mode1) == 16
41591 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41593 /* If MODE2 is appropriate for an MMX register, then tie
41594 with any other mode acceptable to MMX registers. */
41595 if (GET_MODE_SIZE (mode2) == 8
41596 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41597 return (GET_MODE_SIZE (mode1) == 8
41598 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41600 return false;
41603 /* Return the cost of moving between two registers of mode MODE. */
41605 static int
41606 ix86_set_reg_reg_cost (machine_mode mode)
41608 unsigned int units = UNITS_PER_WORD;
41610 switch (GET_MODE_CLASS (mode))
41612 default:
41613 break;
41615 case MODE_CC:
41616 units = GET_MODE_SIZE (CCmode);
41617 break;
41619 case MODE_FLOAT:
41620 if ((TARGET_SSE && mode == TFmode)
41621 || (TARGET_80387 && mode == XFmode)
41622 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41623 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41624 units = GET_MODE_SIZE (mode);
41625 break;
41627 case MODE_COMPLEX_FLOAT:
41628 if ((TARGET_SSE && mode == TCmode)
41629 || (TARGET_80387 && mode == XCmode)
41630 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41631 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41632 units = GET_MODE_SIZE (mode);
41633 break;
41635 case MODE_VECTOR_INT:
41636 case MODE_VECTOR_FLOAT:
41637 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41638 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41639 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41640 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41641 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41642 units = GET_MODE_SIZE (mode);
41645 /* Return the cost of moving between two registers of mode MODE,
41646 assuming that the move will be in pieces of at most UNITS bytes. */
41647 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
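/* Example: an SImode register-to-register copy costs COSTS_N_INSNS (1);
   a TImode copy on a 64-bit target falls through to the UNITS_PER_WORD
   default and costs COSTS_N_INSNS (2).  */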
41650 /* Compute a (partial) cost for rtx X. Return true if the complete
41651 cost has been computed, and false if subexpressions should be
41652 scanned. In either case, *TOTAL contains the cost result. */
41654 static bool
41655 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41656 bool speed)
41658 rtx mask;
41659 enum rtx_code code = (enum rtx_code) code_i;
41660 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41661 machine_mode mode = GET_MODE (x);
41662 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41664 switch (code)
41666 case SET:
41667 if (register_operand (SET_DEST (x), VOIDmode)
41668 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41670 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41671 return true;
41673 return false;
41675 case CONST_INT:
41676 case CONST:
41677 case LABEL_REF:
41678 case SYMBOL_REF:
41679 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41680 *total = 3;
41681 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41682 *total = 2;
41683 else if (flag_pic && SYMBOLIC_CONST (x)
41684 && !(TARGET_64BIT
41685 && (GET_CODE (x) == LABEL_REF
41686 || (GET_CODE (x) == SYMBOL_REF
41687 && SYMBOL_REF_LOCAL_P (x)))))
41688 *total = 1;
41689 else
41690 *total = 0;
41691 return true;
41693 case CONST_DOUBLE:
41694 if (mode == VOIDmode)
41696 *total = 0;
41697 return true;
41699 switch (standard_80387_constant_p (x))
41701 case 1: /* 0.0 */
41702 *total = 1;
41703 return true;
41704 default: /* Other constants */
41705 *total = 2;
41706 return true;
41707 case 0:
41708 case -1:
41709 break;
41711 if (SSE_FLOAT_MODE_P (mode))
41713 case CONST_VECTOR:
41714 switch (standard_sse_constant_p (x))
41716 case 0:
41717 break;
41718 case 1: /* 0: xor eliminates false dependency */
41719 *total = 0;
41720 return true;
41721 default: /* -1: cmp contains false dependency */
41722 *total = 1;
41723 return true;
41726 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41727 it'll probably end up. Add a penalty for size. */
41728 *total = (COSTS_N_INSNS (1)
41729 + (flag_pic != 0 && !TARGET_64BIT)
41730 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
41731 return true;
41733 case ZERO_EXTEND:
41734 /* The zero extension is often completely free on x86_64, so make
41735 it as cheap as possible. */
41736 if (TARGET_64BIT && mode == DImode
41737 && GET_MODE (XEXP (x, 0)) == SImode)
41738 *total = 1;
41739 else if (TARGET_ZERO_EXTEND_WITH_AND)
41740 *total = cost->add;
41741 else
41742 *total = cost->movzx;
41743 return false;
41745 case SIGN_EXTEND:
41746 *total = cost->movsx;
41747 return false;
41749 case ASHIFT:
41750 if (SCALAR_INT_MODE_P (mode)
41751 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
41752 && CONST_INT_P (XEXP (x, 1)))
41754 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41755 if (value == 1)
41757 *total = cost->add;
41758 return false;
41760 if ((value == 2 || value == 3)
41761 && cost->lea <= cost->shift_const)
41763 *total = cost->lea;
41764 return false;
41767 /* FALLTHRU */
41769 case ROTATE:
41770 case ASHIFTRT:
41771 case LSHIFTRT:
41772 case ROTATERT:
41773 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41775 /* ??? Should be SSE vector operation cost. */
41776 /* At least for published AMD latencies, this really is the same
41777 as the latency for a simple fpu operation like fabs. */
41778 /* V*QImode is emulated with 1-11 insns. */
41779 if (mode == V16QImode || mode == V32QImode)
41781 int count = 11;
41782 if (TARGET_XOP && mode == V16QImode)
41784 /* For XOP we use vpshab, which requires a broadcast of the
41785 value to the variable shift insn. For constants this
41786 means a V16QI const in mem; even when we can perform the
41787 shift with one insn, set the cost to prefer paddb. */
41788 if (CONSTANT_P (XEXP (x, 1)))
41790 *total = (cost->fabs
41791 + rtx_cost (XEXP (x, 0), code, 0, speed)
41792 + (speed ? 2 : COSTS_N_BYTES (16)));
41793 return true;
41795 count = 3;
41797 else if (TARGET_SSSE3)
41798 count = 7;
41799 *total = cost->fabs * count;
41801 else
41802 *total = cost->fabs;
41804 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41806 if (CONST_INT_P (XEXP (x, 1)))
41808 if (INTVAL (XEXP (x, 1)) > 32)
41809 *total = cost->shift_const + COSTS_N_INSNS (2);
41810 else
41811 *total = cost->shift_const * 2;
41813 else
41815 if (GET_CODE (XEXP (x, 1)) == AND)
41816 *total = cost->shift_var * 2;
41817 else
41818 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
41821 else
41823 if (CONST_INT_P (XEXP (x, 1)))
41824 *total = cost->shift_const;
41825 else if (GET_CODE (XEXP (x, 1)) == SUBREG
41826 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
41828 /* Return the cost after shift-and truncation. */
41829 *total = cost->shift_var;
41830 return true;
41832 else
41833 *total = cost->shift_var;
41835 return false;
41837 case FMA:
41839 rtx sub;
41841 gcc_assert (FLOAT_MODE_P (mode));
41842 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
41844 /* ??? SSE scalar/vector cost should be used here. */
41845 /* ??? Bald assumption that fma has the same cost as fmul. */
41846 *total = cost->fmul;
41847 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
41849 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
41850 sub = XEXP (x, 0);
41851 if (GET_CODE (sub) == NEG)
41852 sub = XEXP (sub, 0);
41853 *total += rtx_cost (sub, FMA, 0, speed);
41855 sub = XEXP (x, 2);
41856 if (GET_CODE (sub) == NEG)
41857 sub = XEXP (sub, 0);
41858 *total += rtx_cost (sub, FMA, 2, speed);
41859 return true;
41862 case MULT:
41863 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41865 /* ??? SSE scalar cost should be used here. */
41866 *total = cost->fmul;
41867 return false;
41869 else if (X87_FLOAT_MODE_P (mode))
41871 *total = cost->fmul;
41872 return false;
41874 else if (FLOAT_MODE_P (mode))
41876 /* ??? SSE vector cost should be used here. */
41877 *total = cost->fmul;
41878 return false;
41880 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41882 /* V*QImode is emulated with 7-13 insns. */
41883 if (mode == V16QImode || mode == V32QImode)
41885 int extra = 11;
41886 if (TARGET_XOP && mode == V16QImode)
41887 extra = 5;
41888 else if (TARGET_SSSE3)
41889 extra = 6;
41890 *total = cost->fmul * 2 + cost->fabs * extra;
41892 /* V*DImode is emulated with 5-8 insns. */
41893 else if (mode == V2DImode || mode == V4DImode)
41895 if (TARGET_XOP && mode == V2DImode)
41896 *total = cost->fmul * 2 + cost->fabs * 3;
41897 else
41898 *total = cost->fmul * 3 + cost->fabs * 5;
41900 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
41901 insns, including two PMULUDQ. */
41902 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
41903 *total = cost->fmul * 2 + cost->fabs * 5;
41904 else
41905 *total = cost->fmul;
41906 return false;
41908 else
41910 rtx op0 = XEXP (x, 0);
41911 rtx op1 = XEXP (x, 1);
41912 int nbits;
41913 if (CONST_INT_P (XEXP (x, 1)))
41915 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
41916 for (nbits = 0; value != 0; value &= value - 1)
41917 nbits++;
41919 else
41920 /* This is arbitrary. */
41921 nbits = 7;
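/* nbits now approximates the number of set bits in the multiplier;
   e.g. a constant multiplier of 10 (binary 1010) gives nbits == 2,
   while a non-constant multiplier is charged a flat 7 bits.  */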
41923 /* Compute costs correctly for widening multiplication. */
41924 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
41925 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
41926 == GET_MODE_SIZE (mode))
41928 int is_mulwiden = 0;
41929 machine_mode inner_mode = GET_MODE (op0);
41931 if (GET_CODE (op0) == GET_CODE (op1))
41932 is_mulwiden = 1, op1 = XEXP (op1, 0);
41933 else if (CONST_INT_P (op1))
41935 if (GET_CODE (op0) == SIGN_EXTEND)
41936 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
41937 == INTVAL (op1);
41938 else
41939 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
41942 if (is_mulwiden)
41943 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
41946 *total = (cost->mult_init[MODE_INDEX (mode)]
41947 + nbits * cost->mult_bit
41948 + rtx_cost (op0, outer_code, opno, speed)
41949 + rtx_cost (op1, outer_code, opno, speed));
41951 return true;
41954 case DIV:
41955 case UDIV:
41956 case MOD:
41957 case UMOD:
41958 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41959 /* ??? SSE cost should be used here. */
41960 *total = cost->fdiv;
41961 else if (X87_FLOAT_MODE_P (mode))
41962 *total = cost->fdiv;
41963 else if (FLOAT_MODE_P (mode))
41964 /* ??? SSE vector cost should be used here. */
41965 *total = cost->fdiv;
41966 else
41967 *total = cost->divide[MODE_INDEX (mode)];
41968 return false;
41970 case PLUS:
41971 if (GET_MODE_CLASS (mode) == MODE_INT
41972 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
41974 if (GET_CODE (XEXP (x, 0)) == PLUS
41975 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
41976 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
41977 && CONSTANT_P (XEXP (x, 1)))
41979 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
41980 if (val == 2 || val == 4 || val == 8)
41982 *total = cost->lea;
41983 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
41984 outer_code, opno, speed);
41985 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
41986 outer_code, opno, speed);
41987 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
41988 return true;
41991 else if (GET_CODE (XEXP (x, 0)) == MULT
41992 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
41994 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
41995 if (val == 2 || val == 4 || val == 8)
41997 *total = cost->lea;
41998 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
41999 outer_code, opno, speed);
42000 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42001 return true;
42004 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42006 *total = cost->lea;
42007 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42008 outer_code, opno, speed);
42009 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42010 outer_code, opno, speed);
42011 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42012 return true;
42015 /* FALLTHRU */
42017 case MINUS:
42018 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42020 /* ??? SSE cost should be used here. */
42021 *total = cost->fadd;
42022 return false;
42024 else if (X87_FLOAT_MODE_P (mode))
42026 *total = cost->fadd;
42027 return false;
42029 else if (FLOAT_MODE_P (mode))
42031 /* ??? SSE vector cost should be used here. */
42032 *total = cost->fadd;
42033 return false;
42035 /* FALLTHRU */
42037 case AND:
42038 case IOR:
42039 case XOR:
42040 if (GET_MODE_CLASS (mode) == MODE_INT
42041 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42043 *total = (cost->add * 2
42044 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42045 << (GET_MODE (XEXP (x, 0)) != DImode))
42046 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42047 << (GET_MODE (XEXP (x, 1)) != DImode)));
42048 return true;
42050 /* FALLTHRU */
42052 case NEG:
42053 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42055 /* ??? SSE cost should be used here. */
42056 *total = cost->fchs;
42057 return false;
42059 else if (X87_FLOAT_MODE_P (mode))
42061 *total = cost->fchs;
42062 return false;
42064 else if (FLOAT_MODE_P (mode))
42066 /* ??? SSE vector cost should be used here. */
42067 *total = cost->fchs;
42068 return false;
42070 /* FALLTHRU */
42072 case NOT:
42073 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42075 /* ??? Should be SSE vector operation cost. */
42076 /* At least for published AMD latencies, this really is the same
42077 as the latency for a simple fpu operation like fabs. */
42078 *total = cost->fabs;
42080 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42081 *total = cost->add * 2;
42082 else
42083 *total = cost->add;
42084 return false;
42086 case COMPARE:
42087 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42088 && XEXP (XEXP (x, 0), 1) == const1_rtx
42089 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42090 && XEXP (x, 1) == const0_rtx)
42092 /* This kind of construct is implemented using test[bwl].
42093 Treat it as if we had an AND. */
42094 *total = (cost->add
42095 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42096 + rtx_cost (const1_rtx, outer_code, opno, speed));
42097 return true;
42099 return false;
42101 case FLOAT_EXTEND:
42102 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42103 *total = 0;
42104 return false;
42106 case ABS:
42107 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42108 /* ??? SSE cost should be used here. */
42109 *total = cost->fabs;
42110 else if (X87_FLOAT_MODE_P (mode))
42111 *total = cost->fabs;
42112 else if (FLOAT_MODE_P (mode))
42113 /* ??? SSE vector cost should be used here. */
42114 *total = cost->fabs;
42115 return false;
42117 case SQRT:
42118 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42119 /* ??? SSE cost should be used here. */
42120 *total = cost->fsqrt;
42121 else if (X87_FLOAT_MODE_P (mode))
42122 *total = cost->fsqrt;
42123 else if (FLOAT_MODE_P (mode))
42124 /* ??? SSE vector cost should be used here. */
42125 *total = cost->fsqrt;
42126 return false;
42128 case UNSPEC:
42129 if (XINT (x, 1) == UNSPEC_TP)
42130 *total = 0;
42131 return false;
42133 case VEC_SELECT:
42134 case VEC_CONCAT:
42135 case VEC_DUPLICATE:
42136 /* ??? Assume all of these vector manipulation patterns are
42137 recognizable, in which case they all pretty much have the
42138 same cost. */
42139 *total = cost->fabs;
42140 return true;
42141 case VEC_MERGE:
42142 mask = XEXP (x, 2);
42143 /* This is a masked instruction; assume the same cost
42144 as the non-masked variant. */
42145 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42146 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42147 else
42148 *total = cost->fabs;
42149 return true;
42151 default:
42152 return false;
42156 #if TARGET_MACHO
42158 static int current_machopic_label_num;
42160 /* Given a symbol name and its associated stub, write out the
42161 definition of the stub. */
42163 void
42164 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42166 unsigned int length;
42167 char *binder_name, *symbol_name, lazy_ptr_name[32];
42168 int label = ++current_machopic_label_num;
42170 /* For 64-bit we shouldn't get here. */
42171 gcc_assert (!TARGET_64BIT);
42173 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42174 symb = targetm.strip_name_encoding (symb);
42176 length = strlen (stub);
42177 binder_name = XALLOCAVEC (char, length + 32);
42178 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42180 length = strlen (symb);
42181 symbol_name = XALLOCAVEC (char, length + 32);
42182 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42184 sprintf (lazy_ptr_name, "L%d$lz", label);
42186 if (MACHOPIC_ATT_STUB)
42187 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42188 else if (MACHOPIC_PURE)
42189 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42190 else
42191 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42193 fprintf (file, "%s:\n", stub);
42194 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42196 if (MACHOPIC_ATT_STUB)
42198 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42200 else if (MACHOPIC_PURE)
42202 /* PIC stub. */
42203 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42204 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42205 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42206 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42207 label, lazy_ptr_name, label);
42208 fprintf (file, "\tjmp\t*%%ecx\n");
42210 else
42211 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42213 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42214 it needs no stub-binding-helper. */
42215 if (MACHOPIC_ATT_STUB)
42216 return;
42218 fprintf (file, "%s:\n", binder_name);
42220 if (MACHOPIC_PURE)
42222 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42223 fprintf (file, "\tpushl\t%%ecx\n");
42225 else
42226 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42228 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42230 /* N.B. Keep the correspondence of these
42231 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42232 old-pic/new-pic/non-pic stubs; altering this will break
42233 compatibility with existing dylibs. */
42234 if (MACHOPIC_PURE)
42236 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42237 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42239 else
42240 /* 16-byte -mdynamic-no-pic stub. */
42241 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42243 fprintf (file, "%s:\n", lazy_ptr_name);
42244 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42245 fprintf (file, ASM_LONG "%s\n", binder_name);
42247 #endif /* TARGET_MACHO */
42249 /* Order the registers for the register allocator. */
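/* The resulting order is: call-clobbered general registers first, then
   call-saved general registers, then (when doing x87 math) the x87 stack
   registers, then the SSE, extended REX SSE, AVX-512 mask and MPX bound
   registers, then the x87 registers (when doing SSE math), and finally
   the MMX registers.  */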
42251 void
42252 x86_order_regs_for_local_alloc (void)
42254 int pos = 0;
42255 int i;
42257 /* First allocate the local general purpose registers. */
42258 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42259 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42260 reg_alloc_order [pos++] = i;
42262 /* Global general purpose registers. */
42263 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42264 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42265 reg_alloc_order [pos++] = i;
42267 /* x87 registers come first in case we are doing FP math
42268 using them. */
42269 if (!TARGET_SSE_MATH)
42270 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42271 reg_alloc_order [pos++] = i;
42273 /* SSE registers. */
42274 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42275 reg_alloc_order [pos++] = i;
42276 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42277 reg_alloc_order [pos++] = i;
42279 /* Extended REX SSE registers. */
42280 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42281 reg_alloc_order [pos++] = i;
42284 /* Mask registers. */
42284 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42285 reg_alloc_order [pos++] = i;
42287 /* MPX bound registers. */
42288 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42289 reg_alloc_order [pos++] = i;
42291 /* x87 registers. */
42292 if (TARGET_SSE_MATH)
42293 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42294 reg_alloc_order [pos++] = i;
42296 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42297 reg_alloc_order [pos++] = i;
42299 /* Initialize the rest of the array, as we do not allocate some
42300 registers at all. */
42301 while (pos < FIRST_PSEUDO_REGISTER)
42302 reg_alloc_order [pos++] = 0;
42305 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42306 in struct attribute_spec.handler. */
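/* A hypothetical use, for illustration only; the checks below require a
   32-bit target and an integer-constant argument of 0 or 1:

     struct S f (int) __attribute__ ((callee_pop_aggregate_return (1)));  */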
42307 static tree
42308 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42309 tree args,
42310 int,
42311 bool *no_add_attrs)
42313 if (TREE_CODE (*node) != FUNCTION_TYPE
42314 && TREE_CODE (*node) != METHOD_TYPE
42315 && TREE_CODE (*node) != FIELD_DECL
42316 && TREE_CODE (*node) != TYPE_DECL)
42318 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42319 name);
42320 *no_add_attrs = true;
42321 return NULL_TREE;
42323 if (TARGET_64BIT)
42325 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42326 name);
42327 *no_add_attrs = true;
42328 return NULL_TREE;
42330 if (is_attribute_p ("callee_pop_aggregate_return", name))
42332 tree cst;
42334 cst = TREE_VALUE (args);
42335 if (TREE_CODE (cst) != INTEGER_CST)
42337 warning (OPT_Wattributes,
42338 "%qE attribute requires an integer constant argument",
42339 name);
42340 *no_add_attrs = true;
42342 else if (compare_tree_int (cst, 0) != 0
42343 && compare_tree_int (cst, 1) != 0)
42345 warning (OPT_Wattributes,
42346 "argument to %qE attribute is neither zero, nor one",
42347 name);
42348 *no_add_attrs = true;
42351 return NULL_TREE;
42354 return NULL_TREE;
42357 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42358 struct attribute_spec.handler. */
42359 static tree
42360 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42361 bool *no_add_attrs)
42363 if (TREE_CODE (*node) != FUNCTION_TYPE
42364 && TREE_CODE (*node) != METHOD_TYPE
42365 && TREE_CODE (*node) != FIELD_DECL
42366 && TREE_CODE (*node) != TYPE_DECL)
42368 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42369 name);
42370 *no_add_attrs = true;
42371 return NULL_TREE;
42374 /* Can combine regparm with all attributes but fastcall. */
42375 if (is_attribute_p ("ms_abi", name))
42377 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42379 error ("ms_abi and sysv_abi attributes are not compatible");
42382 return NULL_TREE;
42384 else if (is_attribute_p ("sysv_abi", name))
42386 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42388 error ("ms_abi and sysv_abi attributes are not compatible");
42391 return NULL_TREE;
42394 return NULL_TREE;
42397 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42398 struct attribute_spec.handler. */
42399 static tree
42400 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42401 bool *no_add_attrs)
42403 tree *type = NULL;
42404 if (DECL_P (*node))
42406 if (TREE_CODE (*node) == TYPE_DECL)
42407 type = &TREE_TYPE (*node);
42409 else
42410 type = node;
42412 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42414 warning (OPT_Wattributes, "%qE attribute ignored",
42415 name);
42416 *no_add_attrs = true;
42419 else if ((is_attribute_p ("ms_struct", name)
42420 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42421 || ((is_attribute_p ("gcc_struct", name)
42422 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42424 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42425 name);
42426 *no_add_attrs = true;
42429 return NULL_TREE;
42432 static tree
42433 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42434 bool *no_add_attrs)
42436 if (TREE_CODE (*node) != FUNCTION_DECL)
42438 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42439 name);
42440 *no_add_attrs = true;
42442 return NULL_TREE;
42445 static bool
42446 ix86_ms_bitfield_layout_p (const_tree record_type)
42448 return ((TARGET_MS_BITFIELD_LAYOUT
42449 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42450 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42453 /* Returns an expression indicating where the this parameter is
42454 located on entry to the FUNCTION. */
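/* In other words: for 64-bit targets THIS is in the first integer argument
   register of the function's ABI (RDI, or RCX under the MS ABI), shifted to
   the second one when the return value is passed in memory; for 32-bit
   targets it is in ECX/EAX/EDX for the register-passing conventions handled
   below, and otherwise in the first stack argument slot at 4(%esp)
   (8(%esp) when an aggregate return pointer is pushed first).  */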
42456 static rtx
42457 x86_this_parameter (tree function)
42459 tree type = TREE_TYPE (function);
42460 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42461 int nregs;
42463 if (TARGET_64BIT)
42465 const int *parm_regs;
42467 if (ix86_function_type_abi (type) == MS_ABI)
42468 parm_regs = x86_64_ms_abi_int_parameter_registers;
42469 else
42470 parm_regs = x86_64_int_parameter_registers;
42471 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42474 nregs = ix86_function_regparm (type, function);
42476 if (nregs > 0 && !stdarg_p (type))
42478 int regno;
42479 unsigned int ccvt = ix86_get_callcvt (type);
42481 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42482 regno = aggr ? DX_REG : CX_REG;
42483 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42485 regno = CX_REG;
42486 if (aggr)
42487 return gen_rtx_MEM (SImode,
42488 plus_constant (Pmode, stack_pointer_rtx, 4));
42490 else
42492 regno = AX_REG;
42493 if (aggr)
42495 regno = DX_REG;
42496 if (nregs == 1)
42497 return gen_rtx_MEM (SImode,
42498 plus_constant (Pmode,
42499 stack_pointer_rtx, 4));
42502 return gen_rtx_REG (SImode, regno);
42505 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42506 aggr ? 8 : 4));
42509 /* Determine whether x86_output_mi_thunk can succeed. */
42511 static bool
42512 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42513 const_tree function)
42515 /* 64-bit can handle anything. */
42516 if (TARGET_64BIT)
42517 return true;
42519 /* For 32-bit, everything's fine if we have one free register. */
42520 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42521 return true;
42523 /* Need a free register for vcall_offset. */
42524 if (vcall_offset)
42525 return false;
42527 /* Need a free register for GOT references. */
42528 if (flag_pic && !targetm.binds_local_p (function))
42529 return false;
42531 /* Otherwise ok. */
42532 return true;
42535 /* Output the assembler code for a thunk function. THUNK_DECL is the
42536 declaration for the thunk function itself, FUNCTION is the decl for
42537 the target function. DELTA is an immediate constant offset to be
42538 added to THIS. If VCALL_OFFSET is nonzero, the word at
42539 *(*this + vcall_offset) should be added to THIS. */
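/* Conceptually the thunk adjusts the incoming THIS pointer and then
   tail-calls FUNCTION (pseudo-code only):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(*this + VCALL_OFFSET);   -- word loaded from the vtable
     goto FUNCTION;  */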
42541 static void
42542 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42543 HOST_WIDE_INT vcall_offset, tree function)
42545 rtx this_param = x86_this_parameter (function);
42546 rtx this_reg, tmp, fnaddr;
42547 unsigned int tmp_regno;
42548 rtx_insn *insn;
42550 if (TARGET_64BIT)
42551 tmp_regno = R10_REG;
42552 else
42554 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42555 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42556 tmp_regno = AX_REG;
42557 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42558 tmp_regno = DX_REG;
42559 else
42560 tmp_regno = CX_REG;
42563 emit_note (NOTE_INSN_PROLOGUE_END);
42565 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42566 pull it in now and let DELTA benefit. */
42567 if (REG_P (this_param))
42568 this_reg = this_param;
42569 else if (vcall_offset)
42571 /* Put the this parameter into %eax. */
42572 this_reg = gen_rtx_REG (Pmode, AX_REG);
42573 emit_move_insn (this_reg, this_param);
42575 else
42576 this_reg = NULL_RTX;
42578 /* Adjust the this parameter by a fixed constant. */
42579 if (delta)
42581 rtx delta_rtx = GEN_INT (delta);
42582 rtx delta_dst = this_reg ? this_reg : this_param;
42584 if (TARGET_64BIT)
42586 if (!x86_64_general_operand (delta_rtx, Pmode))
42588 tmp = gen_rtx_REG (Pmode, tmp_regno);
42589 emit_move_insn (tmp, delta_rtx);
42590 delta_rtx = tmp;
42594 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42597 /* Adjust the this parameter by a value stored in the vtable. */
42598 if (vcall_offset)
42600 rtx vcall_addr, vcall_mem, this_mem;
42602 tmp = gen_rtx_REG (Pmode, tmp_regno);
42604 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42605 if (Pmode != ptr_mode)
42606 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42607 emit_move_insn (tmp, this_mem);
42609 /* Adjust the this parameter. */
42610 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42611 if (TARGET_64BIT
42612 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42614 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42615 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42616 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42619 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42620 if (Pmode != ptr_mode)
42621 emit_insn (gen_addsi_1_zext (this_reg,
42622 gen_rtx_REG (ptr_mode,
42623 REGNO (this_reg)),
42624 vcall_mem));
42625 else
42626 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42629 /* If necessary, drop THIS back to its stack slot. */
42630 if (this_reg && this_reg != this_param)
42631 emit_move_insn (this_param, this_reg);
42633 fnaddr = XEXP (DECL_RTL (function), 0);
42634 if (TARGET_64BIT)
42636 if (!flag_pic || targetm.binds_local_p (function)
42637 || TARGET_PECOFF)
42639 else
42641 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42642 tmp = gen_rtx_CONST (Pmode, tmp);
42643 fnaddr = gen_const_mem (Pmode, tmp);
42646 else
42648 if (!flag_pic || targetm.binds_local_p (function))
42650 #if TARGET_MACHO
42651 else if (TARGET_MACHO)
42653 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42654 fnaddr = XEXP (fnaddr, 0);
42656 #endif /* TARGET_MACHO */
42657 else
42659 tmp = gen_rtx_REG (Pmode, CX_REG);
42660 output_set_got (tmp, NULL_RTX);
42662 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42663 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42664 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42665 fnaddr = gen_const_mem (Pmode, fnaddr);
42669 /* Our sibling call patterns do not allow memories, because we have no
42670 predicate that can distinguish between frame and non-frame memory.
42671 For our purposes here, we can get away with (ab)using a jump pattern,
42672 because we're going to do no optimization. */
42673 if (MEM_P (fnaddr))
42675 if (sibcall_insn_operand (fnaddr, word_mode))
42677 fnaddr = XEXP (DECL_RTL (function), 0);
42678 tmp = gen_rtx_MEM (QImode, fnaddr);
42679 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42680 tmp = emit_call_insn (tmp);
42681 SIBLING_CALL_P (tmp) = 1;
42683 else
42684 emit_jump_insn (gen_indirect_jump (fnaddr));
42686 else
42688 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42689 fnaddr = legitimize_pic_address (fnaddr,
42690 gen_rtx_REG (Pmode, tmp_regno));
42692 if (!sibcall_insn_operand (fnaddr, word_mode))
42694 tmp = gen_rtx_REG (word_mode, tmp_regno);
42695 if (GET_MODE (fnaddr) != word_mode)
42696 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42697 emit_move_insn (tmp, fnaddr);
42698 fnaddr = tmp;
42701 tmp = gen_rtx_MEM (QImode, fnaddr);
42702 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42703 tmp = emit_call_insn (tmp);
42704 SIBLING_CALL_P (tmp) = 1;
42706 emit_barrier ();
42708 /* Emit just enough of rest_of_compilation to get the insns emitted.
42709 Note that use_thunk calls assemble_start_function et al. */
42710 insn = get_insns ();
42711 shorten_branches (insn);
42712 final_start_function (insn, file, 1);
42713 final (insn, file, 1);
42714 final_end_function ();
42717 static void
42718 x86_file_start (void)
42720 default_file_start ();
42721 if (TARGET_16BIT)
42722 fputs ("\t.code16gcc\n", asm_out_file);
42723 #if TARGET_MACHO
42724 darwin_file_start ();
42725 #endif
42726 if (X86_FILE_START_VERSION_DIRECTIVE)
42727 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
42728 if (X86_FILE_START_FLTUSED)
42729 fputs ("\t.global\t__fltused\n", asm_out_file);
42730 if (ix86_asm_dialect == ASM_INTEL)
42731 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
42735 x86_field_alignment (tree field, int computed)
42737 machine_mode mode;
42738 tree type = TREE_TYPE (field);
42740 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
42741 return computed;
42742 mode = TYPE_MODE (strip_array_types (type));
42743 if (mode == DFmode || mode == DCmode
42744 || GET_MODE_CLASS (mode) == MODE_INT
42745 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
42746 return MIN (32, computed);
42747 return computed;
42750 /* Print call to TARGET to FILE. */
42752 static void
42753 x86_print_call_or_nop (FILE *file, const char *target)
42755 if (flag_nop_mcount)
42756 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
42757 else
42758 fprintf (file, "1:\tcall\t%s\n", target);
42761 /* Output assembler code to FILE to increment profiler label # LABELNO
42762 for profiling a function entry. */
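/* For example, on a 64-bit non-PIC GNU/Linux target with -pg this emits
   roughly

     1:	call	mcount

   (a 5-byte NOP replaces the call under -mnop-mcount), and -mrecord-mcount
   additionally records the address of label 1 in the __mcount_loc section,
   as handled at the end of this function.  */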
42763 void
42764 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
42766 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
42767 : MCOUNT_NAME);
42768 if (TARGET_64BIT)
42770 #ifndef NO_PROFILE_COUNTERS
42771 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
42772 #endif
42774 if (!TARGET_PECOFF && flag_pic)
42775 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
42776 else
42777 x86_print_call_or_nop (file, mcount_name);
42779 else if (flag_pic)
42781 #ifndef NO_PROFILE_COUNTERS
42782 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
42783 LPREFIX, labelno);
42784 #endif
42785 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
42787 else
42789 #ifndef NO_PROFILE_COUNTERS
42790 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
42791 LPREFIX, labelno);
42792 #endif
42793 x86_print_call_or_nop (file, mcount_name);
42796 if (flag_record_mcount)
42798 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
42799 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
42800 fprintf (file, "\t.previous\n");
42804 /* We don't have exact information about the insn sizes, but we may assume
42805 quite safely that we are informed about all 1-byte insns and memory
42806 address sizes. This is enough to eliminate unnecessary padding in
42807 99% of cases. */
42809 static int
42810 min_insn_size (rtx_insn *insn)
42812 int l = 0, len;
42814 if (!INSN_P (insn) || !active_insn_p (insn))
42815 return 0;
42817 /* Discard alignments we've emitted, and jump instructions. */
42818 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
42819 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
42820 return 0;
42822 /* Important case - calls are always 5 bytes.
42823 It is common to have many calls in a row. */
42824 if (CALL_P (insn)
42825 && symbolic_reference_mentioned_p (PATTERN (insn))
42826 && !SIBLING_CALL_P (insn))
42827 return 5;
42828 len = get_attr_length (insn);
42829 if (len <= 1)
42830 return 1;
42832 /* For normal instructions we rely on get_attr_length being exact,
42833 with a few exceptions. */
42834 if (!JUMP_P (insn))
42836 enum attr_type type = get_attr_type (insn);
42838 switch (type)
42840 case TYPE_MULTI:
42841 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
42842 || asm_noperands (PATTERN (insn)) >= 0)
42843 return 0;
42844 break;
42845 case TYPE_OTHER:
42846 case TYPE_FCMP:
42847 break;
42848 default:
42849 /* Otherwise trust get_attr_length. */
42850 return len;
42853 l = get_attr_length_address (insn);
42854 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
42855 l = 4;
42857 if (l)
42858 return 1+l;
42859 else
42860 return 2;
42863 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42865 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
42866 16-byte window. */
42868 static void
42869 ix86_avoid_jump_mispredicts (void)
42871 rtx_insn *insn, *start = get_insns ();
42872 int nbytes = 0, njumps = 0;
42873 int isjump = 0;
42875 /* Look for all minimal intervals of instructions containing 4 jumps.
42876 The intervals are bounded by START and INSN. NBYTES is the total
42877 size of instructions in the interval including INSN and not including
42878 START. When NBYTES is smaller than 16, it is possible
42879 that START and INSN end up in the same 16-byte window.
42881 The smallest offset in the window at which INSN can start is the case where
42882 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
42883 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
42885 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
42886 have to, control transfer to its label(s) can be performed through other
42887 means, and we also estimate the minimum length of all asm stmts as 0. */
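        /* Worked example: if the interval up to and including INSN is
           NBYTES = 12 bytes and INSN itself is 3 bytes, INSN can start as
           early as offset 12 - 3 = 9 of a 16-byte window, so the pass pads
           with a p2align whose max skip is 15 - 12 + 3 = 6, matching the
           formula in the comment above.  */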
42888 for (insn = start; insn; insn = NEXT_INSN (insn))
42890 int min_size;
42892 if (LABEL_P (insn))
42894 int align = label_to_alignment (insn);
42895 int max_skip = label_to_max_skip (insn);
42897 if (max_skip > 15)
42898 max_skip = 15;
42899 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
42900 already in the current 16-byte window, because otherwise
42901 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
42902 bytes to reach a 16-byte boundary. */
42903 if (align <= 0
42904 || (align <= 3 && max_skip != (1 << align) - 1))
42905 max_skip = 0;
42906 if (dump_file)
42907 fprintf (dump_file, "Label %i with max_skip %i\n",
42908 INSN_UID (insn), max_skip);
42909 if (max_skip)
42911 while (nbytes + max_skip >= 16)
42913 start = NEXT_INSN (start);
42914 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42915 || CALL_P (start))
42916 njumps--, isjump = 1;
42917 else
42918 isjump = 0;
42919 nbytes -= min_insn_size (start);
42922 continue;
42925 min_size = min_insn_size (insn);
42926 nbytes += min_size;
42927 if (dump_file)
42928 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
42929 INSN_UID (insn), min_size);
42930 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
42931 || CALL_P (insn))
42932 njumps++;
42933 else
42934 continue;
42936 while (njumps > 3)
42938 start = NEXT_INSN (start);
42939 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
42940 || CALL_P (start))
42941 njumps--, isjump = 1;
42942 else
42943 isjump = 0;
42944 nbytes -= min_insn_size (start);
42946 gcc_assert (njumps >= 0);
42947 if (dump_file)
42948 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
42949 INSN_UID (start), INSN_UID (insn), nbytes);
42951 if (njumps == 3 && isjump && nbytes < 16)
42953 int padsize = 15 - nbytes + min_insn_size (insn);
42955 if (dump_file)
42956 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
42957 INSN_UID (insn), padsize);
42958 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
42962 #endif
42964 /* The AMD Athlon works faster when RET is not the destination of a
42965 conditional jump or directly preceded by another jump instruction.
42966 We avoid the penalty by emitting the return with a rep prefix
42967 (rep ret) in such cases. */
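/* For example (assuming -mtune selects one of the affected CPUs and the
   function is optimized for speed), a tail such as

     jne	.L2
     ret

   is emitted instead as

     jne	.L2
     rep ret

   since the two-byte rep-prefixed return avoids the mispredict penalty on
   those cores.  */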
42968 static void
42969 ix86_pad_returns (void)
42971 edge e;
42972 edge_iterator ei;
42974 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42976 basic_block bb = e->src;
42977 rtx_insn *ret = BB_END (bb);
42978 rtx_insn *prev;
42979 bool replace = false;
42981 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
42982 || optimize_bb_for_size_p (bb))
42983 continue;
42984 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
42985 if (active_insn_p (prev) || LABEL_P (prev))
42986 break;
42987 if (prev && LABEL_P (prev))
42989 edge e;
42990 edge_iterator ei;
42992 FOR_EACH_EDGE (e, ei, bb->preds)
42993 if (EDGE_FREQUENCY (e) && e->src->index >= 0
42994 && !(e->flags & EDGE_FALLTHRU))
42996 replace = true;
42997 break;
43000 if (!replace)
43002 prev = prev_active_insn (ret);
43003 if (prev
43004 && ((JUMP_P (prev) && any_condjump_p (prev))
43005 || CALL_P (prev)))
43006 replace = true;
43007 /* Empty functions get a branch mispredict even when
43008 the jump destination is not visible to us. */
43009 if (!prev && !optimize_function_for_size_p (cfun))
43010 replace = true;
43012 if (replace)
43014 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43015 delete_insn (ret);
43020 /* Count the minimum number of instructions in BB. Return 4 if the
43021 number of instructions >= 4. */
43023 static int
43024 ix86_count_insn_bb (basic_block bb)
43026 rtx_insn *insn;
43027 int insn_count = 0;
43029 /* Count the number of instructions in this block. Return 4 if the number
43030 of instructions >= 4. */
43031 FOR_BB_INSNS (bb, insn)
43033 /* This only happens in exit blocks. */
43034 if (JUMP_P (insn)
43035 && ANY_RETURN_P (PATTERN (insn)))
43036 break;
43038 if (NONDEBUG_INSN_P (insn)
43039 && GET_CODE (PATTERN (insn)) != USE
43040 && GET_CODE (PATTERN (insn)) != CLOBBER)
43042 insn_count++;
43043 if (insn_count >= 4)
43044 return insn_count;
43048 return insn_count;
43052 /* Count the minimum number of instructions in a code path ending in BB.
43053 Return 4 if the number of instructions >= 4. */
43055 static int
43056 ix86_count_insn (basic_block bb)
43058 edge e;
43059 edge_iterator ei;
43060 int min_prev_count;
43062 /* Only bother counting instructions along paths with no
43063 more than 2 basic blocks between entry and exit. Given
43064 that BB has an edge to exit, determine if a predecessor
43065 of BB has an edge from entry. If so, compute the number
43066 of instructions in the predecessor block. If there
43067 happen to be multiple such blocks, compute the minimum. */
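        /* I.e. only paths of the shape ENTRY -> BB -> EXIT or
           ENTRY -> PRED -> BB -> EXIT are examined; anything longer is
           simply treated as containing at least 4 instructions.  */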
43068 min_prev_count = 4;
43069 FOR_EACH_EDGE (e, ei, bb->preds)
43071 edge prev_e;
43072 edge_iterator prev_ei;
43074 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43076 min_prev_count = 0;
43077 break;
43079 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43081 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43083 int count = ix86_count_insn_bb (e->src);
43084 if (count < min_prev_count)
43085 min_prev_count = count;
43086 break;
43091 if (min_prev_count < 4)
43092 min_prev_count += ix86_count_insn_bb (bb);
43094 return min_prev_count;
43097 /* Pad short function to 4 instructions. */
43099 static void
43100 ix86_pad_short_function (void)
43102 edge e;
43103 edge_iterator ei;
43105 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43107 rtx_insn *ret = BB_END (e->src);
43108 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43110 int insn_count = ix86_count_insn (e->src);
43112 /* Pad short function. */
43113 if (insn_count < 4)
43115 rtx_insn *insn = ret;
43117 /* Find epilogue. */
43118 while (insn
43119 && (!NOTE_P (insn)
43120 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43121 insn = PREV_INSN (insn);
43123 if (!insn)
43124 insn = ret;
43126 /* Two NOPs count as one instruction. */
43127 insn_count = 2 * (4 - insn_count);
43128 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43134 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43135 the epilogue, the Windows system unwinder will apply epilogue logic and
43136 produce incorrect offsets. This can be avoided by adding a nop between
43137 the last insn that can throw and the first insn of the epilogue. */
43139 static void
43140 ix86_seh_fixup_eh_fallthru (void)
43142 edge e;
43143 edge_iterator ei;
43145 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43147 rtx_insn *insn, *next;
43149 /* Find the beginning of the epilogue. */
43150 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43151 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43152 break;
43153 if (insn == NULL)
43154 continue;
43156 /* We only care about preceding insns that can throw. */
43157 insn = prev_active_insn (insn);
43158 if (insn == NULL || !can_throw_internal (insn))
43159 continue;
43161 /* Do not separate calls from their debug information. */
43162 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43163 if (NOTE_P (next)
43164 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43165 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43166 insn = next;
43167 else
43168 break;
43170 emit_insn_after (gen_nops (const1_rtx), insn);
43174 /* Implement machine-specific optimizations. We implement padding of returns
43175 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43176 static void
43177 ix86_reorg (void)
43179 /* We are freeing block_for_insn in the toplev to keep compatibility
43180 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43181 compute_bb_for_insn ();
43183 if (TARGET_SEH && current_function_has_exception_handlers ())
43184 ix86_seh_fixup_eh_fallthru ();
43186 if (optimize && optimize_function_for_speed_p (cfun))
43188 if (TARGET_PAD_SHORT_FUNCTION)
43189 ix86_pad_short_function ();
43190 else if (TARGET_PAD_RETURNS)
43191 ix86_pad_returns ();
43192 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43193 if (TARGET_FOUR_JUMP_LIMIT)
43194 ix86_avoid_jump_mispredicts ();
43195 #endif
43199 /* Return nonzero when a QImode register that must be represented via a REX prefix
43200 is used. */
43201 bool
43202 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43204 int i;
43205 extract_insn_cached (insn);
43206 for (i = 0; i < recog_data.n_operands; i++)
43207 if (GENERAL_REG_P (recog_data.operand[i])
43208 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43209 return true;
43210 return false;
43213 /* Return true when INSN mentions a register that must be encoded using a REX
43214 prefix. */
43215 bool
43216 x86_extended_reg_mentioned_p (rtx insn)
43218 subrtx_iterator::array_type array;
43219 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43221 const_rtx x = *iter;
43222 if (REG_P (x)
43223 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43224 return true;
43226 return false;
43229 /* If profitable, negate (without causing overflow) integer constant
43230 of mode MODE at location LOC. Return true in this case. */
43231 bool
43232 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43234 HOST_WIDE_INT val;
43236 if (!CONST_INT_P (*loc))
43237 return false;
43239 switch (mode)
43241 case DImode:
43242 /* DImode x86_64 constants must fit in 32 bits. */
43243 gcc_assert (x86_64_immediate_operand (*loc, mode));
43245 mode = SImode;
43246 break;
43248 case SImode:
43249 case HImode:
43250 case QImode:
43251 break;
43253 default:
43254 gcc_unreachable ();
43257 /* Avoid overflows. */
43258 if (mode_signbit_p (mode, *loc))
43259 return false;
43261 val = INTVAL (*loc);
43263 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
43264 Exception: -128 encodes smaller than 128, so swap sign and op. */
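        /* E.g. `addl $-4, %eax' becomes `subl $4, %eax', and `addl $128, %eax'
           (which needs a 32-bit immediate) becomes `subl $-128, %eax' (which
           fits in a sign-extended 8-bit immediate); the callers then emit the
           opposite operation for the negated constant.  */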
43265 if ((val < 0 && val != -128)
43266 || val == 128)
43268 *loc = GEN_INT (-val);
43269 return true;
43272 return false;
43275 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43276 optabs would emit if we didn't have TFmode patterns. */
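/* The expansion below is the usual unsigned-to-float trick: if IN is
   non-negative it is converted directly as a signed value; otherwise it is
   halved while preserving the low (rounding) bit,

     i0 = (in >> 1) | (in & 1);

   that value is converted as a signed number, and the result is doubled
   (f0 + f0) to produce the final answer.  */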
43278 void
43279 x86_emit_floatuns (rtx operands[2])
43281 rtx_code_label *neglab, *donelab;
43282 rtx i0, i1, f0, in, out;
43283 machine_mode mode, inmode;
43285 inmode = GET_MODE (operands[1]);
43286 gcc_assert (inmode == SImode || inmode == DImode);
43288 out = operands[0];
43289 in = force_reg (inmode, operands[1]);
43290 mode = GET_MODE (out);
43291 neglab = gen_label_rtx ();
43292 donelab = gen_label_rtx ();
43293 f0 = gen_reg_rtx (mode);
43295 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43297 expand_float (out, in, 0);
43299 emit_jump_insn (gen_jump (donelab));
43300 emit_barrier ();
43302 emit_label (neglab);
43304 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43305 1, OPTAB_DIRECT);
43306 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43307 1, OPTAB_DIRECT);
43308 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43310 expand_float (f0, i0, 0);
43312 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43314 emit_label (donelab);
43317 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43318 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43319 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43320 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43322 /* Get a vector mode of the same size as the original but with elements
43323 twice as wide. This is only guaranteed to apply to integral vectors. */
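/* E.g. for V8HImode (eight 16-bit elements) this is expected to return
   V4SImode (four 32-bit elements), still 128 bits wide; the asserts below
   verify exactly that relationship.  */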
43325 static inline machine_mode
43326 get_mode_wider_vector (machine_mode o)
43328 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43329 machine_mode n = GET_MODE_WIDER_MODE (o);
43330 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43331 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43332 return n;
43335 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43336 fill TARGET with VAL via vec_duplicate. */
43338 static bool
43339 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43341 bool ok;
43342 rtx_insn *insn;
43343 rtx dup;
43345 /* First attempt to recognize VAL as-is. */
43346 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43347 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43348 if (recog_memoized (insn) < 0)
43350 rtx_insn *seq;
43351 /* If that fails, force VAL into a register. */
43353 start_sequence ();
43354 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43355 seq = get_insns ();
43356 end_sequence ();
43357 if (seq)
43358 emit_insn_before (seq, insn);
43360 ok = recog_memoized (insn) >= 0;
43361 gcc_assert (ok);
43363 return true;
43366 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43367 with all elements equal to VAL. Return true if successful. */
43369 static bool
43370 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43371 rtx target, rtx val)
43373 bool ok;
43375 switch (mode)
43377 case V2SImode:
43378 case V2SFmode:
43379 if (!mmx_ok)
43380 return false;
43381 /* FALLTHRU */
43383 case V4DFmode:
43384 case V4DImode:
43385 case V8SFmode:
43386 case V8SImode:
43387 case V2DFmode:
43388 case V2DImode:
43389 case V4SFmode:
43390 case V4SImode:
43391 case V16SImode:
43392 case V8DImode:
43393 case V16SFmode:
43394 case V8DFmode:
43395 return ix86_vector_duplicate_value (mode, target, val);
43397 case V4HImode:
43398 if (!mmx_ok)
43399 return false;
43400 if (TARGET_SSE || TARGET_3DNOW_A)
43402 rtx x;
43404 val = gen_lowpart (SImode, val);
43405 x = gen_rtx_TRUNCATE (HImode, val);
43406 x = gen_rtx_VEC_DUPLICATE (mode, x);
43407 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43408 return true;
43410 goto widen;
43412 case V8QImode:
43413 if (!mmx_ok)
43414 return false;
43415 goto widen;
43417 case V8HImode:
43418 if (TARGET_AVX2)
43419 return ix86_vector_duplicate_value (mode, target, val);
43421 if (TARGET_SSE2)
43423 struct expand_vec_perm_d dperm;
43424 rtx tmp1, tmp2;
43426 permute:
43427 memset (&dperm, 0, sizeof (dperm));
43428 dperm.target = target;
43429 dperm.vmode = mode;
43430 dperm.nelt = GET_MODE_NUNITS (mode);
43431 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43432 dperm.one_operand_p = true;
43434 /* Extend to SImode using a paradoxical SUBREG. */
43435 tmp1 = gen_reg_rtx (SImode);
43436 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43438 /* Insert the SImode value as low element of a V4SImode vector. */
43439 tmp2 = gen_reg_rtx (V4SImode);
43440 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43441 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43443 ok = (expand_vec_perm_1 (&dperm)
43444 || expand_vec_perm_broadcast_1 (&dperm));
43445 gcc_assert (ok);
43446 return ok;
43448 goto widen;
43450 case V16QImode:
43451 if (TARGET_AVX2)
43452 return ix86_vector_duplicate_value (mode, target, val);
43454 if (TARGET_SSE2)
43455 goto permute;
43456 goto widen;
43458 widen:
43459 /* Replicate the value once into the next wider mode and recurse. */
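        /* E.g. to broadcast a QImode value V into V8QImode, build the HImode
           value (V << 8) | V and recurse, broadcasting that into V4HImode
           instead; the result is then copied to TARGET through a lowpart of
           the wider vector.  */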
43461 machine_mode smode, wsmode, wvmode;
43462 rtx x;
43464 smode = GET_MODE_INNER (mode);
43465 wvmode = get_mode_wider_vector (mode);
43466 wsmode = GET_MODE_INNER (wvmode);
43468 val = convert_modes (wsmode, smode, val, true);
43469 x = expand_simple_binop (wsmode, ASHIFT, val,
43470 GEN_INT (GET_MODE_BITSIZE (smode)),
43471 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43472 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43474 x = gen_reg_rtx (wvmode);
43475 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43476 gcc_assert (ok);
43477 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43478 return ok;
43481 case V16HImode:
43482 case V32QImode:
43483 if (TARGET_AVX2)
43484 return ix86_vector_duplicate_value (mode, target, val);
43485 else
43487 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43488 rtx x = gen_reg_rtx (hvmode);
43490 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43491 gcc_assert (ok);
43493 x = gen_rtx_VEC_CONCAT (mode, x, x);
43494 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43496 return true;
43498 case V64QImode:
43499 case V32HImode:
43500 if (TARGET_AVX512BW)
43501 return ix86_vector_duplicate_value (mode, target, val);
43502 else
43504 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43505 rtx x = gen_reg_rtx (hvmode);
43507 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43508 gcc_assert (ok);
43510 x = gen_rtx_VEC_CONCAT (mode, x, x);
43511 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43513 return true;
43515 default:
43516 return false;
43520 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43521 whose ONE_VAR element is VAR and whose other elements are zero. Return true
43522 if successful. */
43524 static bool
43525 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43526 rtx target, rtx var, int one_var)
43528 machine_mode vsimode;
43529 rtx new_target;
43530 rtx x, tmp;
43531 bool use_vector_set = false;
43533 switch (mode)
43535 case V2DImode:
43536 /* For SSE4.1, we normally use vector set. But if the second
43537 element is zero and inter-unit moves are OK, we use movq
43538 instead. */
43539 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43540 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43541 && one_var == 0));
43542 break;
43543 case V16QImode:
43544 case V4SImode:
43545 case V4SFmode:
43546 use_vector_set = TARGET_SSE4_1;
43547 break;
43548 case V8HImode:
43549 use_vector_set = TARGET_SSE2;
43550 break;
43551 case V4HImode:
43552 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43553 break;
43554 case V32QImode:
43555 case V16HImode:
43556 case V8SImode:
43557 case V8SFmode:
43558 case V4DFmode:
43559 use_vector_set = TARGET_AVX;
43560 break;
43561 case V4DImode:
43562 /* Use ix86_expand_vector_set in 64bit mode only. */
43563 use_vector_set = TARGET_AVX && TARGET_64BIT;
43564 break;
43565 default:
43566 break;
43569 if (use_vector_set)
43571 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43572 var = force_reg (GET_MODE_INNER (mode), var);
43573 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43574 return true;
43577 switch (mode)
43579 case V2SFmode:
43580 case V2SImode:
43581 if (!mmx_ok)
43582 return false;
43583 /* FALLTHRU */
43585 case V2DFmode:
43586 case V2DImode:
43587 if (one_var != 0)
43588 return false;
43589 var = force_reg (GET_MODE_INNER (mode), var);
43590 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43591 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43592 return true;
43594 case V4SFmode:
43595 case V4SImode:
43596 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43597 new_target = gen_reg_rtx (mode);
43598 else
43599 new_target = target;
43600 var = force_reg (GET_MODE_INNER (mode), var);
43601 x = gen_rtx_VEC_DUPLICATE (mode, var);
43602 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43603 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43604 if (one_var != 0)
43606 /* We need to shuffle the value to the correct position, so
43607 create a new pseudo to store the intermediate result. */
43609 /* With SSE2, we can use the integer shuffle insns. */
43610 if (mode != V4SFmode && TARGET_SSE2)
43612 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43613 const1_rtx,
43614 GEN_INT (one_var == 1 ? 0 : 1),
43615 GEN_INT (one_var == 2 ? 0 : 1),
43616 GEN_INT (one_var == 3 ? 0 : 1)));
43617 if (target != new_target)
43618 emit_move_insn (target, new_target);
43619 return true;
43622 /* Otherwise convert the intermediate result to V4SFmode and
43623 use the SSE1 shuffle instructions. */
43624 if (mode != V4SFmode)
43626 tmp = gen_reg_rtx (V4SFmode);
43627 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43629 else
43630 tmp = new_target;
43632 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43633 const1_rtx,
43634 GEN_INT (one_var == 1 ? 0 : 1),
43635 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43636 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43638 if (mode != V4SFmode)
43639 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43640 else if (tmp != target)
43641 emit_move_insn (target, tmp);
43643 else if (target != new_target)
43644 emit_move_insn (target, new_target);
43645 return true;
43647 case V8HImode:
43648 case V16QImode:
43649 vsimode = V4SImode;
43650 goto widen;
43651 case V4HImode:
43652 case V8QImode:
43653 if (!mmx_ok)
43654 return false;
43655 vsimode = V2SImode;
43656 goto widen;
43657 widen:
43658 if (one_var != 0)
43659 return false;
43661 /* Zero extend the variable element to SImode and recurse. */
43662 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43664 x = gen_reg_rtx (vsimode);
43665 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43666 var, one_var))
43667 gcc_unreachable ();
43669 emit_move_insn (target, gen_lowpart (mode, x));
43670 return true;
43672 default:
43673 return false;
43677 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43678 consisting of the values in VALS. It is known that all elements
43679 except ONE_VAR are constants. Return true if successful. */
43681 static bool
43682 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43683 rtx target, rtx vals, int one_var)
43685 rtx var = XVECEXP (vals, 0, one_var);
43686 machine_mode wmode;
43687 rtx const_vec, x;
43689 const_vec = copy_rtx (vals);
43690 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43691 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43693 switch (mode)
43695 case V2DFmode:
43696 case V2DImode:
43697 case V2SFmode:
43698 case V2SImode:
43699 /* For the two element vectors, it's just as easy to use
43700 the general case. */
43701 return false;
43703 case V4DImode:
43704 /* Use ix86_expand_vector_set in 64bit mode only. */
43705 if (!TARGET_64BIT)
43706 return false;
43707 case V4DFmode:
43708 case V8SFmode:
43709 case V8SImode:
43710 case V16HImode:
43711 case V32QImode:
43712 case V4SFmode:
43713 case V4SImode:
43714 case V8HImode:
43715 case V4HImode:
43716 break;
43718 case V16QImode:
43719 if (TARGET_SSE4_1)
43720 break;
43721 wmode = V8HImode;
43722 goto widen;
43723 case V8QImode:
43724 wmode = V4HImode;
43725 goto widen;
43726 widen:
43727 /* There's no way to set one QImode entry easily. Combine
43728 the variable value with its adjacent constant value, and
43729 promote to an HImode set. */
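        /* E.g. to place a variable byte X at index 5 of a V16QImode vector,
           pair it with the constant byte C at index 4, build the HImode
           value (X << 8) | C, load the all-constant vector viewed as
           V8HImode, and overwrite HImode element 5 >> 1 == 2 with the
           combined value.  */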
43730 x = XVECEXP (vals, 0, one_var ^ 1);
43731 if (one_var & 1)
43733 var = convert_modes (HImode, QImode, var, true);
43734 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
43735 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43736 x = GEN_INT (INTVAL (x) & 0xff);
43738 else
43740 var = convert_modes (HImode, QImode, var, true);
43741 x = gen_int_mode (INTVAL (x) << 8, HImode);
43743 if (x != const0_rtx)
43744 var = expand_simple_binop (HImode, IOR, var, x, var,
43745 1, OPTAB_LIB_WIDEN);
43747 x = gen_reg_rtx (wmode);
43748 emit_move_insn (x, gen_lowpart (wmode, const_vec));
43749 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
43751 emit_move_insn (target, gen_lowpart (mode, x));
43752 return true;
43754 default:
43755 return false;
43758 emit_move_insn (target, const_vec);
43759 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43760 return true;
43763 /* A subroutine of ix86_expand_vector_init_general. Use vector
43764 concatenate to handle the most general case: all values variable,
43765 and none identical. */
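/* E.g. a V8SImode vector built from eight variable SImode values is
   assembled pairwise: four V2SImode concatenations, then two V4SImode
   ones, and finally a single V8SImode VEC_CONCAT of the two halves.
   (The inputs are processed backwards purely to help the register
   allocator; see the FIXME below.)  */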
43767 static void
43768 ix86_expand_vector_init_concat (machine_mode mode,
43769 rtx target, rtx *ops, int n)
43771 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
43772 rtx first[16], second[8], third[4];
43773 rtvec v;
43774 int i, j;
43776 switch (n)
43778 case 2:
43779 switch (mode)
43781 case V16SImode:
43782 cmode = V8SImode;
43783 break;
43784 case V16SFmode:
43785 cmode = V8SFmode;
43786 break;
43787 case V8DImode:
43788 cmode = V4DImode;
43789 break;
43790 case V8DFmode:
43791 cmode = V4DFmode;
43792 break;
43793 case V8SImode:
43794 cmode = V4SImode;
43795 break;
43796 case V8SFmode:
43797 cmode = V4SFmode;
43798 break;
43799 case V4DImode:
43800 cmode = V2DImode;
43801 break;
43802 case V4DFmode:
43803 cmode = V2DFmode;
43804 break;
43805 case V4SImode:
43806 cmode = V2SImode;
43807 break;
43808 case V4SFmode:
43809 cmode = V2SFmode;
43810 break;
43811 case V2DImode:
43812 cmode = DImode;
43813 break;
43814 case V2SImode:
43815 cmode = SImode;
43816 break;
43817 case V2DFmode:
43818 cmode = DFmode;
43819 break;
43820 case V2SFmode:
43821 cmode = SFmode;
43822 break;
43823 default:
43824 gcc_unreachable ();
43827 if (!register_operand (ops[1], cmode))
43828 ops[1] = force_reg (cmode, ops[1]);
43829 if (!register_operand (ops[0], cmode))
43830 ops[0] = force_reg (cmode, ops[0]);
43831 emit_insn (gen_rtx_SET (VOIDmode, target,
43832 gen_rtx_VEC_CONCAT (mode, ops[0],
43833 ops[1])));
43834 break;
43836 case 4:
43837 switch (mode)
43839 case V4DImode:
43840 cmode = V2DImode;
43841 break;
43842 case V4DFmode:
43843 cmode = V2DFmode;
43844 break;
43845 case V4SImode:
43846 cmode = V2SImode;
43847 break;
43848 case V4SFmode:
43849 cmode = V2SFmode;
43850 break;
43851 default:
43852 gcc_unreachable ();
43854 goto half;
43856 case 8:
43857 switch (mode)
43859 case V8DImode:
43860 cmode = V2DImode;
43861 hmode = V4DImode;
43862 break;
43863 case V8DFmode:
43864 cmode = V2DFmode;
43865 hmode = V4DFmode;
43866 break;
43867 case V8SImode:
43868 cmode = V2SImode;
43869 hmode = V4SImode;
43870 break;
43871 case V8SFmode:
43872 cmode = V2SFmode;
43873 hmode = V4SFmode;
43874 break;
43875 default:
43876 gcc_unreachable ();
43878 goto half;
43880 case 16:
43881 switch (mode)
43883 case V16SImode:
43884 cmode = V2SImode;
43885 hmode = V4SImode;
43886 gmode = V8SImode;
43887 break;
43888 case V16SFmode:
43889 cmode = V2SFmode;
43890 hmode = V4SFmode;
43891 gmode = V8SFmode;
43892 break;
43893 default:
43894 gcc_unreachable ();
43896 goto half;
43898 half:
43899 /* FIXME: We process inputs backward to help RA. PR 36222. */
43900 i = n - 1;
43901 j = (n >> 1) - 1;
43902 for (; i > 0; i -= 2, j--)
43904 first[j] = gen_reg_rtx (cmode);
43905 v = gen_rtvec (2, ops[i - 1], ops[i]);
43906 ix86_expand_vector_init (false, first[j],
43907 gen_rtx_PARALLEL (cmode, v));
43910 n >>= 1;
43911 if (n > 4)
43913 gcc_assert (hmode != VOIDmode);
43914 gcc_assert (gmode != VOIDmode);
43915 for (i = j = 0; i < n; i += 2, j++)
43917 second[j] = gen_reg_rtx (hmode);
43918 ix86_expand_vector_init_concat (hmode, second [j],
43919 &first [i], 2);
43921 n >>= 1;
43922 for (i = j = 0; i < n; i += 2, j++)
43924 third[j] = gen_reg_rtx (gmode);
43925 ix86_expand_vector_init_concat (gmode, third[j],
43926 &second[i], 2);
43928 n >>= 1;
43929 ix86_expand_vector_init_concat (mode, target, third, n);
43931 else if (n > 2)
43933 gcc_assert (hmode != VOIDmode);
43934 for (i = j = 0; i < n; i += 2, j++)
43936 second[j] = gen_reg_rtx (hmode);
43937 ix86_expand_vector_init_concat (hmode, second [j],
43938 &first [i], 2);
43940 n >>= 1;
43941 ix86_expand_vector_init_concat (mode, target, second, n);
43943 else
43944 ix86_expand_vector_init_concat (mode, target, first, n);
43945 break;
43947 default:
43948 gcc_unreachable ();
43952 /* A subroutine of ix86_expand_vector_init_general. Use vector
43953 interleave to handle the most general case: all values variable,
43954 and none identical. */
43956 static void
43957 ix86_expand_vector_init_interleave (machine_mode mode,
43958 rtx target, rtx *ops, int n)
43960 machine_mode first_imode, second_imode, third_imode, inner_mode;
43961 int i, j;
43962 rtx op0, op1;
43963 rtx (*gen_load_even) (rtx, rtx, rtx);
43964 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
43965 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
43967 switch (mode)
43969 case V8HImode:
43970 gen_load_even = gen_vec_setv8hi;
43971 gen_interleave_first_low = gen_vec_interleave_lowv4si;
43972 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43973 inner_mode = HImode;
43974 first_imode = V4SImode;
43975 second_imode = V2DImode;
43976 third_imode = VOIDmode;
43977 break;
43978 case V16QImode:
43979 gen_load_even = gen_vec_setv16qi;
43980 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
43981 gen_interleave_second_low = gen_vec_interleave_lowv4si;
43982 inner_mode = QImode;
43983 first_imode = V8HImode;
43984 second_imode = V4SImode;
43985 third_imode = V2DImode;
43986 break;
43987 default:
43988 gcc_unreachable ();
43991 for (i = 0; i < n; i++)
43993 /* Extend the odd element to SImode using a paradoxical SUBREG. */
43994 op0 = gen_reg_rtx (SImode);
43995 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
43997 /* Insert the SImode value as low element of V4SImode vector. */
43998 op1 = gen_reg_rtx (V4SImode);
43999 op0 = gen_rtx_VEC_MERGE (V4SImode,
44000 gen_rtx_VEC_DUPLICATE (V4SImode,
44001 op0),
44002 CONST0_RTX (V4SImode),
44003 const1_rtx);
44004 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44006 /* Cast the V4SImode vector back to a vector in the original mode. */
44007 op0 = gen_reg_rtx (mode);
44008 emit_move_insn (op0, gen_lowpart (mode, op1));
44010 /* Load even elements into the second position. */
44011 emit_insn (gen_load_even (op0,
44012 force_reg (inner_mode,
44013 ops [i + i + 1]),
44014 const1_rtx));
44016 /* Cast vector to FIRST_IMODE vector. */
44017 ops[i] = gen_reg_rtx (first_imode);
44018 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44021 /* Interleave low FIRST_IMODE vectors. */
44022 for (i = j = 0; i < n; i += 2, j++)
44024 op0 = gen_reg_rtx (first_imode);
44025 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44027 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44028 ops[j] = gen_reg_rtx (second_imode);
44029 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44032 /* Interleave low SECOND_IMODE vectors. */
44033 switch (second_imode)
44035 case V4SImode:
44036 for (i = j = 0; i < n / 2; i += 2, j++)
44038 op0 = gen_reg_rtx (second_imode);
44039 emit_insn (gen_interleave_second_low (op0, ops[i],
44040 ops[i + 1]));
44042 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44043 vector. */
44044 ops[j] = gen_reg_rtx (third_imode);
44045 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44047 second_imode = V2DImode;
44048 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44049 /* FALLTHRU */
44051 case V2DImode:
44052 op0 = gen_reg_rtx (second_imode);
44053 emit_insn (gen_interleave_second_low (op0, ops[0],
44054 ops[1]));
44056 /* Cast the SECOND_IMODE vector back to a vector in the original
44057 mode. */
44058 emit_insn (gen_rtx_SET (VOIDmode, target,
44059 gen_lowpart (mode, op0)));
44060 break;
44062 default:
44063 gcc_unreachable ();
44067 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44068 all values variable, and none identical. */
44070 static void
44071 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44072 rtx target, rtx vals)
44074 rtx ops[64], op0, op1, op2, op3, op4, op5;
44075 machine_mode half_mode = VOIDmode;
44076 machine_mode quarter_mode = VOIDmode;
44077 int n, i;
44079 switch (mode)
44081 case V2SFmode:
44082 case V2SImode:
44083 if (!mmx_ok && !TARGET_SSE)
44084 break;
44085 /* FALLTHRU */
44087 case V16SImode:
44088 case V16SFmode:
44089 case V8DFmode:
44090 case V8DImode:
44091 case V8SFmode:
44092 case V8SImode:
44093 case V4DFmode:
44094 case V4DImode:
44095 case V4SFmode:
44096 case V4SImode:
44097 case V2DFmode:
44098 case V2DImode:
44099 n = GET_MODE_NUNITS (mode);
44100 for (i = 0; i < n; i++)
44101 ops[i] = XVECEXP (vals, 0, i);
44102 ix86_expand_vector_init_concat (mode, target, ops, n);
44103 return;
44105 case V32QImode:
44106 half_mode = V16QImode;
44107 goto half;
44109 case V16HImode:
44110 half_mode = V8HImode;
44111 goto half;
44113 half:
44114 n = GET_MODE_NUNITS (mode);
44115 for (i = 0; i < n; i++)
44116 ops[i] = XVECEXP (vals, 0, i);
44117 op0 = gen_reg_rtx (half_mode);
44118 op1 = gen_reg_rtx (half_mode);
44119 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44120 n >> 2);
44121 ix86_expand_vector_init_interleave (half_mode, op1,
44122 &ops [n >> 1], n >> 2);
44123 emit_insn (gen_rtx_SET (VOIDmode, target,
44124 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44125 return;
44127 case V64QImode:
44128 quarter_mode = V16QImode;
44129 half_mode = V32QImode;
44130 goto quarter;
44132 case V32HImode:
44133 quarter_mode = V8HImode;
44134 half_mode = V16HImode;
44135 goto quarter;
44137 quarter:
44138 n = GET_MODE_NUNITS (mode);
44139 for (i = 0; i < n; i++)
44140 ops[i] = XVECEXP (vals, 0, i);
44141 op0 = gen_reg_rtx (quarter_mode);
44142 op1 = gen_reg_rtx (quarter_mode);
44143 op2 = gen_reg_rtx (quarter_mode);
44144 op3 = gen_reg_rtx (quarter_mode);
44145 op4 = gen_reg_rtx (half_mode);
44146 op5 = gen_reg_rtx (half_mode);
44147 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44148 n >> 3);
44149 ix86_expand_vector_init_interleave (quarter_mode, op1,
44150 &ops [n >> 2], n >> 3);
44151 ix86_expand_vector_init_interleave (quarter_mode, op2,
44152 &ops [n >> 1], n >> 3);
44153 ix86_expand_vector_init_interleave (quarter_mode, op3,
44154 &ops [(n >> 1) | (n >> 2)], n >> 3);
44155 emit_insn (gen_rtx_SET (VOIDmode, op4,
44156 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44157 emit_insn (gen_rtx_SET (VOIDmode, op5,
44158 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44159 emit_insn (gen_rtx_SET (VOIDmode, target,
44160 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44161 return;
44163 case V16QImode:
44164 if (!TARGET_SSE4_1)
44165 break;
44166 /* FALLTHRU */
44168 case V8HImode:
44169 if (!TARGET_SSE2)
44170 break;
44172 /* Don't use ix86_expand_vector_init_interleave if we can't
44173 move from GPR to SSE register directly. */
44174 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44175 break;
44177 n = GET_MODE_NUNITS (mode);
44178 for (i = 0; i < n; i++)
44179 ops[i] = XVECEXP (vals, 0, i);
44180 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44181 return;
44183 case V4HImode:
44184 case V8QImode:
44185 break;
44187 default:
44188 gcc_unreachable ();
44192 int i, j, n_elts, n_words, n_elt_per_word;
44193 machine_mode inner_mode;
44194 rtx words[4], shift;
44196 inner_mode = GET_MODE_INNER (mode);
44197 n_elts = GET_MODE_NUNITS (mode);
44198 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44199 n_elt_per_word = n_elts / n_words;
44200 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44202 for (i = 0; i < n_words; ++i)
44204 rtx word = NULL_RTX;
44206 for (j = 0; j < n_elt_per_word; ++j)
44208 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44209 elt = convert_modes (word_mode, inner_mode, elt, true);
44211 if (j == 0)
44212 word = elt;
44213 else
44215 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44216 word, 1, OPTAB_LIB_WIDEN);
44217 word = expand_simple_binop (word_mode, IOR, word, elt,
44218 word, 1, OPTAB_LIB_WIDEN);
44222 words[i] = word;
44225 if (n_words == 1)
44226 emit_move_insn (target, gen_lowpart (mode, words[0]));
44227 else if (n_words == 2)
44229 rtx tmp = gen_reg_rtx (mode);
44230 emit_clobber (tmp);
44231 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44232 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44233 emit_move_insn (target, tmp);
44235 else if (n_words == 4)
44237 rtx tmp = gen_reg_rtx (V4SImode);
44238 gcc_assert (word_mode == SImode);
44239 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44240 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44241 emit_move_insn (target, gen_lowpart (mode, tmp));
44243 else
44244 gcc_unreachable ();
44248 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44249 instructions unless MMX_OK is true. */
44251 void
44252 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44254 machine_mode mode = GET_MODE (target);
44255 machine_mode inner_mode = GET_MODE_INNER (mode);
44256 int n_elts = GET_MODE_NUNITS (mode);
44257 int n_var = 0, one_var = -1;
44258 bool all_same = true, all_const_zero = true;
44259 int i;
44260 rtx x;
44262 for (i = 0; i < n_elts; ++i)
44264 x = XVECEXP (vals, 0, i);
44265 if (!(CONST_INT_P (x)
44266 || GET_CODE (x) == CONST_DOUBLE
44267 || GET_CODE (x) == CONST_FIXED))
44268 n_var++, one_var = i;
44269 else if (x != CONST0_RTX (inner_mode))
44270 all_const_zero = false;
44271 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44272 all_same = false;
44275 /* Constants are best loaded from the constant pool. */
44276 if (n_var == 0)
44278 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44279 return;
44282 /* If all values are identical, broadcast the value. */
44283 if (all_same
44284 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44285 XVECEXP (vals, 0, 0)))
44286 return;
44288 /* Values where only one field is non-constant are best loaded from
44289 the constant pool and overwritten via a move later. */
44290 if (n_var == 1)
44292 if (all_const_zero
44293 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44294 XVECEXP (vals, 0, one_var),
44295 one_var))
44296 return;
44298 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44299 return;
44302 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44305 void
44306 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44308 machine_mode mode = GET_MODE (target);
44309 machine_mode inner_mode = GET_MODE_INNER (mode);
44310 machine_mode half_mode;
44311 bool use_vec_merge = false;
44312 rtx tmp;
44313 static rtx (*gen_extract[6][2]) (rtx, rtx)
44315 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44316 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44317 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44318 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44319 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44320 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44322 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44324 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44325 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44326 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44327 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44328 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44329 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44331 int i, j, n;
44333 switch (mode)
44335 case V2SFmode:
44336 case V2SImode:
44337 if (mmx_ok)
44339 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44340 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44341 if (elt == 0)
44342 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44343 else
44344 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44345 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44346 return;
44348 break;
44350 case V2DImode:
44351 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44352 if (use_vec_merge)
44353 break;
44355 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44356 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44357 if (elt == 0)
44358 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44359 else
44360 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44361 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44362 return;
44364 case V2DFmode:
44366 rtx op0, op1;
44368 /* For the two element vectors, we implement a VEC_CONCAT with
44369 the extraction of the other element. */
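/* E.g. to set element 1 of {A, B} to X, the other element A is
   extracted with the VEC_SELECT below and the result is built as
   VEC_CONCAT (A, X); for element 0 it is VEC_CONCAT (X, B). */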
44371 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44372 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44374 if (elt == 0)
44375 op0 = val, op1 = tmp;
44376 else
44377 op0 = tmp, op1 = val;
44379 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44380 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44382 return;
44384 case V4SFmode:
44385 use_vec_merge = TARGET_SSE4_1;
44386 if (use_vec_merge)
44387 break;
44389 switch (elt)
44391 case 0:
44392 use_vec_merge = true;
44393 break;
44395 case 1:
44396 /* tmp = target = A B C D */
44397 tmp = copy_to_reg (target);
44398 /* target = A A B B */
44399 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44400 /* target = X A B B */
44401 ix86_expand_vector_set (false, target, val, 0);
44402 /* target = A X C D */
44403 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44404 const1_rtx, const0_rtx,
44405 GEN_INT (2+4), GEN_INT (3+4)));
44406 return;
44408 case 2:
44409 /* tmp = target = A B C D */
44410 tmp = copy_to_reg (target);
44411 /* tmp = X B C D */
44412 ix86_expand_vector_set (false, tmp, val, 0);
44413 /* target = A B X D */
44414 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44415 const0_rtx, const1_rtx,
44416 GEN_INT (0+4), GEN_INT (3+4)));
44417 return;
44419 case 3:
44420 /* tmp = target = A B C D */
44421 tmp = copy_to_reg (target);
44422 /* tmp = X B C D */
44423 ix86_expand_vector_set (false, tmp, val, 0);
44424 /* target = A B X D */
44425 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44426 const0_rtx, const1_rtx,
44427 GEN_INT (2+4), GEN_INT (0+4)));
44428 return;
44430 default:
44431 gcc_unreachable ();
44433 break;
44435 case V4SImode:
44436 use_vec_merge = TARGET_SSE4_1;
44437 if (use_vec_merge)
44438 break;
44440 /* Element 0 handled by vec_merge below. */
44441 if (elt == 0)
44443 use_vec_merge = true;
44444 break;
44447 if (TARGET_SSE2)
44449 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44450 store into element 0, then shuffle them back. */
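/* E.g. for ELT == 2 the permutation below is {2, 1, 0, 3}: the first
   pshufd swaps elements 0 and 2, the scalar set then replaces element 0,
   and the second pshufd (the swap is its own inverse) moves the new
   value back into element 2. */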
44452 rtx order[4];
44454 order[0] = GEN_INT (elt);
44455 order[1] = const1_rtx;
44456 order[2] = const2_rtx;
44457 order[3] = GEN_INT (3);
44458 order[elt] = const0_rtx;
44460 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44461 order[1], order[2], order[3]));
44463 ix86_expand_vector_set (false, target, val, 0);
44465 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44466 order[1], order[2], order[3]));
44468 else
44470 /* For SSE1, we have to reuse the V4SF code. */
44471 rtx t = gen_reg_rtx (V4SFmode);
44472 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44473 emit_move_insn (target, gen_lowpart (mode, t));
44475 return;
44477 case V8HImode:
44478 use_vec_merge = TARGET_SSE2;
44479 break;
44480 case V4HImode:
44481 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44482 break;
44484 case V16QImode:
44485 use_vec_merge = TARGET_SSE4_1;
44486 break;
44488 case V8QImode:
44489 break;
44491 case V32QImode:
44492 half_mode = V16QImode;
44493 j = 0;
44494 n = 16;
44495 goto half;
44497 case V16HImode:
44498 half_mode = V8HImode;
44499 j = 1;
44500 n = 8;
44501 goto half;
44503 case V8SImode:
44504 half_mode = V4SImode;
44505 j = 2;
44506 n = 4;
44507 goto half;
44509 case V4DImode:
44510 half_mode = V2DImode;
44511 j = 3;
44512 n = 2;
44513 goto half;
44515 case V8SFmode:
44516 half_mode = V4SFmode;
44517 j = 4;
44518 n = 4;
44519 goto half;
44521 case V4DFmode:
44522 half_mode = V2DFmode;
44523 j = 5;
44524 n = 2;
44525 goto half;
44527 half:
44528 /* Compute offset. */
44529 i = elt / n;
44530 elt %= n;
44532 gcc_assert (i <= 1);
44534 /* Extract the half. */
44535 tmp = gen_reg_rtx (half_mode);
44536 emit_insn (gen_extract[j][i] (tmp, target));
44538 /* Put val in tmp at elt. */
44539 ix86_expand_vector_set (false, tmp, val, elt);
44541 /* Put it back. */
44542 emit_insn (gen_insert[j][i] (target, target, tmp));
44543 return;
44545 case V8DFmode:
44546 if (TARGET_AVX512F)
44548 tmp = gen_reg_rtx (mode);
44549 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44550 gen_rtx_VEC_DUPLICATE (mode, val)));
44551 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44552 force_reg (QImode, GEN_INT (1 << elt))));
44553 return;
44555 else
44556 break;
44557 case V8DImode:
44558 if (TARGET_AVX512F)
44560 tmp = gen_reg_rtx (mode);
44561 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44562 gen_rtx_VEC_DUPLICATE (mode, val)));
44563 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44564 force_reg (QImode, GEN_INT (1 << elt))));
44565 return;
44567 else
44568 break;
44569 case V16SFmode:
44570 if (TARGET_AVX512F)
44572 tmp = gen_reg_rtx (mode);
44573 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44574 gen_rtx_VEC_DUPLICATE (mode, val)));
44575 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44576 force_reg (HImode, GEN_INT (1 << elt))));
44577 return;
44579 else
44580 break;
44581 case V16SImode:
44582 if (TARGET_AVX512F)
44584 tmp = gen_reg_rtx (mode);
44585 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44586 gen_rtx_VEC_DUPLICATE (mode, val)));
44587 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44588 force_reg (HImode, GEN_INT (1 << elt))));
44589 return;
44591 else
44592 break;
44593 case V32HImode:
44594 if (TARGET_AVX512F && TARGET_AVX512BW)
44596 tmp = gen_reg_rtx (mode);
44597 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44598 gen_rtx_VEC_DUPLICATE (mode, val)));
44599 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44600 force_reg (SImode, GEN_INT (1 << elt))));
44601 return;
44603 else
44604 break;
44605 case V64QImode:
44606 if (TARGET_AVX512F && TARGET_AVX512BW)
44608 tmp = gen_reg_rtx (mode);
44609 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44610 gen_rtx_VEC_DUPLICATE (mode, val)));
44611 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44612 force_reg (DImode, GEN_INT (1 << elt))));
44613 return;
44615 else
44616 break;
44618 default:
44619 break;
44622 if (use_vec_merge)
44624 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44625 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44626 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44628 else
44630 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44632 emit_move_insn (mem, target);
44634 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44635 emit_move_insn (tmp, val);
44637 emit_move_insn (target, mem);
44641 void
44642 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44644 machine_mode mode = GET_MODE (vec);
44645 machine_mode inner_mode = GET_MODE_INNER (mode);
44646 bool use_vec_extr = false;
44647 rtx tmp;
44649 switch (mode)
44651 case V2SImode:
44652 case V2SFmode:
44653 if (!mmx_ok)
44654 break;
44655 /* FALLTHRU */
44657 case V2DFmode:
44658 case V2DImode:
44659 use_vec_extr = true;
44660 break;
44662 case V4SFmode:
44663 use_vec_extr = TARGET_SSE4_1;
44664 if (use_vec_extr)
44665 break;
44667 switch (elt)
44669 case 0:
44670 tmp = vec;
44671 break;
44673 case 1:
44674 case 3:
44675 tmp = gen_reg_rtx (mode);
44676 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44677 GEN_INT (elt), GEN_INT (elt),
44678 GEN_INT (elt+4), GEN_INT (elt+4)));
44679 break;
44681 case 2:
44682 tmp = gen_reg_rtx (mode);
44683 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44684 break;
44686 default:
44687 gcc_unreachable ();
44689 vec = tmp;
44690 use_vec_extr = true;
44691 elt = 0;
44692 break;
44694 case V4SImode:
44695 use_vec_extr = TARGET_SSE4_1;
44696 if (use_vec_extr)
44697 break;
44699 if (TARGET_SSE2)
44701 switch (elt)
44703 case 0:
44704 tmp = vec;
44705 break;
44707 case 1:
44708 case 3:
44709 tmp = gen_reg_rtx (mode);
44710 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44711 GEN_INT (elt), GEN_INT (elt),
44712 GEN_INT (elt), GEN_INT (elt)));
44713 break;
44715 case 2:
44716 tmp = gen_reg_rtx (mode);
44717 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44718 break;
44720 default:
44721 gcc_unreachable ();
44723 vec = tmp;
44724 use_vec_extr = true;
44725 elt = 0;
44727 else
44729 /* For SSE1, we have to reuse the V4SF code. */
44730 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
44731 gen_lowpart (V4SFmode, vec), elt);
44732 return;
44734 break;
44736 case V8HImode:
44737 use_vec_extr = TARGET_SSE2;
44738 break;
44739 case V4HImode:
44740 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44741 break;
44743 case V16QImode:
44744 use_vec_extr = TARGET_SSE4_1;
44745 break;
44747 case V8SFmode:
44748 if (TARGET_AVX)
44750 tmp = gen_reg_rtx (V4SFmode);
44751 if (elt < 4)
44752 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
44753 else
44754 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
44755 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44756 return;
44758 break;
44760 case V4DFmode:
44761 if (TARGET_AVX)
44763 tmp = gen_reg_rtx (V2DFmode);
44764 if (elt < 2)
44765 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
44766 else
44767 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
44768 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44769 return;
44771 break;
44773 case V32QImode:
44774 if (TARGET_AVX)
44776 tmp = gen_reg_rtx (V16QImode);
44777 if (elt < 16)
44778 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
44779 else
44780 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
44781 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44782 return;
44784 break;
44786 case V16HImode:
44787 if (TARGET_AVX)
44789 tmp = gen_reg_rtx (V8HImode);
44790 if (elt < 8)
44791 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
44792 else
44793 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
44794 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44795 return;
44797 break;
44799 case V8SImode:
44800 if (TARGET_AVX)
44802 tmp = gen_reg_rtx (V4SImode);
44803 if (elt < 4)
44804 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
44805 else
44806 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
44807 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44808 return;
44810 break;
44812 case V4DImode:
44813 if (TARGET_AVX)
44815 tmp = gen_reg_rtx (V2DImode);
44816 if (elt < 2)
44817 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
44818 else
44819 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
44820 ix86_expand_vector_extract (false, target, tmp, elt & 1);
44821 return;
44823 break;
44825 case V32HImode:
44826 if (TARGET_AVX512BW)
44828 tmp = gen_reg_rtx (V16HImode);
44829 if (elt < 16)
44830 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
44831 else
44832 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
44833 ix86_expand_vector_extract (false, target, tmp, elt & 15);
44834 return;
44836 break;
44838 case V64QImode:
44839 if (TARGET_AVX512BW)
44841 tmp = gen_reg_rtx (V32QImode);
44842 if (elt < 32)
44843 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
44844 else
44845 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
44846 ix86_expand_vector_extract (false, target, tmp, elt & 31);
44847 return;
44849 break;
44851 case V16SFmode:
44852 tmp = gen_reg_rtx (V8SFmode);
44853 if (elt < 8)
44854 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
44855 else
44856 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
44857 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44858 return;
44860 case V8DFmode:
44861 tmp = gen_reg_rtx (V4DFmode);
44862 if (elt < 4)
44863 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
44864 else
44865 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
44866 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44867 return;
44869 case V16SImode:
44870 tmp = gen_reg_rtx (V8SImode);
44871 if (elt < 8)
44872 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
44873 else
44874 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
44875 ix86_expand_vector_extract (false, target, tmp, elt & 7);
44876 return;
44878 case V8DImode:
44879 tmp = gen_reg_rtx (V4DImode);
44880 if (elt < 4)
44881 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
44882 else
44883 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
44884 ix86_expand_vector_extract (false, target, tmp, elt & 3);
44885 return;
44887 case V8QImode:
44888 /* ??? Could extract the appropriate HImode element and shift. */
44889 default:
44890 break;
44893 if (use_vec_extr)
44895 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
44896 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
44898 /* Let the rtl optimizers know about the zero extension performed. */
44899 if (inner_mode == QImode || inner_mode == HImode)
44901 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
44902 target = gen_lowpart (SImode, target);
44905 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44907 else
44909 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44911 emit_move_insn (mem, vec);
44913 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44914 emit_move_insn (target, tmp);
44918 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
44919 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
44920 The upper bits of DEST are undefined, though they shouldn't cause
44921 exceptions (some bits from src or all zeros are ok). */
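/* E.g. for a V4SImode SRC, I == 128 emits a V1TImode logical right shift
   by 8 bytes, so elements 2 and 3 of SRC land in elements 0 and 1 of
   DEST, and I == 64 shifts by 4 bytes, bringing element 1 down to
   element 0. */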
44923 static void
44924 emit_reduc_half (rtx dest, rtx src, int i)
44926 rtx tem, d = dest;
44927 switch (GET_MODE (src))
44929 case V4SFmode:
44930 if (i == 128)
44931 tem = gen_sse_movhlps (dest, src, src);
44932 else
44933 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
44934 GEN_INT (1 + 4), GEN_INT (1 + 4));
44935 break;
44936 case V2DFmode:
44937 tem = gen_vec_interleave_highv2df (dest, src, src);
44938 break;
44939 case V16QImode:
44940 case V8HImode:
44941 case V4SImode:
44942 case V2DImode:
44943 d = gen_reg_rtx (V1TImode);
44944 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
44945 GEN_INT (i / 2));
44946 break;
44947 case V8SFmode:
44948 if (i == 256)
44949 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
44950 else
44951 tem = gen_avx_shufps256 (dest, src, src,
44952 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
44953 break;
44954 case V4DFmode:
44955 if (i == 256)
44956 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
44957 else
44958 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
44959 break;
44960 case V32QImode:
44961 case V16HImode:
44962 case V8SImode:
44963 case V4DImode:
44964 if (i == 256)
44966 if (GET_MODE (dest) != V4DImode)
44967 d = gen_reg_rtx (V4DImode);
44968 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
44969 gen_lowpart (V4DImode, src),
44970 const1_rtx);
44972 else
44974 d = gen_reg_rtx (V2TImode);
44975 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
44976 GEN_INT (i / 2));
44978 break;
44979 case V64QImode:
44980 case V32HImode:
44981 case V16SImode:
44982 case V16SFmode:
44983 case V8DImode:
44984 case V8DFmode:
44985 if (i > 128)
44986 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
44987 gen_lowpart (V16SImode, src),
44988 gen_lowpart (V16SImode, src),
44989 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
44990 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
44991 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
44992 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
44993 GEN_INT (0xC), GEN_INT (0xD),
44994 GEN_INT (0xE), GEN_INT (0xF),
44995 GEN_INT (0x10), GEN_INT (0x11),
44996 GEN_INT (0x12), GEN_INT (0x13),
44997 GEN_INT (0x14), GEN_INT (0x15),
44998 GEN_INT (0x16), GEN_INT (0x17));
44999 else
45000 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45001 gen_lowpart (V16SImode, src),
45002 GEN_INT (i == 128 ? 0x2 : 0x1),
45003 GEN_INT (0x3),
45004 GEN_INT (0x3),
45005 GEN_INT (0x3),
45006 GEN_INT (i == 128 ? 0x6 : 0x5),
45007 GEN_INT (0x7),
45008 GEN_INT (0x7),
45009 GEN_INT (0x7),
45010 GEN_INT (i == 128 ? 0xA : 0x9),
45011 GEN_INT (0xB),
45012 GEN_INT (0xB),
45013 GEN_INT (0xB),
45014 GEN_INT (i == 128 ? 0xE : 0xD),
45015 GEN_INT (0xF),
45016 GEN_INT (0xF),
45017 GEN_INT (0xF));
45018 break;
45019 default:
45020 gcc_unreachable ();
45022 emit_insn (tem);
45023 if (d != dest)
45024 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45027 /* Expand a vector reduction. FN is the binary pattern to reduce;
45028 DEST is the destination; IN is the input vector. */
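/* E.g. for a V8HImode input the loop below runs with I = 128, 64 and 32;
   each step folds the upper half of the still-live elements onto the
   lower half with emit_reduc_half and combines the two with FN, so after
   the last step element 0 of DEST holds the reduction of all eight
   elements. */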
45030 void
45031 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45033 rtx half, dst, vec = in;
45034 machine_mode mode = GET_MODE (in);
45035 int i;
45037 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45038 if (TARGET_SSE4_1
45039 && mode == V8HImode
45040 && fn == gen_uminv8hi3)
45042 emit_insn (gen_sse4_1_phminposuw (dest, in));
45043 return;
45046 for (i = GET_MODE_BITSIZE (mode);
45047 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45048 i >>= 1)
45050 half = gen_reg_rtx (mode);
45051 emit_reduc_half (half, vec, i);
45052 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45053 dst = dest;
45054 else
45055 dst = gen_reg_rtx (mode);
45056 emit_insn (fn (dst, half, vec));
45057 vec = dst;
45061 /* Target hook for scalar_mode_supported_p. */
45062 static bool
45063 ix86_scalar_mode_supported_p (machine_mode mode)
45065 if (DECIMAL_FLOAT_MODE_P (mode))
45066 return default_decimal_float_supported_p ();
45067 else if (mode == TFmode)
45068 return true;
45069 else
45070 return default_scalar_mode_supported_p (mode);
45073 /* Implements target hook vector_mode_supported_p. */
45074 static bool
45075 ix86_vector_mode_supported_p (machine_mode mode)
45077 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45078 return true;
45079 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45080 return true;
45081 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45082 return true;
45083 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45084 return true;
45085 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45086 return true;
45087 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45088 return true;
45089 return false;
45092 /* Implement target hook libgcc_floating_mode_supported_p. */
45093 static bool
45094 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45096 switch (mode)
45098 case SFmode:
45099 case DFmode:
45100 case XFmode:
45101 return true;
45103 case TFmode:
45104 #ifdef IX86_NO_LIBGCC_TFMODE
45105 return false;
45106 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45107 return TARGET_LONG_DOUBLE_128;
45108 #else
45109 return true;
45110 #endif
45112 default:
45113 return false;
45117 /* Target hook for c_mode_for_suffix. */
45118 static machine_mode
45119 ix86_c_mode_for_suffix (char suffix)
45121 if (suffix == 'q')
45122 return TFmode;
45123 if (suffix == 'w')
45124 return XFmode;
45126 return VOIDmode;
45129 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45131 We do this in the new i386 backend to maintain source compatibility
45132 with the old cc0-based compiler. */
45134 static tree
45135 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45137 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45138 clobbers);
45139 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45140 clobbers);
45141 return clobbers;
45144 /* Implements the target hook targetm.asm.encode_section_info. */
45146 static void ATTRIBUTE_UNUSED
45147 ix86_encode_section_info (tree decl, rtx rtl, int first)
45149 default_encode_section_info (decl, rtl, first);
45151 if (ix86_in_large_data_p (decl))
45152 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45155 /* Worker function for REVERSE_CONDITION. */
45157 enum rtx_code
45158 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45160 return (mode != CCFPmode && mode != CCFPUmode
45161 ? reverse_condition (code)
45162 : reverse_condition_maybe_unordered (code));
45165 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45166 to OPERANDS[0]. */
45168 const char *
45169 output_387_reg_move (rtx insn, rtx *operands)
45171 if (REG_P (operands[0]))
45173 if (REG_P (operands[1])
45174 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45176 if (REGNO (operands[0]) == FIRST_STACK_REG)
45177 return output_387_ffreep (operands, 0);
45178 return "fstp\t%y0";
45180 if (STACK_TOP_P (operands[0]))
45181 return "fld%Z1\t%y1";
45182 return "fst\t%y0";
45184 else if (MEM_P (operands[0]))
45186 gcc_assert (REG_P (operands[1]));
45187 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45188 return "fstp%Z0\t%y0";
45189 else
45191 /* There is no non-popping store to memory for XFmode.
45192 So if we need one, follow the store with a load. */
45193 if (GET_MODE (operands[0]) == XFmode)
45194 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45195 else
45196 return "fst%Z0\t%y0";
45199 else
45200 gcc_unreachable();
45203 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45204 FP status register is set. */
45206 void
45207 ix86_emit_fp_unordered_jump (rtx label)
45209 rtx reg = gen_reg_rtx (HImode);
45210 rtx temp;
45212 emit_insn (gen_x86_fnstsw_1 (reg));
45214 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45216 emit_insn (gen_x86_sahf_1 (reg));
45218 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45219 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45221 else
45223 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45225 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45226 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45229 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45230 gen_rtx_LABEL_REF (VOIDmode, label),
45231 pc_rtx);
45232 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45234 emit_jump_insn (temp);
45235 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45238 /* Output code to perform a log1p XFmode calculation. */
45240 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45242 rtx_code_label *label1 = gen_label_rtx ();
45243 rtx_code_label *label2 = gen_label_rtx ();
45245 rtx tmp = gen_reg_rtx (XFmode);
45246 rtx tmp2 = gen_reg_rtx (XFmode);
45247 rtx test;
45249 emit_insn (gen_absxf2 (tmp, op1));
45250 test = gen_rtx_GE (VOIDmode, tmp,
45251 CONST_DOUBLE_FROM_REAL_VALUE (
45252 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45253 XFmode));
45254 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45256 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45257 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45258 emit_jump (label2);
45260 emit_label (label1);
45261 emit_move_insn (tmp, CONST1_RTX (XFmode));
45262 emit_insn (gen_addxf3 (tmp, op1, tmp));
45263 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45264 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45266 emit_label (label2);
45269 /* Output code to perform an x87 round (to nearest, with halfway cases away from zero) from OP1 into OP0. */
45270 void ix86_emit_i387_round (rtx op0, rtx op1)
45272 machine_mode inmode = GET_MODE (op1);
45273 machine_mode outmode = GET_MODE (op0);
45274 rtx e1, e2, res, tmp, tmp1, half;
45275 rtx scratch = gen_reg_rtx (HImode);
45276 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45277 rtx_code_label *jump_label = gen_label_rtx ();
45278 rtx insn;
45279 rtx (*gen_abs) (rtx, rtx);
45280 rtx (*gen_neg) (rtx, rtx);
45282 switch (inmode)
45284 case SFmode:
45285 gen_abs = gen_abssf2;
45286 break;
45287 case DFmode:
45288 gen_abs = gen_absdf2;
45289 break;
45290 case XFmode:
45291 gen_abs = gen_absxf2;
45292 break;
45293 default:
45294 gcc_unreachable ();
45297 switch (outmode)
45299 case SFmode:
45300 gen_neg = gen_negsf2;
45301 break;
45302 case DFmode:
45303 gen_neg = gen_negdf2;
45304 break;
45305 case XFmode:
45306 gen_neg = gen_negxf2;
45307 break;
45308 case HImode:
45309 gen_neg = gen_neghi2;
45310 break;
45311 case SImode:
45312 gen_neg = gen_negsi2;
45313 break;
45314 case DImode:
45315 gen_neg = gen_negdi2;
45316 break;
45317 default:
45318 gcc_unreachable ();
45321 e1 = gen_reg_rtx (inmode);
45322 e2 = gen_reg_rtx (inmode);
45323 res = gen_reg_rtx (outmode);
45325 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45327 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
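/* E.g. round(-2.5) = sgn(-2.5) * floor(fabs(-2.5) + 0.5) = -floor(3.0)
   = -3, i.e. halfway cases are rounded away from zero rather than to the
   nearest even value as frndint would do in the default rounding mode. */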
45329 /* scratch = fxam(op1) */
45330 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45331 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45332 UNSPEC_FXAM)));
45333 /* e1 = fabs(op1) */
45334 emit_insn (gen_abs (e1, op1));
45336 /* e2 = e1 + 0.5 */
45337 half = force_reg (inmode, half);
45338 emit_insn (gen_rtx_SET (VOIDmode, e2,
45339 gen_rtx_PLUS (inmode, e1, half)));
45341 /* res = floor(e2) */
45342 if (inmode != XFmode)
45344 tmp1 = gen_reg_rtx (XFmode);
45346 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45347 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45349 else
45350 tmp1 = e2;
45352 switch (outmode)
45354 case SFmode:
45355 case DFmode:
45357 rtx tmp0 = gen_reg_rtx (XFmode);
45359 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45361 emit_insn (gen_rtx_SET (VOIDmode, res,
45362 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45363 UNSPEC_TRUNC_NOOP)));
45365 break;
45366 case XFmode:
45367 emit_insn (gen_frndintxf2_floor (res, tmp1));
45368 break;
45369 case HImode:
45370 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45371 break;
45372 case SImode:
45373 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45374 break;
45375 case DImode:
45376 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45377 break;
45378 default:
45379 gcc_unreachable ();
45382 /* flags = signbit(a) */
45383 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45385 /* if (flags) then res = -res */
45386 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45387 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45388 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45389 pc_rtx);
45390 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45391 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45392 JUMP_LABEL (insn) = jump_label;
45394 emit_insn (gen_neg (res, res));
45396 emit_label (jump_label);
45397 LABEL_NUSES (jump_label) = 1;
45399 emit_move_insn (op0, res);
45402 /* Output code to perform a Newton-Raphson approximation of a single precision
45403 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45405 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45407 rtx x0, x1, e0, e1;
45409 x0 = gen_reg_rtx (mode);
45410 e0 = gen_reg_rtx (mode);
45411 e1 = gen_reg_rtx (mode);
45412 x1 = gen_reg_rtx (mode);
45414 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
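/* This is one Newton-Raphson step x1 = x0 * (2 - b * x0), rewritten as
   (x0 + x0) - b * x0 * x0 so it maps onto the rcp estimate plus two
   multiplies, one add and one subtract; a single step roughly doubles
   the ~12 bits of accuracy of the hardware estimate. */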
45416 b = force_reg (mode, b);
45418 /* x0 = rcp(b) estimate */
45419 if (mode == V16SFmode || mode == V8DFmode)
45420 emit_insn (gen_rtx_SET (VOIDmode, x0,
45421 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45422 UNSPEC_RCP14)));
45423 else
45424 emit_insn (gen_rtx_SET (VOIDmode, x0,
45425 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45426 UNSPEC_RCP)));
45428 /* e0 = x0 * b */
45429 emit_insn (gen_rtx_SET (VOIDmode, e0,
45430 gen_rtx_MULT (mode, x0, b)));
45432 /* e0 = x0 * e0 */
45433 emit_insn (gen_rtx_SET (VOIDmode, e0,
45434 gen_rtx_MULT (mode, x0, e0)));
45436 /* e1 = x0 + x0 */
45437 emit_insn (gen_rtx_SET (VOIDmode, e1,
45438 gen_rtx_PLUS (mode, x0, x0)));
45440 /* x1 = e1 - e0 */
45441 emit_insn (gen_rtx_SET (VOIDmode, x1,
45442 gen_rtx_MINUS (mode, e1, e0)));
45444 /* res = a * x1 */
45445 emit_insn (gen_rtx_SET (VOIDmode, res,
45446 gen_rtx_MULT (mode, a, x1)));
45449 /* Output code to perform a Newton-Raphson approximation of a
45450 single precision floating point [reciprocal] square root. */
45452 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45453 bool recip)
45455 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45456 REAL_VALUE_TYPE r;
45457 int unspec;
45459 x0 = gen_reg_rtx (mode);
45460 e0 = gen_reg_rtx (mode);
45461 e1 = gen_reg_rtx (mode);
45462 e2 = gen_reg_rtx (mode);
45463 e3 = gen_reg_rtx (mode);
45465 real_from_integer (&r, VOIDmode, -3, SIGNED);
45466 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45468 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45469 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45470 unspec = UNSPEC_RSQRT;
45472 if (VECTOR_MODE_P (mode))
45474 mthree = ix86_build_const_vector (mode, true, mthree);
45475 mhalf = ix86_build_const_vector (mode, true, mhalf);
45476 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45477 if (GET_MODE_SIZE (mode) == 64)
45478 unspec = UNSPEC_RSQRT14;
45481 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45482 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
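/* This is the Newton-Raphson step x1 = x0 * (3 - a * x0 * x0) / 2 for
   1/sqrt(a), with the signs folded into the -3.0 and -0.5 constants
   built above; for sqrt the step is additionally multiplied by a (via
   e0 = a * x0), since sqrt(a) = a * 1/sqrt(a). */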
45484 a = force_reg (mode, a);
45486 /* x0 = rsqrt(a) estimate */
45487 emit_insn (gen_rtx_SET (VOIDmode, x0,
45488 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45489 unspec)));
45491 /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt(0.0) yields 0.0 rather than NaN. */
45492 if (!recip)
45494 rtx zero, mask;
45496 zero = gen_reg_rtx (mode);
45497 mask = gen_reg_rtx (mode);
45499 zero = force_reg (mode, CONST0_RTX(mode));
45501 /* Handle masked compare. */
45502 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45504 mask = gen_reg_rtx (HImode);
45505 /* Imm value 0x4 corresponds to not-equal comparison. */
45506 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45507 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45509 else
45511 emit_insn (gen_rtx_SET (VOIDmode, mask,
45512 gen_rtx_NE (mode, zero, a)));
45514 emit_insn (gen_rtx_SET (VOIDmode, x0,
45515 gen_rtx_AND (mode, x0, mask)));
45519 /* e0 = x0 * a */
45520 emit_insn (gen_rtx_SET (VOIDmode, e0,
45521 gen_rtx_MULT (mode, x0, a)));
45522 /* e1 = e0 * x0 */
45523 emit_insn (gen_rtx_SET (VOIDmode, e1,
45524 gen_rtx_MULT (mode, e0, x0)));
45526 /* e2 = e1 - 3. */
45527 mthree = force_reg (mode, mthree);
45528 emit_insn (gen_rtx_SET (VOIDmode, e2,
45529 gen_rtx_PLUS (mode, e1, mthree)));
45531 mhalf = force_reg (mode, mhalf);
45532 if (recip)
45533 /* e3 = -.5 * x0 */
45534 emit_insn (gen_rtx_SET (VOIDmode, e3,
45535 gen_rtx_MULT (mode, x0, mhalf)));
45536 else
45537 /* e3 = -.5 * e0 */
45538 emit_insn (gen_rtx_SET (VOIDmode, e3,
45539 gen_rtx_MULT (mode, e0, mhalf)));
45540 /* ret = e2 * e3 */
45541 emit_insn (gen_rtx_SET (VOIDmode, res,
45542 gen_rtx_MULT (mode, e2, e3)));
45545 #ifdef TARGET_SOLARIS
45546 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45548 static void
45549 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45550 tree decl)
45552 /* With Binutils 2.15, the "@unwind" marker must be specified on
45553 every occurrence of the ".eh_frame" section, not just the first
45554 one. */
45555 if (TARGET_64BIT
45556 && strcmp (name, ".eh_frame") == 0)
45558 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45559 flags & SECTION_WRITE ? "aw" : "a");
45560 return;
45563 #ifndef USE_GAS
45564 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45566 solaris_elf_asm_comdat_section (name, flags, decl);
45567 return;
45569 #endif
45571 default_elf_asm_named_section (name, flags, decl);
45573 #endif /* TARGET_SOLARIS */
45575 /* Return the mangling of TYPE if it is an extended fundamental type. */
45577 static const char *
45578 ix86_mangle_type (const_tree type)
45580 type = TYPE_MAIN_VARIANT (type);
45582 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45583 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45584 return NULL;
45586 switch (TYPE_MODE (type))
45588 case TFmode:
45589 /* __float128 is "g". */
45590 return "g";
45591 case XFmode:
45592 /* "long double" or __float80 is "e". */
45593 return "e";
45594 default:
45595 return NULL;
45599 /* For 32-bit code we can save PIC register setup by using
45600 __stack_chk_fail_local hidden function instead of calling
45601 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45602 register, so it is better to call __stack_chk_fail directly. */
45604 static tree ATTRIBUTE_UNUSED
45605 ix86_stack_protect_fail (void)
45607 return TARGET_64BIT
45608 ? default_external_stack_protect_fail ()
45609 : default_hidden_stack_protect_fail ();
45612 /* Select a format to encode pointers in exception handling data. CODE
45613 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45614 true if the symbol may be affected by dynamic relocations.
45616 ??? All x86 object file formats are capable of representing this.
45617 After all, the relocation needed is the same as for the call insn.
45618 Whether or not a particular assembler allows us to enter such, I
45619 guess we'll have to see. */
45620 int
45621 asm_preferred_eh_data_format (int code, int global)
45623 if (flag_pic)
45625 int type = DW_EH_PE_sdata8;
45626 if (!TARGET_64BIT
45627 || ix86_cmodel == CM_SMALL_PIC
45628 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45629 type = DW_EH_PE_sdata4;
45630 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45632 if (ix86_cmodel == CM_SMALL
45633 || (ix86_cmodel == CM_MEDIUM && code))
45634 return DW_EH_PE_udata4;
45635 return DW_EH_PE_absptr;
45638 /* Expand copysign: give the positive value ABS_VALUE the sign of SIGN,
45639 storing the result in RESULT. If MASK is non-null, it must be a mask
45640 that clears the sign bit (a fabs mask). */
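/* I.e. RESULT = ABS_VALUE | (SIGN & SIGNBIT); a supplied MASK is
   complemented first to recover the SIGNBIT constant. */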
45641 static void
45642 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45644 machine_mode mode = GET_MODE (sign);
45645 rtx sgn = gen_reg_rtx (mode);
45646 if (mask == NULL_RTX)
45648 machine_mode vmode;
45650 if (mode == SFmode)
45651 vmode = V4SFmode;
45652 else if (mode == DFmode)
45653 vmode = V2DFmode;
45654 else
45655 vmode = mode;
45657 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45658 if (!VECTOR_MODE_P (mode))
45660 /* We need to generate a scalar mode mask in this case. */
45661 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45662 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45663 mask = gen_reg_rtx (mode);
45664 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45667 else
45668 mask = gen_rtx_NOT (mode, mask);
45669 emit_insn (gen_rtx_SET (VOIDmode, sgn,
45670 gen_rtx_AND (mode, mask, sign)));
45671 emit_insn (gen_rtx_SET (VOIDmode, result,
45672 gen_rtx_IOR (mode, abs_value, sgn)));
45675 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45676 mask for masking out the sign-bit is stored in *SMASK, if that is
45677 non-null. */
45678 static rtx
45679 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45681 machine_mode vmode, mode = GET_MODE (op0);
45682 rtx xa, mask;
45684 xa = gen_reg_rtx (mode);
45685 if (mode == SFmode)
45686 vmode = V4SFmode;
45687 else if (mode == DFmode)
45688 vmode = V2DFmode;
45689 else
45690 vmode = mode;
45691 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45692 if (!VECTOR_MODE_P (mode))
45694 /* We need to generate a scalar mode mask in this case. */
45695 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45696 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45697 mask = gen_reg_rtx (mode);
45698 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
45700 emit_insn (gen_rtx_SET (VOIDmode, xa,
45701 gen_rtx_AND (mode, op0, mask)));
45703 if (smask)
45704 *smask = mask;
45706 return xa;
45709 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45710 swapping the operands if SWAP_OPERANDS is true. The expanded
45711 code is a forward jump to a newly created label in case the
45712 comparison is true. The generated label rtx is returned. */
45713 static rtx_code_label *
45714 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45715 bool swap_operands)
45717 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45718 rtx_code_label *label;
45719 rtx tmp;
45721 if (swap_operands)
45722 std::swap (op0, op1);
45724 label = gen_label_rtx ();
45725 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45726 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45727 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45728 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45729 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45730 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45731 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45732 JUMP_LABEL (tmp) = label;
45734 return label;
45737 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45738 using comparison code CODE. Operands are swapped for the comparison if
45739 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45740 static rtx
45741 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45742 bool swap_operands)
45744 rtx (*insn)(rtx, rtx, rtx, rtx);
45745 machine_mode mode = GET_MODE (op0);
45746 rtx mask = gen_reg_rtx (mode);
45748 if (swap_operands)
45749 std::swap (op0, op1);
45751 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
45753 emit_insn (insn (mask, op0, op1,
45754 gen_rtx_fmt_ee (code, mode, op0, op1)));
45755 return mask;
45758 /* Generate and return a rtx of mode MODE for 2**n where n is the number
45759 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
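/* Adding this constant to a nonnegative value smaller than 2**n leaves
   no room for fraction bits in the mantissa, so the addition itself
   rounds the value to an integer (in the current rounding mode), and
   subtracting the constant again recovers the rounded value. The
   expanders below rely on this to round without an explicit
   float/integer round trip. */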
45760 static rtx
45761 ix86_gen_TWO52 (machine_mode mode)
45763 REAL_VALUE_TYPE TWO52r;
45764 rtx TWO52;
45766 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
45767 TWO52 = const_double_from_real_value (TWO52r, mode);
45768 TWO52 = force_reg (mode, TWO52);
45770 return TWO52;
45773 /* Expand SSE sequence for computing lround from OP1 storing
45774 into OP0. */
45775 void
45776 ix86_expand_lround (rtx op0, rtx op1)
45778 /* C code for the stuff we're doing below:
45779 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
45780 return (long)tmp;
45782 machine_mode mode = GET_MODE (op1);
45783 const struct real_format *fmt;
45784 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45785 rtx adj;
45787 /* load nextafter (0.5, 0.0) */
45788 fmt = REAL_MODE_FORMAT (mode);
45789 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45790 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45792 /* adj = copysign (0.5, op1) */
45793 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
45794 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
45796 /* adj = op1 + adj */
45797 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
45799 /* op0 = (imode)adj */
45800 expand_fix (op0, adj, 0);
45803 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
45804 DO_FLOOR) from OPERAND1 storing into OPERAND0. */
45805 void
45806 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
45808 /* C code for the stuff we're doing below (for do_floor):
45809 xi = (long)op1;
45810 xi -= (double)xi > op1 ? 1 : 0;
45811 return xi;
45813 machine_mode fmode = GET_MODE (op1);
45814 machine_mode imode = GET_MODE (op0);
45815 rtx ireg, freg, tmp;
45816 rtx_code_label *label;
45818 /* reg = (long)op1 */
45819 ireg = gen_reg_rtx (imode);
45820 expand_fix (ireg, op1, 0);
45822 /* freg = (double)reg */
45823 freg = gen_reg_rtx (fmode);
45824 expand_float (freg, ireg, 0);
45826 /* ireg = (freg > op1) ? ireg - 1 : ireg */
45827 label = ix86_expand_sse_compare_and_jump (UNLE,
45828 freg, op1, !do_floor);
45829 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
45830 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
45831 emit_move_insn (ireg, tmp);
45833 emit_label (label);
45834 LABEL_NUSES (label) = 1;
45836 emit_move_insn (op0, ireg);
45839 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
45840 result in OPERAND0. */
45841 void
45842 ix86_expand_rint (rtx operand0, rtx operand1)
45844 /* C code for the stuff we're doing below:
45845 xa = fabs (operand1);
45846 if (!isless (xa, 2**52))
45847 return operand1;
45848 xa = xa + 2**52 - 2**52;
45849 return copysign (xa, operand1);
45851 machine_mode mode = GET_MODE (operand0);
45852 rtx res, xa, TWO52, mask;
45853 rtx_code_label *label;
45855 res = gen_reg_rtx (mode);
45856 emit_move_insn (res, operand1);
45858 /* xa = abs (operand1) */
45859 xa = ix86_expand_sse_fabs (res, &mask);
45861 /* if (!isless (xa, TWO52)) goto label; */
45862 TWO52 = ix86_gen_TWO52 (mode);
45863 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45865 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45866 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45868 ix86_sse_copysign_to_positive (res, xa, res, mask);
45870 emit_label (label);
45871 LABEL_NUSES (label) = 1;
45873 emit_move_insn (operand0, res);
45876 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45877 into OPERAND0. */
45878 void
45879 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
45881 /* C code for the stuff we expand below.
45882 double xa = fabs (x), x2;
45883 if (!isless (xa, TWO52))
45884 return x;
45885 xa = xa + TWO52 - TWO52;
45886 x2 = copysign (xa, x);
45887 Compensate. Floor:
45888 if (x2 > x)
45889 x2 -= 1;
45890 Compensate. Ceil:
45891 if (x2 < x)
45892 x2 -= -1;
45893 return x2;
45895 machine_mode mode = GET_MODE (operand0);
45896 rtx xa, TWO52, tmp, one, res, mask;
45897 rtx_code_label *label;
45899 TWO52 = ix86_gen_TWO52 (mode);
45901 /* Temporary for holding the result, initialized to the input
45902 operand to ease control flow. */
45903 res = gen_reg_rtx (mode);
45904 emit_move_insn (res, operand1);
45906 /* xa = abs (operand1) */
45907 xa = ix86_expand_sse_fabs (res, &mask);
45909 /* if (!isless (xa, TWO52)) goto label; */
45910 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45912 /* xa = xa + TWO52 - TWO52; */
45913 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45914 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
45916 /* xa = copysign (xa, operand1) */
45917 ix86_sse_copysign_to_positive (xa, xa, res, mask);
45919 /* generate 1.0 or -1.0 */
45920 one = force_reg (mode,
45921 const_double_from_real_value (do_floor
45922 ? dconst1 : dconstm1, mode));
45924 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45925 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45926 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45927 gen_rtx_AND (mode, one, tmp)));
45928 /* We always need to subtract here to preserve signed zero. */
45929 tmp = expand_simple_binop (mode, MINUS,
45930 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45931 emit_move_insn (res, tmp);
45933 emit_label (label);
45934 LABEL_NUSES (label) = 1;
45936 emit_move_insn (operand0, res);
45939 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
45940 into OPERAND0. */
45941 void
45942 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
45944 /* C code for the stuff we expand below.
45945 double xa = fabs (x), x2;
45946 if (!isless (xa, TWO52))
45947 return x;
45948 x2 = (double)(long)x;
45949 Compensate. Floor:
45950 if (x2 > x)
45951 x2 -= 1;
45952 Compensate. Ceil:
45953 if (x2 < x)
45954 x2 += 1;
45955 if (HONOR_SIGNED_ZEROS (mode))
45956 return copysign (x2, x);
45957 return x2;
45959 machine_mode mode = GET_MODE (operand0);
45960 rtx xa, xi, TWO52, tmp, one, res, mask;
45961 rtx_code_label *label;
45963 TWO52 = ix86_gen_TWO52 (mode);
45965 /* Temporary for holding the result, initialized to the input
45966 operand to ease control flow. */
45967 res = gen_reg_rtx (mode);
45968 emit_move_insn (res, operand1);
45970 /* xa = abs (operand1) */
45971 xa = ix86_expand_sse_fabs (res, &mask);
45973 /* if (!isless (xa, TWO52)) goto label; */
45974 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45976 /* xa = (double)(long)x */
45977 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45978 expand_fix (xi, res, 0);
45979 expand_float (xa, xi, 0);
45981 /* generate 1.0 */
45982 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45984 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
45985 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
45986 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45987 gen_rtx_AND (mode, one, tmp)));
45988 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
45989 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
45990 emit_move_insn (res, tmp);
45992 if (HONOR_SIGNED_ZEROS (mode))
45993 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45995 emit_label (label);
45996 LABEL_NUSES (label) = 1;
45998 emit_move_insn (operand0, res);
46001 /* Expand SSE sequence for computing round from OPERAND1 storing
46002 into OPERAND0. The sequence works without relying on DImode truncation
46003 via cvttsd2siq, which is only available on 64-bit targets. */
46004 void
46005 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46007 /* C code for the stuff we expand below.
46008 double xa = fabs (x), xa2, x2;
46009 if (!isless (xa, TWO52))
46010 return x;
46011 Using the absolute value and copying back sign makes
46012 -0.0 -> -0.0 correct.
46013 xa2 = xa + TWO52 - TWO52;
46014 Compensate.
46015 dxa = xa2 - xa;
46016 if (dxa <= -0.5)
46017 xa2 += 1;
46018 else if (dxa > 0.5)
46019 xa2 -= 1;
46020 x2 = copysign (xa2, x);
46021 return x2;
46023 machine_mode mode = GET_MODE (operand0);
46024 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46025 rtx_code_label *label;
46027 TWO52 = ix86_gen_TWO52 (mode);
46029 /* Temporary for holding the result, initialized to the input
46030 operand to ease control flow. */
46031 res = gen_reg_rtx (mode);
46032 emit_move_insn (res, operand1);
46034 /* xa = abs (operand1) */
46035 xa = ix86_expand_sse_fabs (res, &mask);
46037 /* if (!isless (xa, TWO52)) goto label; */
46038 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46040 /* xa2 = xa + TWO52 - TWO52; */
46041 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46042 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46044 /* dxa = xa2 - xa; */
46045 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46047 /* generate 0.5, 1.0 and -0.5 */
46048 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46049 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46050 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46051 0, OPTAB_DIRECT);
46053 /* Compensate. */
46054 tmp = gen_reg_rtx (mode);
46055 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46056 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46057 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46058 gen_rtx_AND (mode, one, tmp)));
46059 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46060 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46061 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46062 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46063 gen_rtx_AND (mode, one, tmp)));
46064 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46066 /* res = copysign (xa2, operand1) */
46067 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46069 emit_label (label);
46070 LABEL_NUSES (label) = 1;
46072 emit_move_insn (operand0, res);
46075 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46076 into OPERAND0. */
46077 void
46078 ix86_expand_trunc (rtx operand0, rtx operand1)
46080 /* C code for SSE variant we expand below.
46081 double xa = fabs (x), x2;
46082 if (!isless (xa, TWO52))
46083 return x;
46084 x2 = (double)(long)x;
46085 if (HONOR_SIGNED_ZEROS (mode))
46086 return copysign (x2, x);
46087 return x2;
46089 machine_mode mode = GET_MODE (operand0);
46090 rtx xa, xi, TWO52, res, mask;
46091 rtx_code_label *label;
46093 TWO52 = ix86_gen_TWO52 (mode);
46095 /* Temporary for holding the result, initialized to the input
46096 operand to ease control flow. */
46097 res = gen_reg_rtx (mode);
46098 emit_move_insn (res, operand1);
46100 /* xa = abs (operand1) */
46101 xa = ix86_expand_sse_fabs (res, &mask);
46103 /* if (!isless (xa, TWO52)) goto label; */
46104 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46106 /* x = (double)(long)x */
46107 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46108 expand_fix (xi, res, 0);
46109 expand_float (res, xi, 0);
46111 if (HONOR_SIGNED_ZEROS (mode))
46112 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46114 emit_label (label);
46115 LABEL_NUSES (label) = 1;
46117 emit_move_insn (operand0, res);
46120 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46121 into OPERAND0. */
46122 void
46123 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46125 machine_mode mode = GET_MODE (operand0);
46126 rtx xa, mask, TWO52, one, res, smask, tmp;
46127 rtx_code_label *label;
46129 /* C code for SSE variant we expand below.
46130 double xa = fabs (x), x2;
46131 if (!isless (xa, TWO52))
46132 return x;
46133 xa2 = xa + TWO52 - TWO52;
46134 Compensate:
46135 if (xa2 > xa)
46136 xa2 -= 1.0;
46137 x2 = copysign (xa2, x);
46138 return x2;
46141 TWO52 = ix86_gen_TWO52 (mode);
46143 /* Temporary for holding the result, initialized to the input
46144 operand to ease control flow. */
46145 res = gen_reg_rtx (mode);
46146 emit_move_insn (res, operand1);
46148 /* xa = abs (operand1) */
46149 xa = ix86_expand_sse_fabs (res, &smask);
46151 /* if (!isless (xa, TWO52)) goto label; */
46152 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46154 /* res = xa + TWO52 - TWO52; */
46155 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46156 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46157 emit_move_insn (res, tmp);
46159 /* generate 1.0 */
46160 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46162 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46163 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46164 emit_insn (gen_rtx_SET (VOIDmode, mask,
46165 gen_rtx_AND (mode, mask, one)));
46166 tmp = expand_simple_binop (mode, MINUS,
46167 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46168 emit_move_insn (res, tmp);
46170 /* res = copysign (res, operand1) */
46171 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46173 emit_label (label);
46174 LABEL_NUSES (label) = 1;
46176 emit_move_insn (operand0, res);
46179 /* Expand SSE sequence for computing round from OPERAND1 storing
46180 into OPERAND0. */
46181 void
46182 ix86_expand_round (rtx operand0, rtx operand1)
46184 /* C code for the stuff we're doing below:
46185 double xa = fabs (x);
46186 if (!isless (xa, TWO52))
46187 return x;
46188 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46189 return copysign (xa, x);
46191 machine_mode mode = GET_MODE (operand0);
46192 rtx res, TWO52, xa, xi, half, mask;
46193 rtx_code_label *label;
46194 const struct real_format *fmt;
46195 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46197 /* Temporary for holding the result, initialized to the input
46198 operand to ease control flow. */
46199 res = gen_reg_rtx (mode);
46200 emit_move_insn (res, operand1);
46202 TWO52 = ix86_gen_TWO52 (mode);
46203 xa = ix86_expand_sse_fabs (res, &mask);
46204 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46206 /* load nextafter (0.5, 0.0) */
46207 fmt = REAL_MODE_FORMAT (mode);
46208 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46209 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46211 /* xa = xa + 0.5 */
46212 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46213 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46215 /* xa = (double)(int64_t)xa */
46216 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46217 expand_fix (xi, xa, 0);
46218 expand_float (xa, xi, 0);
46220 /* res = copysign (xa, operand1) */
46221 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46223 emit_label (label);
46224 LABEL_NUSES (label) = 1;
46226 emit_move_insn (operand0, res);
46229 /* Expand SSE sequence for computing round
46230 from OP1 storing into OP0 using sse4 round insn. */
46231 void
46232 ix86_expand_round_sse4 (rtx op0, rtx op1)
46234 machine_mode mode = GET_MODE (op0);
46235 rtx e1, e2, res, half;
46236 const struct real_format *fmt;
46237 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46238 rtx (*gen_copysign) (rtx, rtx, rtx);
46239 rtx (*gen_round) (rtx, rtx, rtx);
46241 switch (mode)
46243 case SFmode:
46244 gen_copysign = gen_copysignsf3;
46245 gen_round = gen_sse4_1_roundsf2;
46246 break;
46247 case DFmode:
46248 gen_copysign = gen_copysigndf3;
46249 gen_round = gen_sse4_1_rounddf2;
46250 break;
46251 default:
46252 gcc_unreachable ();
46255 /* round (a) = trunc (a + copysign (0.5, a)) */
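/* The predecessor of 0.5 (nextafter (0.5, 0.0)) is used below instead of
   0.5 itself: for inputs just below 0.5, adding exactly 0.5 can round up
   to 1.0, and trunc would then yield 1 instead of 0. */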
46257 /* load nextafter (0.5, 0.0) */
46258 fmt = REAL_MODE_FORMAT (mode);
46259 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46260 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46261 half = const_double_from_real_value (pred_half, mode);
46263 /* e1 = copysign (0.5, op1) */
46264 e1 = gen_reg_rtx (mode);
46265 emit_insn (gen_copysign (e1, half, op1));
46267 /* e2 = op1 + e1 */
46268 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46270 /* res = trunc (e2) */
46271 res = gen_reg_rtx (mode);
46272 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46274 emit_move_insn (op0, res);
46278 /* Table of valid machine attributes. */
46279 static const struct attribute_spec ix86_attribute_table[] =
46281 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46282 affects_type_identity } */
46283 /* Stdcall attribute says callee is responsible for popping arguments
46284 if they are not variable. */
46285 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46286 true },
46287 /* Fastcall attribute says callee is responsible for popping arguments
46288 if they are not variable. */
46289 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46290 true },
46291 /* Thiscall attribute says callee is responsible for popping arguments
46292 if they are not variable. */
46293 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46294 true },
46295 /* Cdecl attribute says the callee is a normal C declaration */
46296 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46297 true },
46298 /* Regparm attribute specifies how many integer arguments are to be
46299 passed in registers. */
46300 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46301 true },
46302 /* Sseregparm attribute says we are using x86_64 calling conventions
46303 for FP arguments. */
46304 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46305 true },
46306 /* The transactional memory builtins are implicitly regparm or fastcall
46307 depending on the ABI. Override the generic do-nothing attribute that
46308 these builtins were declared with. */
46309 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46310 true },
46311 /* force_align_arg_pointer says this function realigns the stack at entry. */
46312 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46313 false, true, true, ix86_handle_cconv_attribute, false },
46314 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46315 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46316 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46317 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46318 false },
46319 #endif
46320 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46321 false },
46322 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46323 false },
46324 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46325 SUBTARGET_ATTRIBUTE_TABLE,
46326 #endif
46327 /* ms_abi and sysv_abi calling convention function attributes. */
46328 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46329 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46330 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46331 false },
46332 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46333 ix86_handle_callee_pop_aggregate_return, true },
46334 /* End element. */
46335 { NULL, 0, 0, false, false, false, NULL, false }
46338 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46339 static int
46340 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46341 tree vectype, int)
46343 unsigned elements;
46345 switch (type_of_cost)
46347 case scalar_stmt:
46348 return ix86_cost->scalar_stmt_cost;
46350 case scalar_load:
46351 return ix86_cost->scalar_load_cost;
46353 case scalar_store:
46354 return ix86_cost->scalar_store_cost;
46356 case vector_stmt:
46357 return ix86_cost->vec_stmt_cost;
46359 case vector_load:
46360 return ix86_cost->vec_align_load_cost;
46362 case vector_store:
46363 return ix86_cost->vec_store_cost;
46365 case vec_to_scalar:
46366 return ix86_cost->vec_to_scalar_cost;
46368 case scalar_to_vec:
46369 return ix86_cost->scalar_to_vec_cost;
46371 case unaligned_load:
46372 case unaligned_store:
46373 return ix86_cost->vec_unalign_load_cost;
46375 case cond_branch_taken:
46376 return ix86_cost->cond_taken_branch_cost;
46378 case cond_branch_not_taken:
46379 return ix86_cost->cond_not_taken_branch_cost;
46381 case vec_perm:
46382 case vec_promote_demote:
46383 return ix86_cost->vec_stmt_cost;
46385 case vec_construct:
46386 elements = TYPE_VECTOR_SUBPARTS (vectype);
46387 return elements / 2 + 1;
46389 default:
46390 gcc_unreachable ();
46394 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46395 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46396 insn every time. */
46398 static GTY(()) rtx_insn *vselect_insn;
46400 /* Initialize vselect_insn. */
46402 static void
46403 init_vselect_insn (void)
46405 unsigned i;
46406 rtx x;
46408 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46409 for (i = 0; i < MAX_VECT_LEN; ++i)
46410 XVECEXP (x, 0, i) = const0_rtx;
46411 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46412 const0_rtx), x);
46413 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46414 start_sequence ();
46415 vselect_insn = emit_insn (x);
46416 end_sequence ();
46419 /* Construct (set target (vec_select op0 (parallel perm))) and
46420 return true if that's a valid instruction in the active ISA. */
46422 static bool
46423 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46424 unsigned nelt, bool testing_p)
46426 unsigned int i;
46427 rtx x, save_vconcat;
46428 int icode;
46430 if (vselect_insn == NULL_RTX)
46431 init_vselect_insn ();
46433 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46434 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46435 for (i = 0; i < nelt; ++i)
46436 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46437 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46438 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46439 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46440 SET_DEST (PATTERN (vselect_insn)) = target;
46441 icode = recog_memoized (vselect_insn);
46443 if (icode >= 0 && !testing_p)
46444 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46446 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46447 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46448 INSN_CODE (vselect_insn) = -1;
46450 return icode >= 0;
46453 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46455 static bool
46456 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46457 const unsigned char *perm, unsigned nelt,
46458 bool testing_p)
46460 machine_mode v2mode;
46461 rtx x;
46462 bool ok;
46464 if (vselect_insn == NULL_RTX)
46465 init_vselect_insn ();
46467 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46468 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46469 PUT_MODE (x, v2mode);
46470 XEXP (x, 0) = op0;
46471 XEXP (x, 1) = op1;
46472 ok = expand_vselect (target, x, perm, nelt, testing_p);
46473 XEXP (x, 0) = const0_rtx;
46474 XEXP (x, 1) = const0_rtx;
46475 return ok;
46478 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46479 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46481 static bool
46482 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46484 machine_mode vmode = d->vmode;
46485 unsigned i, mask, nelt = d->nelt;
46486 rtx target, op0, op1, x;
46487 rtx rperm[32], vperm;
46489 if (d->one_operand_p)
46490 return false;
46491 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46492 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46494 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46496 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46498 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46500 else
46501 return false;
46503 /* This is a blend, not a permute. Elements must stay in their
46504 respective lanes. */
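/* Editorial example, not part of the original source: for V4SImode,
   a blendable permutation such as {0, 5, 2, 7} takes element i from
   either op0 (index i) or op1 (index i + 4), so every element stays
   in its own position; {1, 5, 2, 7} moves op0 element 1 into
   position 0 and is rejected by the loop below.  */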
46505 for (i = 0; i < nelt; ++i)
46507 unsigned e = d->perm[i];
46508 if (!(e == i || e == i + nelt))
46509 return false;
46512 if (d->testing_p)
46513 return true;
46515 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46516 decision should be extracted elsewhere, so that we only try that
46517 sequence once all budget==3 options have been tried. */
46518 target = d->target;
46519 op0 = d->op0;
46520 op1 = d->op1;
46521 mask = 0;
46523 switch (vmode)
46525 case V8DFmode:
46526 case V16SFmode:
46527 case V4DFmode:
46528 case V8SFmode:
46529 case V2DFmode:
46530 case V4SFmode:
46531 case V8HImode:
46532 case V8SImode:
46533 case V32HImode:
46534 case V64QImode:
46535 case V16SImode:
46536 case V8DImode:
46537 for (i = 0; i < nelt; ++i)
46538 mask |= (d->perm[i] >= nelt) << i;
46539 break;
46541 case V2DImode:
46542 for (i = 0; i < 2; ++i)
46543 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46544 vmode = V8HImode;
46545 goto do_subreg;
46547 case V4SImode:
46548 for (i = 0; i < 4; ++i)
46549 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
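/* Editorial example, not part of the original source: a V4SImode
   blend is done as a V8HImode pblendw, so each SImode element
   contributes two mask bits.  For {0, 5, 2, 7}, elements 1 and 3
   come from op1, giving mask = (3 << 2) | (3 << 6) = 0xcc.  */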
46550 vmode = V8HImode;
46551 goto do_subreg;
46553 case V16QImode:
46554 /* See if bytes move in pairs so we can use pblendw with
46555 an immediate argument, rather than pblendvb with a vector
46556 argument. */
46557 for (i = 0; i < 16; i += 2)
46558 if (d->perm[i] + 1 != d->perm[i + 1])
46560 use_pblendvb:
46561 for (i = 0; i < nelt; ++i)
46562 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46564 finish_pblendvb:
46565 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46566 vperm = force_reg (vmode, vperm);
46568 if (GET_MODE_SIZE (vmode) == 16)
46569 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46570 else
46571 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46572 if (target != d->target)
46573 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46574 return true;
46577 for (i = 0; i < 8; ++i)
46578 mask |= (d->perm[i * 2] >= 16) << i;
46579 vmode = V8HImode;
46580 /* FALLTHRU */
46582 do_subreg:
46583 target = gen_reg_rtx (vmode);
46584 op0 = gen_lowpart (vmode, op0);
46585 op1 = gen_lowpart (vmode, op1);
46586 break;
46588 case V32QImode:
46589 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46590 for (i = 0; i < 32; i += 2)
46591 if (d->perm[i] + 1 != d->perm[i + 1])
46592 goto use_pblendvb;
46593 /* See if bytes move in quadruplets. If yes, vpblendd
46594 with immediate can be used. */
46595 for (i = 0; i < 32; i += 4)
46596 if (d->perm[i] + 2 != d->perm[i + 2])
46597 break;
46598 if (i < 32)
46600 /* See if bytes move the same in both lanes. If yes,
46601 vpblendw with immediate can be used. */
46602 for (i = 0; i < 16; i += 2)
46603 if (d->perm[i] + 16 != d->perm[i + 16])
46604 goto use_pblendvb;
46606 /* Use vpblendw. */
46607 for (i = 0; i < 16; ++i)
46608 mask |= (d->perm[i * 2] >= 32) << i;
46609 vmode = V16HImode;
46610 goto do_subreg;
46613 /* Use vpblendd. */
46614 for (i = 0; i < 8; ++i)
46615 mask |= (d->perm[i * 4] >= 32) << i;
46616 vmode = V8SImode;
46617 goto do_subreg;
46619 case V16HImode:
46620 /* See if words move in pairs. If yes, vpblendd can be used. */
46621 for (i = 0; i < 16; i += 2)
46622 if (d->perm[i] + 1 != d->perm[i + 1])
46623 break;
46624 if (i < 16)
46626 /* See if words move the same in both lanes. If not,
46627 vpblendvb must be used. */
46628 for (i = 0; i < 8; i++)
46629 if (d->perm[i] + 8 != d->perm[i + 8])
46631 /* Use vpblendvb. */
46632 for (i = 0; i < 32; ++i)
46633 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46635 vmode = V32QImode;
46636 nelt = 32;
46637 target = gen_reg_rtx (vmode);
46638 op0 = gen_lowpart (vmode, op0);
46639 op1 = gen_lowpart (vmode, op1);
46640 goto finish_pblendvb;
46643 /* Use vpblendw. */
46644 for (i = 0; i < 16; ++i)
46645 mask |= (d->perm[i] >= 16) << i;
46646 break;
46649 /* Use vpblendd. */
46650 for (i = 0; i < 8; ++i)
46651 mask |= (d->perm[i * 2] >= 16) << i;
46652 vmode = V8SImode;
46653 goto do_subreg;
46655 case V4DImode:
46656 /* Use vpblendd. */
46657 for (i = 0; i < 4; ++i)
46658 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46659 vmode = V8SImode;
46660 goto do_subreg;
46662 default:
46663 gcc_unreachable ();
46666 /* This matches five different patterns with the different modes. */
46667 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46668 x = gen_rtx_SET (VOIDmode, target, x);
46669 emit_insn (x);
46670 if (target != d->target)
46671 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46673 return true;
46676 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46677 in terms of the variable form of vpermilps.
46679 Note that we will have already failed the immediate input vpermilps,
46680 which requires that the high and low part shuffle be identical; the
46681 variable form doesn't require that. */
46683 static bool
46684 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46686 rtx rperm[8], vperm;
46687 unsigned i;
46689 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46690 return false;
46692 /* We can only permute within the 128-bit lane. */
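/* Editorial example, not part of the original source: a V8SFmode
   permutation such as {1, 0, 3, 2, 5, 4, 7, 6} is acceptable, since
   result elements 0-3 come from the low lane and 4-7 from the high
   lane; {4, 5, 6, 7, 0, 1, 2, 3} crosses lanes and is rejected by
   the loop below.  */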
46693 for (i = 0; i < 8; ++i)
46695 unsigned e = d->perm[i];
46696 if (i < 4 ? e >= 4 : e < 4)
46697 return false;
46700 if (d->testing_p)
46701 return true;
46703 for (i = 0; i < 8; ++i)
46705 unsigned e = d->perm[i];
46707 /* Within each 128-bit lane, the elements of op0 are numbered
46708 from 0 and the elements of op1 are numbered from 4. */
46709 if (e >= 8 + 4)
46710 e -= 8;
46711 else if (e >= 4)
46712 e -= 4;
46714 rperm[i] = GEN_INT (e);
46717 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46718 vperm = force_reg (V8SImode, vperm);
46719 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46721 return true;
46724 /* Return true if permutation D can be performed as a VMODE permutation
46725 instead. */
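/* Editorial example, not part of the original source: a V16QImode
   permutation {0,1,2,3, 8,9,10,11, 4,5,6,7, 12,13,14,15} moves whole
   4-byte chunks, so it is also valid as the V4SImode permutation
   {0, 2, 1, 3}; a byte permutation that splits such a chunk fails
   the check below.  */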
46727 static bool
46728 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46730 unsigned int i, j, chunk;
46732 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46733 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46734 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46735 return false;
46737 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46738 return true;
46740 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46741 for (i = 0; i < d->nelt; i += chunk)
46742 if (d->perm[i] & (chunk - 1))
46743 return false;
46744 else
46745 for (j = 1; j < chunk; ++j)
46746 if (d->perm[i] + j != d->perm[i + j])
46747 return false;
46749 return true;
46752 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46753 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
46755 static bool
46756 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
46758 unsigned i, nelt, eltsz, mask;
46759 unsigned char perm[64];
46760 machine_mode vmode = V16QImode;
46761 rtx rperm[64], vperm, target, op0, op1;
46763 nelt = d->nelt;
46765 if (!d->one_operand_p)
46767 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
46769 if (TARGET_AVX2
46770 && valid_perm_using_mode_p (V2TImode, d))
46772 if (d->testing_p)
46773 return true;
46775 /* Use vperm2i128 insn. The pattern uses
46776 V4DImode instead of V2TImode. */
46777 target = d->target;
46778 if (d->vmode != V4DImode)
46779 target = gen_reg_rtx (V4DImode);
46780 op0 = gen_lowpart (V4DImode, d->op0);
46781 op1 = gen_lowpart (V4DImode, d->op1);
46782 rperm[0]
46783 = GEN_INT ((d->perm[0] / (nelt / 2))
46784 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
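/* Editorial note, not part of the original source: the immediate's
   low nibble selects the source 128-bit lane (0-3 across op0:op1)
   for the low half of the result and the high nibble the lane for
   the high half.  E.g. taking the low lane of op0 and the high lane
   of op1 gives 0 | (3 * 16) = 0x30.  */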
46785 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
46786 if (target != d->target)
46787 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46788 return true;
46790 return false;
46793 else
46795 if (GET_MODE_SIZE (d->vmode) == 16)
46797 if (!TARGET_SSSE3)
46798 return false;
46800 else if (GET_MODE_SIZE (d->vmode) == 32)
46802 if (!TARGET_AVX2)
46803 return false;
46805 /* V4DImode should be already handled through
46806 expand_vselect by vpermq instruction. */
46807 gcc_assert (d->vmode != V4DImode);
46809 vmode = V32QImode;
46810 if (d->vmode == V8SImode
46811 || d->vmode == V16HImode
46812 || d->vmode == V32QImode)
46814 /* First see if vpermq can be used for
46815 V8SImode/V16HImode/V32QImode. */
46816 if (valid_perm_using_mode_p (V4DImode, d))
46818 for (i = 0; i < 4; i++)
46819 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
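/* Editorial example, not part of the original source: for a
   V8SImode permutation {4,5, 6,7, 0,1, 2,3} each pair forms one
   DImode chunk, so the loop above yields the V4DImode permutation
   {2, 3, 0, 1}, which a single vpermq can perform as a 128-bit
   lane swap.  */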
46820 if (d->testing_p)
46821 return true;
46822 target = gen_reg_rtx (V4DImode);
46823 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
46824 perm, 4, false))
46826 emit_move_insn (d->target,
46827 gen_lowpart (d->vmode, target));
46828 return true;
46830 return false;
46833 /* Next see if vpermd can be used. */
46834 if (valid_perm_using_mode_p (V8SImode, d))
46835 vmode = V8SImode;
46837 /* Or if vpermps can be used. */
46838 else if (d->vmode == V8SFmode)
46839 vmode = V8SImode;
46841 if (vmode == V32QImode)
46843 /* vpshufb only works intra-lane; it is not
46844 possible to shuffle bytes between lanes. */
46845 for (i = 0; i < nelt; ++i)
46846 if ((d->perm[i] ^ i) & (nelt / 2))
46847 return false;
46850 else if (GET_MODE_SIZE (d->vmode) == 64)
46852 if (!TARGET_AVX512BW)
46853 return false;
46855 /* If vpermq didn't work, vpshufb won't work either. */
46856 if (d->vmode == V8DFmode || d->vmode == V8DImode)
46857 return false;
46859 vmode = V64QImode;
46860 if (d->vmode == V16SImode
46861 || d->vmode == V32HImode
46862 || d->vmode == V64QImode)
46864 /* First see if vpermq can be used for
46865 V16SImode/V32HImode/V64QImode. */
46866 if (valid_perm_using_mode_p (V8DImode, d))
46868 for (i = 0; i < 8; i++)
46869 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
46870 if (d->testing_p)
46871 return true;
46872 target = gen_reg_rtx (V8DImode);
46873 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
46874 perm, 8, false))
46876 emit_move_insn (d->target,
46877 gen_lowpart (d->vmode, target));
46878 return true;
46880 return false;
46883 /* Next see if vpermd can be used. */
46884 if (valid_perm_using_mode_p (V16SImode, d))
46885 vmode = V16SImode;
46887 /* Or if vpermps can be used. */
46888 else if (d->vmode == V16SFmode)
46889 vmode = V16SImode;
46890 if (vmode == V64QImode)
46892 /* vpshufb only works intra-lane; it is not
46893 possible to shuffle bytes between lanes. */
46894 for (i = 0; i < nelt; ++i)
46895 if ((d->perm[i] ^ i) & (nelt / 4))
46896 return false;
46899 else
46900 return false;
46903 if (d->testing_p)
46904 return true;
46906 if (vmode == V8SImode)
46907 for (i = 0; i < 8; ++i)
46908 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
46909 else if (vmode == V16SImode)
46910 for (i = 0; i < 16; ++i)
46911 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
46912 else
46914 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
46915 if (!d->one_operand_p)
46916 mask = 2 * nelt - 1;
46917 else if (vmode == V16QImode)
46918 mask = nelt - 1;
46919 else if (vmode == V64QImode)
46920 mask = nelt / 4 - 1;
46921 else
46922 mask = nelt / 2 - 1;
46924 for (i = 0; i < nelt; ++i)
46926 unsigned j, e = d->perm[i] & mask;
46927 for (j = 0; j < eltsz; ++j)
46928 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
46932 vperm = gen_rtx_CONST_VECTOR (vmode,
46933 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
46934 vperm = force_reg (vmode, vperm);
46936 target = d->target;
46937 if (d->vmode != vmode)
46938 target = gen_reg_rtx (vmode);
46939 op0 = gen_lowpart (vmode, d->op0);
46940 if (d->one_operand_p)
46942 if (vmode == V16QImode)
46943 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
46944 else if (vmode == V32QImode)
46945 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
46946 else if (vmode == V64QImode)
46947 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
46948 else if (vmode == V8SFmode)
46949 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
46950 else if (vmode == V8SImode)
46951 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
46952 else if (vmode == V16SFmode)
46953 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
46954 else if (vmode == V16SImode)
46955 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
46956 else
46957 gcc_unreachable ();
46959 else
46961 op1 = gen_lowpart (vmode, d->op1);
46962 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
46964 if (target != d->target)
46965 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46967 return true;
46970 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
46971 in a single instruction. */
46973 static bool
46974 expand_vec_perm_1 (struct expand_vec_perm_d *d)
46976 unsigned i, nelt = d->nelt;
46977 unsigned char perm2[MAX_VECT_LEN];
46979 /* Check plain VEC_SELECT first, because AVX has instructions that could
46980 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
46981 input where SEL+CONCAT may not. */
46982 if (d->one_operand_p)
46984 int mask = nelt - 1;
46985 bool identity_perm = true;
46986 bool broadcast_perm = true;
46988 for (i = 0; i < nelt; i++)
46990 perm2[i] = d->perm[i] & mask;
46991 if (perm2[i] != i)
46992 identity_perm = false;
46993 if (perm2[i])
46994 broadcast_perm = false;
46997 if (identity_perm)
46999 if (!d->testing_p)
47000 emit_move_insn (d->target, d->op0);
47001 return true;
47003 else if (broadcast_perm && TARGET_AVX2)
47005 /* Use vpbroadcast{b,w,d}. */
47006 rtx (*gen) (rtx, rtx) = NULL;
47007 switch (d->vmode)
47009 case V64QImode:
47010 if (TARGET_AVX512BW)
47011 gen = gen_avx512bw_vec_dupv64qi_1;
47012 break;
47013 case V32QImode:
47014 gen = gen_avx2_pbroadcastv32qi_1;
47015 break;
47016 case V32HImode:
47017 if (TARGET_AVX512BW)
47018 gen = gen_avx512bw_vec_dupv32hi_1;
47019 break;
47020 case V16HImode:
47021 gen = gen_avx2_pbroadcastv16hi_1;
47022 break;
47023 case V16SImode:
47024 if (TARGET_AVX512F)
47025 gen = gen_avx512f_vec_dupv16si_1;
47026 break;
47027 case V8SImode:
47028 gen = gen_avx2_pbroadcastv8si_1;
47029 break;
47030 case V16QImode:
47031 gen = gen_avx2_pbroadcastv16qi;
47032 break;
47033 case V8HImode:
47034 gen = gen_avx2_pbroadcastv8hi;
47035 break;
47036 case V16SFmode:
47037 if (TARGET_AVX512F)
47038 gen = gen_avx512f_vec_dupv16sf_1;
47039 break;
47040 case V8SFmode:
47041 gen = gen_avx2_vec_dupv8sf_1;
47042 break;
47043 case V8DFmode:
47044 if (TARGET_AVX512F)
47045 gen = gen_avx512f_vec_dupv8df_1;
47046 break;
47047 case V8DImode:
47048 if (TARGET_AVX512F)
47049 gen = gen_avx512f_vec_dupv8di_1;
47050 break;
47051 /* For other modes, prefer the other shuffles this function creates. */
47052 default: break;
47054 if (gen != NULL)
47056 if (!d->testing_p)
47057 emit_insn (gen (d->target, d->op0));
47058 return true;
47062 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47063 return true;
47065 /* There are plenty of patterns in sse.md that are written for
47066 SEL+CONCAT and are not replicated for a single op. Perhaps
47067 that should be changed, to avoid the nastiness here. */
47069 /* Recognize interleave style patterns, which means incrementing
47070 every other permutation operand. */
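/* Editorial example, not part of the original source: a one-operand
   V4SImode permutation {0, 0, 1, 1} becomes perm2 = {0, 4, 1, 5}
   over the concatenation (op0, op0), i.e. the interleave-low
   (punpckldq) pattern.  */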
47071 for (i = 0; i < nelt; i += 2)
47073 perm2[i] = d->perm[i] & mask;
47074 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47076 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47077 d->testing_p))
47078 return true;
47080 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
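/* Editorial example, not part of the original source: a one-operand
   V4SFmode permutation {2, 3, 0, 1} becomes perm2 = {2, 3, 4, 5}
   over (op0, op0), which matches the shufps pattern.  */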
47081 if (nelt >= 4)
47083 for (i = 0; i < nelt; i += 4)
47085 perm2[i + 0] = d->perm[i + 0] & mask;
47086 perm2[i + 1] = d->perm[i + 1] & mask;
47087 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47088 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47091 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47092 d->testing_p))
47093 return true;
47097 /* Finally, try the fully general two operand permute. */
47098 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47099 d->testing_p))
47100 return true;
47102 /* Recognize interleave style patterns with reversed operands. */
47103 if (!d->one_operand_p)
47105 for (i = 0; i < nelt; ++i)
47107 unsigned e = d->perm[i];
47108 if (e >= nelt)
47109 e -= nelt;
47110 else
47111 e += nelt;
47112 perm2[i] = e;
47115 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47116 d->testing_p))
47117 return true;
47120 /* Try the SSE4.1 blend variable merge instructions. */
47121 if (expand_vec_perm_blend (d))
47122 return true;
47124 /* Try one of the AVX vpermil variable permutations. */
47125 if (expand_vec_perm_vpermil (d))
47126 return true;
47128 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47129 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47130 if (expand_vec_perm_pshufb (d))
47131 return true;
47133 /* Try the AVX2 vpalignr instruction. */
47134 if (expand_vec_perm_palignr (d, true))
47135 return true;
47137 /* Try the AVX512F vpermi2 instructions. */
47138 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47139 return true;
47141 return false;
47144 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47145 in terms of a pair of pshuflw + pshufhw instructions. */
47147 static bool
47148 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47150 unsigned char perm2[MAX_VECT_LEN];
47151 unsigned i;
47152 bool ok;
47154 if (d->vmode != V8HImode || !d->one_operand_p)
47155 return false;
47157 /* The two permutations only operate in 64-bit lanes. */
47158 for (i = 0; i < 4; ++i)
47159 if (d->perm[i] >= 4)
47160 return false;
47161 for (i = 4; i < 8; ++i)
47162 if (d->perm[i] < 4)
47163 return false;
47165 if (d->testing_p)
47166 return true;
47168 /* Emit the pshuflw. */
47169 memcpy (perm2, d->perm, 4);
47170 for (i = 4; i < 8; ++i)
47171 perm2[i] = i;
47172 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47173 gcc_assert (ok);
47175 /* Emit the pshufhw. */
47176 memcpy (perm2 + 4, d->perm + 4, 4);
47177 for (i = 0; i < 4; ++i)
47178 perm2[i] = i;
47179 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47180 gcc_assert (ok);
47182 return true;
47185 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47186 the permutation using the SSSE3 palignr instruction. This succeeds
47187 when all of the elements in PERM fit within one vector and we merely
47188 need to shift them down so that a single vector permutation has a
47189 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47190 the vpalignr instruction itself can perform the requested permutation. */
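/* Editorial example, not part of the original source: for a
   two-operand V8HImode permutation {3,4,5,6,7,8,9,10} all indices
   fall in the window [3, 10], so a palignr that shifts the
   (op1:op0) double-width value down by 3 elements leaves the result
   already in order and no further single-vector permutation is
   needed.  */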
47192 static bool
47193 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47195 unsigned i, nelt = d->nelt;
47196 unsigned min, max, minswap, maxswap;
47197 bool in_order, ok, swap = false;
47198 rtx shift, target;
47199 struct expand_vec_perm_d dcopy;
47201 /* Even with AVX, palignr only operates on 128-bit vectors;
47202 with AVX2, palignr operates on both 128-bit lanes. */
47203 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47204 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47205 return false;
47207 min = 2 * nelt;
47208 max = 0;
47209 minswap = 2 * nelt;
47210 maxswap = 0;
47211 for (i = 0; i < nelt; ++i)
47213 unsigned e = d->perm[i];
47214 unsigned eswap = d->perm[i] ^ nelt;
47215 if (GET_MODE_SIZE (d->vmode) == 32)
47217 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47218 eswap = e ^ (nelt / 2);
47220 if (e < min)
47221 min = e;
47222 if (e > max)
47223 max = e;
47224 if (eswap < minswap)
47225 minswap = eswap;
47226 if (eswap > maxswap)
47227 maxswap = eswap;
47229 if (min == 0
47230 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47232 if (d->one_operand_p
47233 || minswap == 0
47234 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47235 ? nelt / 2 : nelt))
47236 return false;
47237 swap = true;
47238 min = minswap;
47239 max = maxswap;
47242 /* Given that we have SSSE3, we know we'll be able to implement the
47243 single operand permutation after the palignr with pshufb for
47244 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47245 first. */
47246 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47247 return true;
47249 dcopy = *d;
47250 if (swap)
47252 dcopy.op0 = d->op1;
47253 dcopy.op1 = d->op0;
47254 for (i = 0; i < nelt; ++i)
47255 dcopy.perm[i] ^= nelt;
47258 in_order = true;
47259 for (i = 0; i < nelt; ++i)
47261 unsigned e = dcopy.perm[i];
47262 if (GET_MODE_SIZE (d->vmode) == 32
47263 && e >= nelt
47264 && (e & (nelt / 2 - 1)) < min)
47265 e = e - min - (nelt / 2);
47266 else
47267 e = e - min;
47268 if (e != i)
47269 in_order = false;
47270 dcopy.perm[i] = e;
47272 dcopy.one_operand_p = true;
47274 if (single_insn_only_p && !in_order)
47275 return false;
47277 /* For AVX2, test whether we can permute the result in one instruction. */
47278 if (d->testing_p)
47280 if (in_order)
47281 return true;
47282 dcopy.op1 = dcopy.op0;
47283 return expand_vec_perm_1 (&dcopy);
47286 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47287 if (GET_MODE_SIZE (d->vmode) == 16)
47289 target = gen_reg_rtx (TImode);
47290 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47291 gen_lowpart (TImode, dcopy.op0), shift));
47293 else
47295 target = gen_reg_rtx (V2TImode);
47296 emit_insn (gen_avx2_palignrv2ti (target,
47297 gen_lowpart (V2TImode, dcopy.op1),
47298 gen_lowpart (V2TImode, dcopy.op0),
47299 shift));
47302 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47304 /* Test for the degenerate case where the alignment by itself
47305 produces the desired permutation. */
47306 if (in_order)
47308 emit_move_insn (d->target, dcopy.op0);
47309 return true;
47312 ok = expand_vec_perm_1 (&dcopy);
47313 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47315 return ok;
47318 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47319 the permutation using the SSE4_1 pblendv instruction. Potentially reduces
47320 the permutation from 2 pshufb insns and an ior to 1 pshufb and a pblendv. */
47322 static bool
47323 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47325 unsigned i, which, nelt = d->nelt;
47326 struct expand_vec_perm_d dcopy, dcopy1;
47327 machine_mode vmode = d->vmode;
47328 bool ok;
47330 /* Use the same checks as in expand_vec_perm_blend. */
47331 if (d->one_operand_p)
47332 return false;
47333 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47335 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47337 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47339 else
47340 return false;
47342 /* Figure out which permutation elements do not stay in their
47343 respective lanes. */
47344 for (i = 0, which = 0; i < nelt; ++i)
47346 unsigned e = d->perm[i];
47347 if (e != i)
47348 which |= (e < nelt ? 1 : 2);
47350 /* We can pblend the part whose elements do not stay in their
47351 respective lanes only when these elements all come from one
47352 half of the permutation.
47353 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47354 lanes, but both 8 and 9 are >= 8.
47355 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47356 respective lanes, and 8 >= 8 but 2 is not. */
47357 if (which != 1 && which != 2)
47358 return false;
47359 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47360 return true;
47362 /* First we apply a one-operand permutation to the part whose
47363 elements do not stay in their respective lanes. */
47364 dcopy = *d;
47365 if (which == 2)
47366 dcopy.op0 = dcopy.op1 = d->op1;
47367 else
47368 dcopy.op0 = dcopy.op1 = d->op0;
47369 dcopy.one_operand_p = true;
47371 for (i = 0; i < nelt; ++i)
47372 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47374 ok = expand_vec_perm_1 (&dcopy);
47375 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47376 return false;
47377 else
47378 gcc_assert (ok);
47379 if (d->testing_p)
47380 return true;
47382 /* Next we put permuted elements into their positions. */
47383 dcopy1 = *d;
47384 if (which == 2)
47385 dcopy1.op1 = dcopy.target;
47386 else
47387 dcopy1.op0 = dcopy.target;
47389 for (i = 0; i < nelt; ++i)
47390 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47392 ok = expand_vec_perm_blend (&dcopy1);
47393 gcc_assert (ok);
47395 return true;
47398 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47400 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47401 a two vector permutation into a single vector permutation by using
47402 an interleave operation to merge the vectors. */
47404 static bool
47405 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47407 struct expand_vec_perm_d dremap, dfinal;
47408 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47409 unsigned HOST_WIDE_INT contents;
47410 unsigned char remap[2 * MAX_VECT_LEN];
47411 rtx_insn *seq;
47412 bool ok, same_halves = false;
47414 if (GET_MODE_SIZE (d->vmode) == 16)
47416 if (d->one_operand_p)
47417 return false;
47419 else if (GET_MODE_SIZE (d->vmode) == 32)
47421 if (!TARGET_AVX)
47422 return false;
47423 /* For 32-byte modes allow even the d->one_operand_p case.
47424 The lack of cross-lane shuffling in some instructions
47425 might prevent a single insn shuffle. */
47426 dfinal = *d;
47427 dfinal.testing_p = true;
47428 /* If expand_vec_perm_interleave3 can expand this into
47429 a 3 insn sequence, give up and let it be expanded as
47430 a 3 insn sequence. While that is one insn longer,
47431 it doesn't need a memory operand, and in the common
47432 case where both the interleave low and high permutations
47433 with the same operands are adjacent, the pair needs only
47434 4 insns after CSE. */
47435 if (expand_vec_perm_interleave3 (&dfinal))
47436 return false;
47438 else
47439 return false;
47441 /* Examine from whence the elements come. */
47442 contents = 0;
47443 for (i = 0; i < nelt; ++i)
47444 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47446 memset (remap, 0xff, sizeof (remap));
47447 dremap = *d;
47449 if (GET_MODE_SIZE (d->vmode) == 16)
47451 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47453 /* Split the two input vectors into 4 halves. */
47454 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47455 h2 = h1 << nelt2;
47456 h3 = h2 << nelt2;
47457 h4 = h3 << nelt2;
47459 /* If the elements all come from the low halves, use interleave low;
47460 similarly for interleave high. If the elements are from mis-matched
47461 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
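/* Editorial example, not part of the original source: for V8HImode
   (nelt2 == 4), h1 = 0x000f and h2 = 0x00f0 cover the low and high
   halves of op0, h3 = 0x0f00 and h4 = 0xf000 those of op1.  If
   contents fits within h1 | h3, every referenced element lies in a
   low half, so an interleave-low (punpcklwd) can gather them.  */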
47462 if ((contents & (h1 | h3)) == contents)
47464 /* punpckl* */
47465 for (i = 0; i < nelt2; ++i)
47467 remap[i] = i * 2;
47468 remap[i + nelt] = i * 2 + 1;
47469 dremap.perm[i * 2] = i;
47470 dremap.perm[i * 2 + 1] = i + nelt;
47472 if (!TARGET_SSE2 && d->vmode == V4SImode)
47473 dremap.vmode = V4SFmode;
47475 else if ((contents & (h2 | h4)) == contents)
47477 /* punpckh* */
47478 for (i = 0; i < nelt2; ++i)
47480 remap[i + nelt2] = i * 2;
47481 remap[i + nelt + nelt2] = i * 2 + 1;
47482 dremap.perm[i * 2] = i + nelt2;
47483 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47485 if (!TARGET_SSE2 && d->vmode == V4SImode)
47486 dremap.vmode = V4SFmode;
47488 else if ((contents & (h1 | h4)) == contents)
47490 /* shufps */
47491 for (i = 0; i < nelt2; ++i)
47493 remap[i] = i;
47494 remap[i + nelt + nelt2] = i + nelt2;
47495 dremap.perm[i] = i;
47496 dremap.perm[i + nelt2] = i + nelt + nelt2;
47498 if (nelt != 4)
47500 /* shufpd */
47501 dremap.vmode = V2DImode;
47502 dremap.nelt = 2;
47503 dremap.perm[0] = 0;
47504 dremap.perm[1] = 3;
47507 else if ((contents & (h2 | h3)) == contents)
47509 /* shufps */
47510 for (i = 0; i < nelt2; ++i)
47512 remap[i + nelt2] = i;
47513 remap[i + nelt] = i + nelt2;
47514 dremap.perm[i] = i + nelt2;
47515 dremap.perm[i + nelt2] = i + nelt;
47517 if (nelt != 4)
47519 /* shufpd */
47520 dremap.vmode = V2DImode;
47521 dremap.nelt = 2;
47522 dremap.perm[0] = 1;
47523 dremap.perm[1] = 2;
47526 else
47527 return false;
47529 else
47531 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47532 unsigned HOST_WIDE_INT q[8];
47533 unsigned int nonzero_halves[4];
47535 /* Split the two input vectors into 8 quarters. */
47536 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47537 for (i = 1; i < 8; ++i)
47538 q[i] = q[0] << (nelt4 * i);
47539 for (i = 0; i < 4; ++i)
47540 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47542 nonzero_halves[nzcnt] = i;
47543 ++nzcnt;
47546 if (nzcnt == 1)
47548 gcc_assert (d->one_operand_p);
47549 nonzero_halves[1] = nonzero_halves[0];
47550 same_halves = true;
47552 else if (d->one_operand_p)
47554 gcc_assert (nonzero_halves[0] == 0);
47555 gcc_assert (nonzero_halves[1] == 1);
47558 if (nzcnt <= 2)
47560 if (d->perm[0] / nelt2 == nonzero_halves[1])
47562 /* Attempt to increase the likelihood that dfinal
47563 shuffle will be intra-lane. */
47564 char tmph = nonzero_halves[0];
47565 nonzero_halves[0] = nonzero_halves[1];
47566 nonzero_halves[1] = tmph;
47569 /* vperm2f128 or vperm2i128. */
47570 for (i = 0; i < nelt2; ++i)
47572 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47573 remap[i + nonzero_halves[0] * nelt2] = i;
47574 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47575 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47578 if (d->vmode != V8SFmode
47579 && d->vmode != V4DFmode
47580 && d->vmode != V8SImode)
47582 dremap.vmode = V8SImode;
47583 dremap.nelt = 8;
47584 for (i = 0; i < 4; ++i)
47586 dremap.perm[i] = i + nonzero_halves[0] * 4;
47587 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47591 else if (d->one_operand_p)
47592 return false;
47593 else if (TARGET_AVX2
47594 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47596 /* vpunpckl* */
47597 for (i = 0; i < nelt4; ++i)
47599 remap[i] = i * 2;
47600 remap[i + nelt] = i * 2 + 1;
47601 remap[i + nelt2] = i * 2 + nelt2;
47602 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47603 dremap.perm[i * 2] = i;
47604 dremap.perm[i * 2 + 1] = i + nelt;
47605 dremap.perm[i * 2 + nelt2] = i + nelt2;
47606 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47609 else if (TARGET_AVX2
47610 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47612 /* vpunpckh* */
47613 for (i = 0; i < nelt4; ++i)
47615 remap[i + nelt4] = i * 2;
47616 remap[i + nelt + nelt4] = i * 2 + 1;
47617 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47618 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47619 dremap.perm[i * 2] = i + nelt4;
47620 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47621 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47622 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47625 else
47626 return false;
47629 /* Use the remapping array set up above to move the elements from their
47630 swizzled locations into their final destinations. */
47631 dfinal = *d;
47632 for (i = 0; i < nelt; ++i)
47634 unsigned e = remap[d->perm[i]];
47635 gcc_assert (e < nelt);
47636 /* If same_halves is true, both halves of the remapped vector are the
47637 same. Avoid cross-lane accesses if possible. */
47638 if (same_halves && i >= nelt2)
47640 gcc_assert (e < nelt2);
47641 dfinal.perm[i] = e + nelt2;
47643 else
47644 dfinal.perm[i] = e;
47646 if (!d->testing_p)
47648 dremap.target = gen_reg_rtx (dremap.vmode);
47649 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47651 dfinal.op1 = dfinal.op0;
47652 dfinal.one_operand_p = true;
47654 /* Test if the final remap can be done with a single insn. For V4SFmode or
47655 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47656 start_sequence ();
47657 ok = expand_vec_perm_1 (&dfinal);
47658 seq = get_insns ();
47659 end_sequence ();
47661 if (!ok)
47662 return false;
47664 if (d->testing_p)
47665 return true;
47667 if (dremap.vmode != dfinal.vmode)
47669 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47670 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47673 ok = expand_vec_perm_1 (&dremap);
47674 gcc_assert (ok);
47676 emit_insn (seq);
47677 return true;
47680 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47681 a single vector cross-lane permutation into vpermq followed
47682 by any of the single insn permutations. */
47684 static bool
47685 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47687 struct expand_vec_perm_d dremap, dfinal;
47688 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47689 unsigned contents[2];
47690 bool ok;
47692 if (!(TARGET_AVX2
47693 && (d->vmode == V32QImode || d->vmode == V16HImode)
47694 && d->one_operand_p))
47695 return false;
47697 contents[0] = 0;
47698 contents[1] = 0;
47699 for (i = 0; i < nelt2; ++i)
47701 contents[0] |= 1u << (d->perm[i] / nelt4);
47702 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47705 for (i = 0; i < 2; ++i)
47707 unsigned int cnt = 0;
47708 for (j = 0; j < 4; ++j)
47709 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47710 return false;
47713 if (d->testing_p)
47714 return true;
47716 dremap = *d;
47717 dremap.vmode = V4DImode;
47718 dremap.nelt = 4;
47719 dremap.target = gen_reg_rtx (V4DImode);
47720 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47721 dremap.op1 = dremap.op0;
47722 dremap.one_operand_p = true;
47723 for (i = 0; i < 2; ++i)
47725 unsigned int cnt = 0;
47726 for (j = 0; j < 4; ++j)
47727 if ((contents[i] & (1u << j)) != 0)
47728 dremap.perm[2 * i + cnt++] = j;
47729 for (; cnt < 2; ++cnt)
47730 dremap.perm[2 * i + cnt] = 0;
47733 dfinal = *d;
47734 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47735 dfinal.op1 = dfinal.op0;
47736 dfinal.one_operand_p = true;
47737 for (i = 0, j = 0; i < nelt; ++i)
47739 if (i == nelt2)
47740 j = 2;
47741 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47742 if ((d->perm[i] / nelt4) == dremap.perm[j])
47744 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47745 dfinal.perm[i] |= nelt4;
47746 else
47747 gcc_unreachable ();
47750 ok = expand_vec_perm_1 (&dremap);
47751 gcc_assert (ok);
47753 ok = expand_vec_perm_1 (&dfinal);
47754 gcc_assert (ok);
47756 return true;
47759 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
47760 a vector permutation using two instructions: vperm2f128 (or
47761 vperm2i128) followed by any single in-lane permutation. */
47763 static bool
47764 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
47766 struct expand_vec_perm_d dfirst, dsecond;
47767 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
47768 bool ok;
47770 if (!TARGET_AVX
47771 || GET_MODE_SIZE (d->vmode) != 32
47772 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
47773 return false;
47775 dsecond = *d;
47776 dsecond.one_operand_p = false;
47777 dsecond.testing_p = true;
47779 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
47780 immediate. For perm < 16 the second permutation uses
47781 d->op0 as first operand, for perm >= 16 it uses d->op1
47782 as first operand. The second operand is the result of
47783 vperm2[fi]128. */
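/* Editorial example, not part of the original source: perm == 6
   (binary 01 10) asks for the low result lane from lane 2 (op1 low)
   and the high result lane from lane 1 (op0 high); the computed
   immediate is ((6 << 2) | 6) & 0x33 = 0x12, whose bits [1:0] and
   [5:4] are exactly those two lane selectors.  */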
47784 for (perm = 0; perm < 32; perm++)
47786 /* Ignore permutations which do not move anything cross-lane. */
47787 if (perm < 16)
47789 /* The second shuffle for e.g. V4DFmode has
47790 0123 and ABCD operands.
47791 Ignore AB23, as 23 is already in the second lane
47792 of the first operand. */
47793 if ((perm & 0xc) == (1 << 2)) continue;
47794 /* And 01CD, as 01 is in the first lane of the first
47795 operand. */
47796 if ((perm & 3) == 0) continue;
47797 /* And 4567, as then the vperm2[fi]128 doesn't change
47798 anything on the original 4567 second operand. */
47799 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
47801 else
47803 /* The second shuffle for e.g. V4DFmode has
47804 4567 and ABCD operands.
47805 Ignore AB67, as 67 is already in the second lane
47806 of the first operand. */
47807 if ((perm & 0xc) == (3 << 2)) continue;
47808 /* And 45CD, as 45 is in the first lane of the first
47809 operand. */
47810 if ((perm & 3) == 2) continue;
47811 /* And 0123, as then the vperm2[fi]128 doesn't change
47812 anything on the original 0123 first operand. */
47813 if ((perm & 0xf) == (1 << 2)) continue;
47816 for (i = 0; i < nelt; i++)
47818 j = d->perm[i] / nelt2;
47819 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
47820 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
47821 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
47822 dsecond.perm[i] = d->perm[i] & (nelt - 1);
47823 else
47824 break;
47827 if (i == nelt)
47829 start_sequence ();
47830 ok = expand_vec_perm_1 (&dsecond);
47831 end_sequence ();
47833 else
47834 ok = false;
47836 if (ok)
47838 if (d->testing_p)
47839 return true;
47841 /* Found a usable second shuffle. dfirst will be
47842 vperm2f128 on d->op0 and d->op1. */
47843 dsecond.testing_p = false;
47844 dfirst = *d;
47845 dfirst.target = gen_reg_rtx (d->vmode);
47846 for (i = 0; i < nelt; i++)
47847 dfirst.perm[i] = (i & (nelt2 - 1))
47848 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
47850 canonicalize_perm (&dfirst);
47851 ok = expand_vec_perm_1 (&dfirst);
47852 gcc_assert (ok);
47854 /* And dsecond is some single insn shuffle, taking
47855 d->op0 and result of vperm2f128 (if perm < 16) or
47856 d->op1 and result of vperm2f128 (otherwise). */
47857 if (perm >= 16)
47858 dsecond.op0 = dsecond.op1;
47859 dsecond.op1 = dfirst.target;
47861 ok = expand_vec_perm_1 (&dsecond);
47862 gcc_assert (ok);
47864 return true;
47867 /* For one operand, the only useful vperm2f128 permutation is 0x01,
47868 i.e. a lane swap. */
47869 if (d->one_operand_p)
47870 return false;
47873 return false;
47876 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47877 a two vector permutation using 2 intra-lane interleave insns
47878 and cross-lane shuffle for 32-byte vectors. */
47880 static bool
47881 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
47883 unsigned i, nelt;
47884 rtx (*gen) (rtx, rtx, rtx);
47886 if (d->one_operand_p)
47887 return false;
47888 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
47890 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
47892 else
47893 return false;
47895 nelt = d->nelt;
47896 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
47897 return false;
47898 for (i = 0; i < nelt; i += 2)
47899 if (d->perm[i] != d->perm[0] + i / 2
47900 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
47901 return false;
47903 if (d->testing_p)
47904 return true;
47906 switch (d->vmode)
47908 case V32QImode:
47909 if (d->perm[0])
47910 gen = gen_vec_interleave_highv32qi;
47911 else
47912 gen = gen_vec_interleave_lowv32qi;
47913 break;
47914 case V16HImode:
47915 if (d->perm[0])
47916 gen = gen_vec_interleave_highv16hi;
47917 else
47918 gen = gen_vec_interleave_lowv16hi;
47919 break;
47920 case V8SImode:
47921 if (d->perm[0])
47922 gen = gen_vec_interleave_highv8si;
47923 else
47924 gen = gen_vec_interleave_lowv8si;
47925 break;
47926 case V4DImode:
47927 if (d->perm[0])
47928 gen = gen_vec_interleave_highv4di;
47929 else
47930 gen = gen_vec_interleave_lowv4di;
47931 break;
47932 case V8SFmode:
47933 if (d->perm[0])
47934 gen = gen_vec_interleave_highv8sf;
47935 else
47936 gen = gen_vec_interleave_lowv8sf;
47937 break;
47938 case V4DFmode:
47939 if (d->perm[0])
47940 gen = gen_vec_interleave_highv4df;
47941 else
47942 gen = gen_vec_interleave_lowv4df;
47943 break;
47944 default:
47945 gcc_unreachable ();
47948 emit_insn (gen (d->target, d->op0, d->op1));
47949 return true;
47952 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
47953 a single vector permutation using a single intra-lane vector
47954 permutation, vperm2f128 swapping the lanes and vblend* insn blending
47955 the non-swapped and swapped vectors together. */
47957 static bool
47958 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
47960 struct expand_vec_perm_d dfirst, dsecond;
47961 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
47962 rtx_insn *seq;
47963 bool ok;
47964 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
47966 if (!TARGET_AVX
47967 || TARGET_AVX2
47968 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
47969 || !d->one_operand_p)
47970 return false;
47972 dfirst = *d;
47973 for (i = 0; i < nelt; i++)
47974 dfirst.perm[i] = 0xff;
47975 for (i = 0, msk = 0; i < nelt; i++)
47977 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
47978 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
47979 return false;
47980 dfirst.perm[j] = d->perm[i];
47981 if (j != i)
47982 msk |= (1 << i);
47984 for (i = 0; i < nelt; i++)
47985 if (dfirst.perm[i] == 0xff)
47986 dfirst.perm[i] = i;
47988 if (!d->testing_p)
47989 dfirst.target = gen_reg_rtx (dfirst.vmode);
47991 start_sequence ();
47992 ok = expand_vec_perm_1 (&dfirst);
47993 seq = get_insns ();
47994 end_sequence ();
47996 if (!ok)
47997 return false;
47999 if (d->testing_p)
48000 return true;
48002 emit_insn (seq);
48004 dsecond = *d;
48005 dsecond.op0 = dfirst.target;
48006 dsecond.op1 = dfirst.target;
48007 dsecond.one_operand_p = true;
48008 dsecond.target = gen_reg_rtx (dsecond.vmode);
48009 for (i = 0; i < nelt; i++)
48010 dsecond.perm[i] = i ^ nelt2;
48012 ok = expand_vec_perm_1 (&dsecond);
48013 gcc_assert (ok);
48015 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48016 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48017 return true;
48020 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48021 permutation using two vperm2f128, followed by a vshufpd insn blending
48022 the two vectors together. */
48024 static bool
48025 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48027 struct expand_vec_perm_d dfirst, dsecond, dthird;
48028 bool ok;
48030 if (!TARGET_AVX || (d->vmode != V4DFmode))
48031 return false;
48033 if (d->testing_p)
48034 return true;
48036 dfirst = *d;
48037 dsecond = *d;
48038 dthird = *d;
48040 dfirst.perm[0] = (d->perm[0] & ~1);
48041 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48042 dfirst.perm[2] = (d->perm[2] & ~1);
48043 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48044 dsecond.perm[0] = (d->perm[1] & ~1);
48045 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48046 dsecond.perm[2] = (d->perm[3] & ~1);
48047 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48048 dthird.perm[0] = (d->perm[0] % 2);
48049 dthird.perm[1] = (d->perm[1] % 2) + 4;
48050 dthird.perm[2] = (d->perm[2] % 2) + 2;
48051 dthird.perm[3] = (d->perm[3] % 2) + 6;
48053 dfirst.target = gen_reg_rtx (dfirst.vmode);
48054 dsecond.target = gen_reg_rtx (dsecond.vmode);
48055 dthird.op0 = dfirst.target;
48056 dthird.op1 = dsecond.target;
48057 dthird.one_operand_p = false;
48059 canonicalize_perm (&dfirst);
48060 canonicalize_perm (&dsecond);
48062 ok = expand_vec_perm_1 (&dfirst)
48063 && expand_vec_perm_1 (&dsecond)
48064 && expand_vec_perm_1 (&dthird);
48066 gcc_assert (ok);
48068 return true;
48071 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48072 permutation with two pshufb insns and an ior. We should have already
48073 failed all two instruction sequences. */
48075 static bool
48076 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48078 rtx rperm[2][16], vperm, l, h, op, m128;
48079 unsigned int i, nelt, eltsz;
48081 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48082 return false;
48083 gcc_assert (!d->one_operand_p);
48085 if (d->testing_p)
48086 return true;
48088 nelt = d->nelt;
48089 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48091 /* Generate two permutation masks. If the required element is within
48092 the given vector it is shuffled into the proper lane. If the required
48093 element is in the other vector, force a zero into the lane by setting
48094 bit 7 in the permutation mask. */
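/* Editorial example, not part of the original source: for a
   V8HImode extract-even permutation {0,2,4,6,8,10,12,14} with
   eltsz == 2, the op0 mask becomes {0,1,4,5,8,9,12,13, -128 x 8}
   and the op1 mask {-128 x 8, 0,1,4,5,8,9,12,13}; the two pshufb
   results are then combined by the ior below.  */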
48095 m128 = GEN_INT (-128);
48096 for (i = 0; i < nelt; ++i)
48098 unsigned j, e = d->perm[i];
48099 unsigned which = (e >= nelt);
48100 if (e >= nelt)
48101 e -= nelt;
48103 for (j = 0; j < eltsz; ++j)
48105 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48106 rperm[1-which][i*eltsz + j] = m128;
48110 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48111 vperm = force_reg (V16QImode, vperm);
48113 l = gen_reg_rtx (V16QImode);
48114 op = gen_lowpart (V16QImode, d->op0);
48115 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48117 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48118 vperm = force_reg (V16QImode, vperm);
48120 h = gen_reg_rtx (V16QImode);
48121 op = gen_lowpart (V16QImode, d->op1);
48122 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48124 op = d->target;
48125 if (d->vmode != V16QImode)
48126 op = gen_reg_rtx (V16QImode);
48127 emit_insn (gen_iorv16qi3 (op, l, h));
48128 if (op != d->target)
48129 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48131 return true;
48134 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48135 with two vpshufb insns, vpermq and vpor. We should have already failed
48136 all two or three instruction sequences. */
48138 static bool
48139 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48141 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48142 unsigned int i, nelt, eltsz;
48144 if (!TARGET_AVX2
48145 || !d->one_operand_p
48146 || (d->vmode != V32QImode && d->vmode != V16HImode))
48147 return false;
48149 if (d->testing_p)
48150 return true;
48152 nelt = d->nelt;
48153 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48155 /* Generate two permutation masks. If the required element is within
48156 the same lane, it is shuffled in. If the required element is in the
48157 other lane, force a zero by setting bit 7 in the permutation mask.
48158 The other mask has non-negative elements where an element is
48159 requested from the other lane; those elements are also moved to the
48160 other lane, so that the result of vpshufb can have its two
48161 V2TImode halves swapped. */
48162 m128 = GEN_INT (-128);
48163 for (i = 0; i < nelt; ++i)
48165 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48166 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48168 for (j = 0; j < eltsz; ++j)
48170 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48171 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48175 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48176 vperm = force_reg (V32QImode, vperm);
48178 h = gen_reg_rtx (V32QImode);
48179 op = gen_lowpart (V32QImode, d->op0);
48180 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48182 /* Swap the 128-bit lanes of h into hp. */
48183 hp = gen_reg_rtx (V4DImode);
48184 op = gen_lowpart (V4DImode, h);
48185 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48186 const1_rtx));
48188 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48189 vperm = force_reg (V32QImode, vperm);
48191 l = gen_reg_rtx (V32QImode);
48192 op = gen_lowpart (V32QImode, d->op0);
48193 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48195 op = d->target;
48196 if (d->vmode != V32QImode)
48197 op = gen_reg_rtx (V32QImode);
48198 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48199 if (op != d->target)
48200 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48202 return true;
48205 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48206 and extract-odd permutations of two V32QImode and V16QImode operand
48207 with two vpshufb insns, vpor and vpermq. We should have already
48208 failed all two or three instruction sequences. */
48210 static bool
48211 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48213 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48214 unsigned int i, nelt, eltsz;
48216 if (!TARGET_AVX2
48217 || d->one_operand_p
48218 || (d->vmode != V32QImode && d->vmode != V16HImode))
48219 return false;
48221 for (i = 0; i < d->nelt; ++i)
48222 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48223 return false;
48225 if (d->testing_p)
48226 return true;
48228 nelt = d->nelt;
48229 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48231 /* Generate two permutation masks. In the first permutation mask
48232 the first quarter will contain indexes for the first half
48233 of the op0, the second quarter will contain bit 7 set, third quarter
48234 will contain indexes for the second half of the op0 and the
48235 last quarter bit 7 set. In the second permutation mask
48236 the first quarter will contain bit 7 set, the second quarter
48237 indexes for the first half of the op1, the third quarter bit 7 set
48238 and last quarter indexes for the second half of the op1.
48239 I.e. the first mask e.g. for V32QImode extract even will be:
48240 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48241 (all values masked with 0xf except for -128) and second mask
48242 for extract even will be
48243 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48244 m128 = GEN_INT (-128);
48245 for (i = 0; i < nelt; ++i)
48247 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48248 unsigned which = d->perm[i] >= nelt;
48249 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48251 for (j = 0; j < eltsz; ++j)
48253 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48254 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48258 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48259 vperm = force_reg (V32QImode, vperm);
48261 l = gen_reg_rtx (V32QImode);
48262 op = gen_lowpart (V32QImode, d->op0);
48263 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48265 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48266 vperm = force_reg (V32QImode, vperm);
48268 h = gen_reg_rtx (V32QImode);
48269 op = gen_lowpart (V32QImode, d->op1);
48270 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48272 ior = gen_reg_rtx (V32QImode);
48273 emit_insn (gen_iorv32qi3 (ior, l, h));
48275 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48276 op = gen_reg_rtx (V4DImode);
48277 ior = gen_lowpart (V4DImode, ior);
48278 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48279 const1_rtx, GEN_INT (3)));
48280 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48282 return true;
48285 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48286 and extract-odd permutations. */
48288 static bool
48289 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48291 rtx t1, t2, t3, t4, t5;
48293 switch (d->vmode)
48295 case V4DFmode:
48296 if (d->testing_p)
48297 break;
48298 t1 = gen_reg_rtx (V4DFmode);
48299 t2 = gen_reg_rtx (V4DFmode);
48301 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48302 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48303 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48305 /* Now an unpck[lh]pd will produce the result required. */
48306 if (odd)
48307 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48308 else
48309 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48310 emit_insn (t3);
48311 break;
48313 case V8SFmode:
48315 int mask = odd ? 0xdd : 0x88;
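/* Editorial note, not part of the original source: as a shufps
   selector, 0x88 encodes {0, 2, 0, 2} and 0xdd encodes {1, 3, 1, 3},
   i.e. the even or odd elements of each 128-bit lane of the two
   operands.  */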
48317 if (d->testing_p)
48318 break;
48319 t1 = gen_reg_rtx (V8SFmode);
48320 t2 = gen_reg_rtx (V8SFmode);
48321 t3 = gen_reg_rtx (V8SFmode);
48323 /* Shuffle within the 128-bit lanes to produce:
48324 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48325 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48326 GEN_INT (mask)));
48328 /* Shuffle the lanes around to produce:
48329 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48330 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48331 GEN_INT (0x3)));
48333 /* Shuffle within the 128-bit lanes to produce:
48334 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48335 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48337 /* Shuffle within the 128-bit lanes to produce:
48338 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48339 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48341 /* Shuffle the lanes around to produce:
48342 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48343 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48344 GEN_INT (0x20)));
48346 break;
48348 case V2DFmode:
48349 case V4SFmode:
48350 case V2DImode:
48351 case V4SImode:
48352 /* These are always directly implementable by expand_vec_perm_1. */
48353 gcc_unreachable ();
48355 case V8HImode:
48356 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48357 return expand_vec_perm_pshufb2 (d);
48358 else
48360 if (d->testing_p)
48361 break;
48362 /* We need 2*log2(N)-1 operations to achieve odd/even
48363 with interleave. */
48364 t1 = gen_reg_rtx (V8HImode);
48365 t2 = gen_reg_rtx (V8HImode);
48366 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48367 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48368 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48369 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48370 if (odd)
48371 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48372 else
48373 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48374 emit_insn (t3);
48376 break;
48378 case V16QImode:
48379 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48380 return expand_vec_perm_pshufb2 (d);
48381 else
48383 if (d->testing_p)
48384 break;
48385 t1 = gen_reg_rtx (V16QImode);
48386 t2 = gen_reg_rtx (V16QImode);
48387 t3 = gen_reg_rtx (V16QImode);
48388 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
48389 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
48390 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
48391 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
48392 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
48393 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
48394 if (odd)
48395 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
48396 else
48397 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
48398 emit_insn (t3);
48400 break;
48402 case V16HImode:
48403 case V32QImode:
48404 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
48406 case V4DImode:
48407 if (!TARGET_AVX2)
48409 struct expand_vec_perm_d d_copy = *d;
48410 d_copy.vmode = V4DFmode;
48411 if (d->testing_p)
48412 d_copy.target = gen_lowpart (V4DFmode, d->target);
48413 else
48414 d_copy.target = gen_reg_rtx (V4DFmode);
48415 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48416 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48417 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48419 if (!d->testing_p)
48420 emit_move_insn (d->target,
48421 gen_lowpart (V4DImode, d_copy.target));
48422 return true;
48424 return false;
48427 if (d->testing_p)
48428 break;
48430 t1 = gen_reg_rtx (V4DImode);
48431 t2 = gen_reg_rtx (V4DImode);
48433 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48434 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48435 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48437      /* Now a vpunpck[lh]qdq will produce the result required.  */
48438 if (odd)
48439 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48440 else
48441 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48442 emit_insn (t3);
48443 break;
48445 case V8SImode:
48446 if (!TARGET_AVX2)
48448 struct expand_vec_perm_d d_copy = *d;
48449 d_copy.vmode = V8SFmode;
48450 if (d->testing_p)
48451 d_copy.target = gen_lowpart (V8SFmode, d->target);
48452 else
48453 d_copy.target = gen_reg_rtx (V8SFmode);
48454 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48455 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48456 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48458 if (!d->testing_p)
48459 emit_move_insn (d->target,
48460 gen_lowpart (V8SImode, d_copy.target));
48461 return true;
48463 return false;
48466 if (d->testing_p)
48467 break;
48469 t1 = gen_reg_rtx (V8SImode);
48470 t2 = gen_reg_rtx (V8SImode);
48471 t3 = gen_reg_rtx (V4DImode);
48472 t4 = gen_reg_rtx (V4DImode);
48473 t5 = gen_reg_rtx (V4DImode);
48475 /* Shuffle the lanes around into
48476 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48477 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48478 gen_lowpart (V4DImode, d->op1),
48479 GEN_INT (0x20)));
48480 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48481 gen_lowpart (V4DImode, d->op1),
48482 GEN_INT (0x31)));
48484 /* Swap the 2nd and 3rd position in each lane into
48485 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48486 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48487 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48488 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48489 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
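      /* 2 * 4 + 1 * 16 + 3 * 64 is 0xd8; as a pshufd immediate it selects
	 { 0, 2, 1, 3 } within each 128-bit lane, i.e. it swaps the second
	 and third elements as described above.  */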
48491      /* Now a vpunpck[lh]qdq will produce
48492 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48493 if (odd)
48494 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48495 gen_lowpart (V4DImode, t2));
48496 else
48497 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48498 gen_lowpart (V4DImode, t2));
48499 emit_insn (t3);
48500 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48501 break;
48503 default:
48504 gcc_unreachable ();
48507 return true;
48510 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
48511 extract-even and extract-odd permutations. */
48513 static bool
48514 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48516 unsigned i, odd, nelt = d->nelt;
48518 odd = d->perm[0];
48519 if (odd != 0 && odd != 1)
48520 return false;
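  /* The loop below accepts exactly selectors of the form
     { odd, 2 + odd, 4 + odd, ... }; e.g. for V8SImode extract-even is
     { 0, 2, 4, 6, 8, 10, 12, 14 } and extract-odd is
     { 1, 3, 5, 7, 9, 11, 13, 15 }.  */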
48522 for (i = 1; i < nelt; ++i)
48523 if (d->perm[i] != 2 * i + odd)
48524 return false;
48526 return expand_vec_perm_even_odd_1 (d, odd);
48529 /* A subroutine of ix86_expand_vec_perm_const_1.  Implement broadcast
48530 permutations. We assume that expand_vec_perm_1 has already failed. */
48532 static bool
48533 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48535 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48536 machine_mode vmode = d->vmode;
48537 unsigned char perm2[4];
48538 rtx op0 = d->op0, dest;
48539 bool ok;
48541 switch (vmode)
48543 case V4DFmode:
48544 case V8SFmode:
48545 /* These are special-cased in sse.md so that we can optionally
48546 use the vbroadcast instruction. They expand to two insns
48547 if the input happens to be in a register. */
48548 gcc_unreachable ();
48550 case V2DFmode:
48551 case V2DImode:
48552 case V4SFmode:
48553 case V4SImode:
48554 /* These are always implementable using standard shuffle patterns. */
48555 gcc_unreachable ();
48557 case V8HImode:
48558 case V16QImode:
48559 /* These can be implemented via interleave. We save one insn by
48560 stopping once we have promoted to V4SImode and then use pshufd. */
48561 if (d->testing_p)
48562 return true;
48565 rtx dest;
48566 rtx (*gen) (rtx, rtx, rtx)
48567 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48568 : gen_vec_interleave_lowv8hi;
48570 if (elt >= nelt2)
48572 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48573 : gen_vec_interleave_highv8hi;
48574 elt -= nelt2;
48576 nelt2 /= 2;
48578 dest = gen_reg_rtx (vmode);
48579 emit_insn (gen (dest, op0, op0));
48580 vmode = get_mode_wider_vector (vmode);
48581 op0 = gen_lowpart (vmode, dest);
48583 while (vmode != V4SImode);
48585 memset (perm2, elt, 4);
48586 dest = gen_reg_rtx (V4SImode);
48587 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48588 gcc_assert (ok);
48589 if (!d->testing_p)
48590 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48591 return true;
48593 case V32QImode:
48594 case V16HImode:
48595 case V8SImode:
48596 case V4DImode:
48597 /* For AVX2 broadcasts of the first element vpbroadcast* or
48598 vpermq should be used by expand_vec_perm_1. */
48599 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48600 return false;
48602 default:
48603 gcc_unreachable ();
48607 /* A subroutine of ix86_expand_vec_perm_const_1.  Pattern match
48608 broadcast permutations. */
48610 static bool
48611 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48613 unsigned i, elt, nelt = d->nelt;
48615 if (!d->one_operand_p)
48616 return false;
48618 elt = d->perm[0];
48619 for (i = 1; i < nelt; ++i)
48620 if (d->perm[i] != elt)
48621 return false;
48623 return expand_vec_perm_broadcast_1 (d);
48626 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
48627 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
48628 all the shorter instruction sequences. */
48630 static bool
48631 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
48633 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
48634 unsigned int i, nelt, eltsz;
48635 bool used[4];
48637 if (!TARGET_AVX2
48638 || d->one_operand_p
48639 || (d->vmode != V32QImode && d->vmode != V16HImode))
48640 return false;
48642 if (d->testing_p)
48643 return true;
48645 nelt = d->nelt;
48646 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48648 /* Generate 4 permutation masks. If the required element is within
48649      the same lane, it is shuffled in.  If the required element is from
48650      the other lane, force a zero by setting bit 7 in the permutation mask.
48651      In the other mask, an element requested from the other lane gets a
48652      non-negative value, but it is placed in the other lane, so that the
48653      two V2TImode halves of the vpshufb result can then be
48654      swapped.  */
48655 m128 = GEN_INT (-128);
48656 for (i = 0; i < 32; ++i)
48658 rperm[0][i] = m128;
48659 rperm[1][i] = m128;
48660 rperm[2][i] = m128;
48661 rperm[3][i] = m128;
48663 used[0] = false;
48664 used[1] = false;
48665 used[2] = false;
48666 used[3] = false;
48667 for (i = 0; i < nelt; ++i)
48669 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48670 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48671 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
48673 for (j = 0; j < eltsz; ++j)
48674 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
48675 used[which] = true;
48678 for (i = 0; i < 2; ++i)
48680 if (!used[2 * i + 1])
48682 h[i] = NULL_RTX;
48683 continue;
48685 vperm = gen_rtx_CONST_VECTOR (V32QImode,
48686 gen_rtvec_v (32, rperm[2 * i + 1]));
48687 vperm = force_reg (V32QImode, vperm);
48688 h[i] = gen_reg_rtx (V32QImode);
48689 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48690 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
48693   /* Swap the 128-bit lanes of h[X].  */
48694 for (i = 0; i < 2; ++i)
48696 if (h[i] == NULL_RTX)
48697 continue;
48698 op = gen_reg_rtx (V4DImode);
48699 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
48700 const2_rtx, GEN_INT (3), const0_rtx,
48701 const1_rtx));
48702 h[i] = gen_lowpart (V32QImode, op);
48705 for (i = 0; i < 2; ++i)
48707 if (!used[2 * i])
48709 l[i] = NULL_RTX;
48710 continue;
48712 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
48713 vperm = force_reg (V32QImode, vperm);
48714 l[i] = gen_reg_rtx (V32QImode);
48715 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
48716 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
48719 for (i = 0; i < 2; ++i)
48721 if (h[i] && l[i])
48723 op = gen_reg_rtx (V32QImode);
48724 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
48725 l[i] = op;
48727 else if (h[i])
48728 l[i] = h[i];
48731 gcc_assert (l[0] && l[1]);
48732 op = d->target;
48733 if (d->vmode != V32QImode)
48734 op = gen_reg_rtx (V32QImode);
48735 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
48736 if (op != d->target)
48737 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48738 return true;
48741 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
48742 With all of the interface bits taken care of, perform the expansion
48743 in D and return true on success. */
48745 static bool
48746 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
48748 /* Try a single instruction expansion. */
48749 if (expand_vec_perm_1 (d))
48750 return true;
48752 /* Try sequences of two instructions. */
48754 if (expand_vec_perm_pshuflw_pshufhw (d))
48755 return true;
48757 if (expand_vec_perm_palignr (d, false))
48758 return true;
48760 if (expand_vec_perm_interleave2 (d))
48761 return true;
48763 if (expand_vec_perm_broadcast (d))
48764 return true;
48766 if (expand_vec_perm_vpermq_perm_1 (d))
48767 return true;
48769 if (expand_vec_perm_vperm2f128 (d))
48770 return true;
48772 if (expand_vec_perm_pblendv (d))
48773 return true;
48775 /* Try sequences of three instructions. */
48777 if (expand_vec_perm_2vperm2f128_vshuf (d))
48778 return true;
48780 if (expand_vec_perm_pshufb2 (d))
48781 return true;
48783 if (expand_vec_perm_interleave3 (d))
48784 return true;
48786 if (expand_vec_perm_vperm2f128_vblend (d))
48787 return true;
48789 /* Try sequences of four instructions. */
48791 if (expand_vec_perm_vpshufb2_vpermq (d))
48792 return true;
48794 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
48795 return true;
48797 /* ??? Look for narrow permutations whose element orderings would
48798 allow the promotion to a wider mode. */
48800 /* ??? Look for sequences of interleave or a wider permute that place
48801 the data into the correct lanes for a half-vector shuffle like
48802 pshuf[lh]w or vpermilps. */
48804 /* ??? Look for sequences of interleave that produce the desired results.
48805 The combinatorics of punpck[lh] get pretty ugly... */
48807 if (expand_vec_perm_even_odd (d))
48808 return true;
48810 /* Even longer sequences. */
48811 if (expand_vec_perm_vpshufb4_vpermq2 (d))
48812 return true;
48814 return false;
48817 /* If a permutation only uses one operand, make it clear. Returns true
48818 if the permutation references both operands. */
48820 static bool
48821 canonicalize_perm (struct expand_vec_perm_d *d)
48823 int i, which, nelt = d->nelt;
48825 for (i = which = 0; i < nelt; ++i)
48826 which |= (d->perm[i] < nelt ? 1 : 2);
48828 d->one_operand_p = true;
48829 switch (which)
48831 default:
48832 gcc_unreachable();
48834 case 3:
48835 if (!rtx_equal_p (d->op0, d->op1))
48837 d->one_operand_p = false;
48838 break;
48840 /* The elements of PERM do not suggest that only the first operand
48841 is used, but both operands are identical. Allow easier matching
48842 of the permutation by folding the permutation into the single
48843 input vector. */
48844 /* FALLTHRU */
48846 case 2:
48847 for (i = 0; i < nelt; ++i)
48848 d->perm[i] &= nelt - 1;
48849 d->op0 = d->op1;
48850 break;
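      /* E.g. a V4SImode selector { 4, 6, 5, 7 } references only the second
	 operand and is folded above into { 0, 2, 1, 3 } applied to what was
	 d->op1.  */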
48852 case 1:
48853 d->op1 = d->op0;
48854 break;
48857 return (which == 3);
48860 bool
48861 ix86_expand_vec_perm_const (rtx operands[4])
48863 struct expand_vec_perm_d d;
48864 unsigned char perm[MAX_VECT_LEN];
48865 int i, nelt;
48866 bool two_args;
48867 rtx sel;
48869 d.target = operands[0];
48870 d.op0 = operands[1];
48871 d.op1 = operands[2];
48872 sel = operands[3];
48874 d.vmode = GET_MODE (d.target);
48875 gcc_assert (VECTOR_MODE_P (d.vmode));
48876 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
48877 d.testing_p = false;
48879 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
48880 gcc_assert (XVECLEN (sel, 0) == nelt);
48881 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
48883 for (i = 0; i < nelt; ++i)
48885 rtx e = XVECEXP (sel, 0, i);
48886 int ei = INTVAL (e) & (2 * nelt - 1);
48887 d.perm[i] = ei;
48888 perm[i] = ei;
48891 two_args = canonicalize_perm (&d);
48893 if (ix86_expand_vec_perm_const_1 (&d))
48894 return true;
48896 /* If the selector says both arguments are needed, but the operands are the
48897 same, the above tried to expand with one_operand_p and flattened selector.
48898 If that didn't work, retry without one_operand_p; we succeeded with that
48899 during testing. */
48900 if (two_args && d.one_operand_p)
48902 d.one_operand_p = false;
48903 memcpy (d.perm, perm, sizeof (perm));
48904 return ix86_expand_vec_perm_const_1 (&d);
48907 return false;
48910 /* Implement targetm.vectorize.vec_perm_const_ok. */
48912 static bool
48913 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
48914 const unsigned char *sel)
48916 struct expand_vec_perm_d d;
48917 unsigned int i, nelt, which;
48918 bool ret;
48920 d.vmode = vmode;
48921 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
48922 d.testing_p = true;
48924 /* Given sufficient ISA support we can just return true here
48925 for selected vector modes. */
48926 switch (d.vmode)
48928 case V16SFmode:
48929 case V16SImode:
48930 case V8DImode:
48931 case V8DFmode:
48932 if (TARGET_AVX512F)
48933 /* All implementable with a single vpermi2 insn. */
48934 return true;
48935 break;
48936 case V32HImode:
48937 if (TARGET_AVX512BW)
48938 /* All implementable with a single vpermi2 insn. */
48939 return true;
48940 break;
48941 case V8SImode:
48942 case V8SFmode:
48943 case V4DFmode:
48944 case V4DImode:
48945 if (TARGET_AVX512VL)
48946 /* All implementable with a single vpermi2 insn. */
48947 return true;
48948 break;
48949 case V16HImode:
48950 if (TARGET_AVX2)
48951 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
48952 return true;
48953 break;
48954 case V32QImode:
48955 if (TARGET_AVX2)
48956 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
48957 return true;
48958 break;
48959 case V4SImode:
48960 case V4SFmode:
48961 case V8HImode:
48962 case V16QImode:
48963 /* All implementable with a single vpperm insn. */
48964 if (TARGET_XOP)
48965 return true;
48966 /* All implementable with 2 pshufb + 1 ior. */
48967 if (TARGET_SSSE3)
48968 return true;
48969 break;
48970 case V2DImode:
48971 case V2DFmode:
48972 /* All implementable with shufpd or unpck[lh]pd. */
48973 return true;
48974 default:
48975 return false;
48978 /* Extract the values from the vector CST into the permutation
48979 array in D. */
48980 memcpy (d.perm, sel, nelt);
48981 for (i = which = 0; i < nelt; ++i)
48983 unsigned char e = d.perm[i];
48984 gcc_assert (e < 2 * nelt);
48985 which |= (e < nelt ? 1 : 2);
48988 /* For all elements from second vector, fold the elements to first. */
48989 if (which == 2)
48990 for (i = 0; i < nelt; ++i)
48991 d.perm[i] -= nelt;
48993 /* Check whether the mask can be applied to the vector type. */
48994 d.one_operand_p = (which != 3);
48996 /* Implementable with shufps or pshufd. */
48997 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
48998 return true;
49000 /* Otherwise we have to go through the motions and see if we can
49001 figure out how to generate the requested permutation. */
49002 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49003 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49004 if (!d.one_operand_p)
49005 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49007 start_sequence ();
49008 ret = ix86_expand_vec_perm_const_1 (&d);
49009 end_sequence ();
49011 return ret;
49014 void
49015 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49017 struct expand_vec_perm_d d;
49018 unsigned i, nelt;
49020 d.target = targ;
49021 d.op0 = op0;
49022 d.op1 = op1;
49023 d.vmode = GET_MODE (targ);
49024 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49025 d.one_operand_p = false;
49026 d.testing_p = false;
49028 for (i = 0; i < nelt; ++i)
49029 d.perm[i] = i * 2 + odd;
49031 /* We'll either be able to implement the permutation directly... */
49032 if (expand_vec_perm_1 (&d))
49033 return;
49035 /* ... or we use the special-case patterns. */
49036 expand_vec_perm_even_odd_1 (&d, odd);
49039 static void
49040 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49042 struct expand_vec_perm_d d;
49043 unsigned i, nelt, base;
49044 bool ok;
49046 d.target = targ;
49047 d.op0 = op0;
49048 d.op1 = op1;
49049 d.vmode = GET_MODE (targ);
49050 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49051 d.one_operand_p = false;
49052 d.testing_p = false;
49054 base = high_p ? nelt / 2 : 0;
49055 for (i = 0; i < nelt / 2; ++i)
49057 d.perm[i * 2] = i + base;
49058 d.perm[i * 2 + 1] = i + base + nelt;
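  /* E.g. for V4SImode this builds the selector { 0, 4, 1, 5 } for the low
     interleave and { 2, 6, 3, 7 } for the high interleave.  */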
49061 /* Note that for AVX this isn't one instruction. */
49062 ok = ix86_expand_vec_perm_const_1 (&d);
49063 gcc_assert (ok);
49067 /* Expand a vector operation CODE for a V*QImode in terms of the
49068 same operation on V*HImode. */
49070 void
49071 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49073 machine_mode qimode = GET_MODE (dest);
49074 machine_mode himode;
49075 rtx (*gen_il) (rtx, rtx, rtx);
49076 rtx (*gen_ih) (rtx, rtx, rtx);
49077 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49078 struct expand_vec_perm_d d;
49079 bool ok, full_interleave;
49080 bool uns_p = false;
49081 int i;
49083 switch (qimode)
49085 case V16QImode:
49086 himode = V8HImode;
49087 gen_il = gen_vec_interleave_lowv16qi;
49088 gen_ih = gen_vec_interleave_highv16qi;
49089 break;
49090 case V32QImode:
49091 himode = V16HImode;
49092 gen_il = gen_avx2_interleave_lowv32qi;
49093 gen_ih = gen_avx2_interleave_highv32qi;
49094 break;
49095 case V64QImode:
49096 himode = V32HImode;
49097 gen_il = gen_avx512bw_interleave_lowv64qi;
49098 gen_ih = gen_avx512bw_interleave_highv64qi;
49099 break;
49100 default:
49101 gcc_unreachable ();
49104 op2_l = op2_h = op2;
49105 switch (code)
49107 case MULT:
49108 /* Unpack data such that we've got a source byte in each low byte of
49109 each word. We don't care what goes into the high byte of each word.
49110 Rather than trying to get zero in there, most convenient is to let
49111      Rather than trying to get zero in there, it is most convenient to let
49112 op2_l = gen_reg_rtx (qimode);
49113 op2_h = gen_reg_rtx (qimode);
49114 emit_insn (gen_il (op2_l, op2, op2));
49115 emit_insn (gen_ih (op2_h, op2, op2));
49116 /* FALLTHRU */
49118 op1_l = gen_reg_rtx (qimode);
49119 op1_h = gen_reg_rtx (qimode);
49120 emit_insn (gen_il (op1_l, op1, op1));
49121 emit_insn (gen_ih (op1_h, op1, op1));
49122 full_interleave = qimode == V16QImode;
49123 break;
49125 case ASHIFT:
49126 case LSHIFTRT:
49127 uns_p = true;
49128 /* FALLTHRU */
49129 case ASHIFTRT:
49130 op1_l = gen_reg_rtx (himode);
49131 op1_h = gen_reg_rtx (himode);
49132 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49133 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49134 full_interleave = true;
49135 break;
49136 default:
49137 gcc_unreachable ();
49140 /* Perform the operation. */
49141 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49142 1, OPTAB_DIRECT);
49143 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49144 1, OPTAB_DIRECT);
49145 gcc_assert (res_l && res_h);
49147 /* Merge the data back into the right place. */
49148 d.target = dest;
49149 d.op0 = gen_lowpart (qimode, res_l);
49150 d.op1 = gen_lowpart (qimode, res_h);
49151 d.vmode = qimode;
49152 d.nelt = GET_MODE_NUNITS (qimode);
49153 d.one_operand_p = false;
49154 d.testing_p = false;
49156 if (full_interleave)
49158      /* For SSE2, we used a full interleave, so the desired
49159 results are in the even elements. */
49160 for (i = 0; i < 64; ++i)
49161 d.perm[i] = i * 2;
49163 else
49165 /* For AVX, the interleave used above was not cross-lane. So the
49166 extraction is evens but with the second and third quarter swapped.
49167 Happily, that is even one insn shorter than even extraction. */
49168 for (i = 0; i < 64; ++i)
49169 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
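      /* For V32QImode the first 32 entries become { 0, 2, ..., 14,
	 32, 34, ..., 46, 16, 18, ..., 30, 48, 50, ..., 62 }: the even
	 elements, but with the second and third groups of eight exchanged
	 as described above.  */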
49172 ok = ix86_expand_vec_perm_const_1 (&d);
49173 gcc_assert (ok);
49175 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49176 gen_rtx_fmt_ee (code, qimode, op1, op2));
49179 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49180 if op is CONST_VECTOR with all odd elements equal to their
49181 preceding element. */
49183 static bool
49184 const_vector_equal_evenodd_p (rtx op)
49186 machine_mode mode = GET_MODE (op);
49187 int i, nunits = GET_MODE_NUNITS (mode);
49188 if (GET_CODE (op) != CONST_VECTOR
49189 || nunits != CONST_VECTOR_NUNITS (op))
49190 return false;
49191 for (i = 0; i < nunits; i += 2)
49192 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49193 return false;
49194 return true;
49197 void
49198 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49199 bool uns_p, bool odd_p)
49201 machine_mode mode = GET_MODE (op1);
49202 machine_mode wmode = GET_MODE (dest);
49203 rtx x;
49204 rtx orig_op1 = op1, orig_op2 = op2;
49206 if (!nonimmediate_operand (op1, mode))
49207 op1 = force_reg (mode, op1);
49208 if (!nonimmediate_operand (op2, mode))
49209 op2 = force_reg (mode, op2);
49211 /* We only play even/odd games with vectors of SImode. */
49212 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49214 /* If we're looking for the odd results, shift those members down to
49215 the even slots. For some cpus this is faster than a PSHUFD. */
49216 if (odd_p)
49218 /* For XOP use vpmacsdqh, but only for smult, as it is only
49219 signed. */
49220 if (TARGET_XOP && mode == V4SImode && !uns_p)
49222 x = force_reg (wmode, CONST0_RTX (wmode));
49223 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49224 return;
49227 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49228 if (!const_vector_equal_evenodd_p (orig_op1))
49229 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49230 x, NULL, 1, OPTAB_DIRECT);
49231 if (!const_vector_equal_evenodd_p (orig_op2))
49232 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49233 x, NULL, 1, OPTAB_DIRECT);
49234 op1 = gen_lowpart (mode, op1);
49235 op2 = gen_lowpart (mode, op2);
49238 if (mode == V16SImode)
49240 if (uns_p)
49241 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49242 else
49243 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49245 else if (mode == V8SImode)
49247 if (uns_p)
49248 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49249 else
49250 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49252 else if (uns_p)
49253 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49254 else if (TARGET_SSE4_1)
49255 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49256 else
49258 rtx s1, s2, t0, t1, t2;
49260 /* The easiest way to implement this without PMULDQ is to go through
49261 the motions as if we are performing a full 64-bit multiply. With
49262 the exception that we need to do less shuffling of the elements. */
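      /* In per-element terms, with each input viewed as a 64-bit value whose
	 high half is its sign extension, the low 64 bits of the product are
	 LO(A)*LO(B) + ((S1*LO(B) + S2*LO(A)) << 32), where S1 and S2 are the
	 sign masks computed below; that is exactly the sequence emitted
	 here.  */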
49264 /* Compute the sign-extension, aka highparts, of the two operands. */
49265 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49266 op1, pc_rtx, pc_rtx);
49267 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49268 op2, pc_rtx, pc_rtx);
49270 /* Multiply LO(A) * HI(B), and vice-versa. */
49271 t1 = gen_reg_rtx (wmode);
49272 t2 = gen_reg_rtx (wmode);
49273 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49274 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49276 /* Multiply LO(A) * LO(B). */
49277 t0 = gen_reg_rtx (wmode);
49278 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49280 /* Combine and shift the highparts into place. */
49281 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49282 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49283 1, OPTAB_DIRECT);
49285 /* Combine high and low parts. */
49286 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49287 return;
49289 emit_insn (x);
49292 void
49293 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49294 bool uns_p, bool high_p)
49296 machine_mode wmode = GET_MODE (dest);
49297 machine_mode mode = GET_MODE (op1);
49298 rtx t1, t2, t3, t4, mask;
49300 switch (mode)
49302 case V4SImode:
49303 t1 = gen_reg_rtx (mode);
49304 t2 = gen_reg_rtx (mode);
49305 if (TARGET_XOP && !uns_p)
49307 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49308 shuffle the elements once so that all elements are in the right
49309 place for immediate use: { A C B D }. */
49310 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49311 const1_rtx, GEN_INT (3)));
49312 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49313 const1_rtx, GEN_INT (3)));
49315 else
49317 /* Put the elements into place for the multiply. */
49318 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49319 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49320 high_p = false;
49322 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49323 break;
49325 case V8SImode:
49326 /* Shuffle the elements between the lanes. After this we
49327 have { A B E F | C D G H } for each operand. */
49328 t1 = gen_reg_rtx (V4DImode);
49329 t2 = gen_reg_rtx (V4DImode);
49330 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49331 const0_rtx, const2_rtx,
49332 const1_rtx, GEN_INT (3)));
49333 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49334 const0_rtx, const2_rtx,
49335 const1_rtx, GEN_INT (3)));
49337 /* Shuffle the elements within the lanes. After this we
49338 have { A A B B | C C D D } or { E E F F | G G H H }. */
49339 t3 = gen_reg_rtx (V8SImode);
49340 t4 = gen_reg_rtx (V8SImode);
49341 mask = GEN_INT (high_p
49342 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49343 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
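      /* These pshufd immediates are 0xfa and 0x50, selecting { 2, 2, 3, 3 }
	 or { 0, 0, 1, 1 } within each 128-bit lane, i.e. duplicating either
	 the high or the low pair of elements as shown in the comment
	 above.  */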
49344 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49345 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49347 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49348 break;
49350 case V8HImode:
49351 case V16HImode:
49352 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49353 uns_p, OPTAB_DIRECT);
49354 t2 = expand_binop (mode,
49355 uns_p ? umul_highpart_optab : smul_highpart_optab,
49356 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49357 gcc_assert (t1 && t2);
49359 t3 = gen_reg_rtx (mode);
49360 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49361 emit_move_insn (dest, gen_lowpart (wmode, t3));
49362 break;
49364 case V16QImode:
49365 case V32QImode:
49366 case V32HImode:
49367 case V16SImode:
49368 case V64QImode:
49369 t1 = gen_reg_rtx (wmode);
49370 t2 = gen_reg_rtx (wmode);
49371 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49372 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49374 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49375 break;
49377 default:
49378 gcc_unreachable ();
49382 void
49383 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49385 rtx res_1, res_2, res_3, res_4;
49387 res_1 = gen_reg_rtx (V4SImode);
49388 res_2 = gen_reg_rtx (V4SImode);
49389 res_3 = gen_reg_rtx (V2DImode);
49390 res_4 = gen_reg_rtx (V2DImode);
49391 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49392 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49394 /* Move the results in element 2 down to element 1; we don't care
49395 what goes in elements 2 and 3. Then we can merge the parts
49396 back together with an interleave.
49398 Note that two other sequences were tried:
49399 (1) Use interleaves at the start instead of psrldq, which allows
49400 us to use a single shufps to merge things back at the end.
49401 (2) Use shufps here to combine the two vectors, then pshufd to
49402 put the elements in the correct order.
49403 In both cases the cost of the reformatting stall was too high
49404 and the overall sequence slower. */
49406 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49407 const0_rtx, const2_rtx,
49408 const0_rtx, const0_rtx));
49409 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49410 const0_rtx, const2_rtx,
49411 const0_rtx, const0_rtx));
49412 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49414 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49417 void
49418 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49420 machine_mode mode = GET_MODE (op0);
49421 rtx t1, t2, t3, t4, t5, t6;
49423 if (TARGET_AVX512DQ && mode == V8DImode)
49424 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49425 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49426 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49427 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49428 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49429 else if (TARGET_XOP && mode == V2DImode)
49431 /* op1: A,B,C,D, op2: E,F,G,H */
49432 op1 = gen_lowpart (V4SImode, op1);
49433 op2 = gen_lowpart (V4SImode, op2);
49435 t1 = gen_reg_rtx (V4SImode);
49436 t2 = gen_reg_rtx (V4SImode);
49437 t3 = gen_reg_rtx (V2DImode);
49438 t4 = gen_reg_rtx (V2DImode);
49440 /* t1: B,A,D,C */
49441 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49442 GEN_INT (1),
49443 GEN_INT (0),
49444 GEN_INT (3),
49445 GEN_INT (2)));
49447 /* t2: (B*E),(A*F),(D*G),(C*H) */
49448 emit_insn (gen_mulv4si3 (t2, t1, op2));
49450 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49451 emit_insn (gen_xop_phadddq (t3, t2));
49453 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49454 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49456 /* Multiply lower parts and add all */
49457 t5 = gen_reg_rtx (V2DImode);
49458 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49459 gen_lowpart (V4SImode, op1),
49460 gen_lowpart (V4SImode, op2)));
49461 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49464 else
49466 machine_mode nmode;
49467 rtx (*umul) (rtx, rtx, rtx);
49469 if (mode == V2DImode)
49471 umul = gen_vec_widen_umult_even_v4si;
49472 nmode = V4SImode;
49474 else if (mode == V4DImode)
49476 umul = gen_vec_widen_umult_even_v8si;
49477 nmode = V8SImode;
49479 else if (mode == V8DImode)
49481 umul = gen_vec_widen_umult_even_v16si;
49482 nmode = V16SImode;
49484 else
49485 gcc_unreachable ();
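      /* The steps below use the usual split of each 64-bit element into
	 32-bit halves: the low 64 bits of X * Y are
	 LO(X)*LO(Y) + ((HI(X)*LO(Y) + HI(Y)*LO(X)) << 32).  */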
49488 /* Multiply low parts. */
49489 t1 = gen_reg_rtx (mode);
49490 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49492 /* Shift input vectors right 32 bits so we can multiply high parts. */
49493 t6 = GEN_INT (32);
49494 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49495 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49497 /* Multiply high parts by low parts. */
49498 t4 = gen_reg_rtx (mode);
49499 t5 = gen_reg_rtx (mode);
49500 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49501 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49503 /* Combine and shift the highparts back. */
49504 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49505 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49507 /* Combine high and low parts. */
49508 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49511 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49512 gen_rtx_MULT (mode, op1, op2));
49515 /* Return 1 if control transfer instruction INSN
49516    should be encoded with a bnd prefix.
49517    If INSN is NULL then return 1 when control
49518    transfer instructions should be prefixed with
49519    bnd by default for the current function.  */
49521 bool
49522 ix86_bnd_prefixed_insn_p (rtx insn)
49524 /* For call insns check special flag. */
49525 if (insn && CALL_P (insn))
49527 rtx call = get_call_rtx_from (insn);
49528 if (call)
49529 return CALL_EXPR_WITH_BOUNDS_P (call);
49532 /* All other insns are prefixed only if function is instrumented. */
49533 return chkp_function_instrumented_p (current_function_decl);
49536 /* Calculate integer abs() using only SSE2 instructions. */
49538 void
49539 ix86_expand_sse2_abs (rtx target, rtx input)
49541 machine_mode mode = GET_MODE (target);
49542 rtx tmp0, tmp1, x;
49544 switch (mode)
49546 /* For 32-bit signed integer X, the best way to calculate the absolute
49547 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
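     For example, with X = -5 the arithmetic shift gives -1, (-1 ^ -5) is 4,
     and 4 - (-1) is the expected 5; for non-negative X the shift gives 0 and
     the expression reduces to X.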
49548 case V4SImode:
49549 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49550 GEN_INT (GET_MODE_BITSIZE
49551 (GET_MODE_INNER (mode)) - 1),
49552 NULL, 0, OPTAB_DIRECT);
49553 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49554 NULL, 0, OPTAB_DIRECT);
49555 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49556 target, 0, OPTAB_DIRECT);
49557 break;
49559 /* For 16-bit signed integer X, the best way to calculate the absolute
49560 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49561 case V8HImode:
49562 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49564 x = expand_simple_binop (mode, SMAX, tmp0, input,
49565 target, 0, OPTAB_DIRECT);
49566 break;
49568 /* For 8-bit signed integer X, the best way to calculate the absolute
49569 value of X is min ((unsigned char) X, (unsigned char) (-X)),
49570 as SSE2 provides the PMINUB insn. */
49571 case V16QImode:
49572 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
49574 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
49575 target, 0, OPTAB_DIRECT);
49576 break;
49578 default:
49579 gcc_unreachable ();
49582 if (x != target)
49583 emit_move_insn (target, x);
49586 /* Expand an insert into a vector register through pinsr insn.
49587 Return true if successful. */
49589 bool
49590 ix86_expand_pinsr (rtx *operands)
49592 rtx dst = operands[0];
49593 rtx src = operands[3];
49595 unsigned int size = INTVAL (operands[1]);
49596 unsigned int pos = INTVAL (operands[2]);
49598 if (GET_CODE (dst) == SUBREG)
49600 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
49601 dst = SUBREG_REG (dst);
49604 if (GET_CODE (src) == SUBREG)
49605 src = SUBREG_REG (src);
49607 switch (GET_MODE (dst))
49609 case V16QImode:
49610 case V8HImode:
49611 case V4SImode:
49612 case V2DImode:
49614 machine_mode srcmode, dstmode;
49615 rtx (*pinsr)(rtx, rtx, rtx, rtx);
49617 srcmode = mode_for_size (size, MODE_INT, 0);
49619 switch (srcmode)
49621 case QImode:
49622 if (!TARGET_SSE4_1)
49623 return false;
49624 dstmode = V16QImode;
49625 pinsr = gen_sse4_1_pinsrb;
49626 break;
49628 case HImode:
49629 if (!TARGET_SSE2)
49630 return false;
49631 dstmode = V8HImode;
49632 pinsr = gen_sse2_pinsrw;
49633 break;
49635 case SImode:
49636 if (!TARGET_SSE4_1)
49637 return false;
49638 dstmode = V4SImode;
49639 pinsr = gen_sse4_1_pinsrd;
49640 break;
49642 case DImode:
49643 gcc_assert (TARGET_64BIT);
49644 if (!TARGET_SSE4_1)
49645 return false;
49646 dstmode = V2DImode;
49647 pinsr = gen_sse4_1_pinsrq;
49648 break;
49650 default:
49651 return false;
49654 rtx d = dst;
49655 if (GET_MODE (dst) != dstmode)
49656 d = gen_reg_rtx (dstmode);
49657 src = gen_lowpart (srcmode, src);
49659 pos /= size;
49661 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
49662 GEN_INT (1 << pos)));
49663 if (d != dst)
49664 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
49665 return true;
49668 default:
49669 return false;
49673 /* This function returns the calling abi specific va_list type node.
49674 It returns the FNDECL specific va_list type. */
49676 static tree
49677 ix86_fn_abi_va_list (tree fndecl)
49679 if (!TARGET_64BIT)
49680 return va_list_type_node;
49681 gcc_assert (fndecl != NULL_TREE);
49683 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
49684 return ms_va_list_type_node;
49685 else
49686 return sysv_va_list_type_node;
49689 /* Returns the canonical va_list type specified by TYPE. If there
49690    is no valid TYPE provided, it returns NULL_TREE.  */
49692 static tree
49693 ix86_canonical_va_list_type (tree type)
49695 tree wtype, htype;
49697 /* Resolve references and pointers to va_list type. */
49698 if (TREE_CODE (type) == MEM_REF)
49699 type = TREE_TYPE (type);
49700 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
49701 type = TREE_TYPE (type);
49702 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
49703 type = TREE_TYPE (type);
49705 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
49707 wtype = va_list_type_node;
49708 gcc_assert (wtype != NULL_TREE);
49709 htype = type;
49710 if (TREE_CODE (wtype) == ARRAY_TYPE)
49712 /* If va_list is an array type, the argument may have decayed
49713 to a pointer type, e.g. by being passed to another function.
49714 In that case, unwrap both types so that we can compare the
49715 underlying records. */
49716 if (TREE_CODE (htype) == ARRAY_TYPE
49717 || POINTER_TYPE_P (htype))
49719 wtype = TREE_TYPE (wtype);
49720 htype = TREE_TYPE (htype);
49723 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
49724 return va_list_type_node;
49725 wtype = sysv_va_list_type_node;
49726 gcc_assert (wtype != NULL_TREE);
49727 htype = type;
49728 if (TREE_CODE (wtype) == ARRAY_TYPE)
49730 /* If va_list is an array type, the argument may have decayed
49731 to a pointer type, e.g. by being passed to another function.
49732 In that case, unwrap both types so that we can compare the
49733 underlying records. */
49734 if (TREE_CODE (htype) == ARRAY_TYPE
49735 || POINTER_TYPE_P (htype))
49737 wtype = TREE_TYPE (wtype);
49738 htype = TREE_TYPE (htype);
49741 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
49742 return sysv_va_list_type_node;
49743 wtype = ms_va_list_type_node;
49744 gcc_assert (wtype != NULL_TREE);
49745 htype = type;
49746 if (TREE_CODE (wtype) == ARRAY_TYPE)
49748 /* If va_list is an array type, the argument may have decayed
49749 to a pointer type, e.g. by being passed to another function.
49750 In that case, unwrap both types so that we can compare the
49751 underlying records. */
49752 if (TREE_CODE (htype) == ARRAY_TYPE
49753 || POINTER_TYPE_P (htype))
49755 wtype = TREE_TYPE (wtype);
49756 htype = TREE_TYPE (htype);
49759 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
49760 return ms_va_list_type_node;
49761 return NULL_TREE;
49763 return std_canonical_va_list_type (type);
49766 /* Iterate through the target-specific builtin types for va_list.
49767 IDX denotes the iterator, *PTREE is set to the result type of
49768 the va_list builtin, and *PNAME to its internal type.
49769 Returns zero if there is no element for this index, otherwise
49770 IDX should be increased upon the next call.
49771 Note, do not iterate a base builtin's name like __builtin_va_list.
49772 Used from c_common_nodes_and_builtins. */
49774 static int
49775 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
49777 if (TARGET_64BIT)
49779 switch (idx)
49781 default:
49782 break;
49784 case 0:
49785 *ptree = ms_va_list_type_node;
49786 *pname = "__builtin_ms_va_list";
49787 return 1;
49789 case 1:
49790 *ptree = sysv_va_list_type_node;
49791 *pname = "__builtin_sysv_va_list";
49792 return 1;
49796 return 0;
49799 #undef TARGET_SCHED_DISPATCH
49800 #define TARGET_SCHED_DISPATCH has_dispatch
49801 #undef TARGET_SCHED_DISPATCH_DO
49802 #define TARGET_SCHED_DISPATCH_DO do_dispatch
49803 #undef TARGET_SCHED_REASSOCIATION_WIDTH
49804 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
49805 #undef TARGET_SCHED_REORDER
49806 #define TARGET_SCHED_REORDER ix86_sched_reorder
49807 #undef TARGET_SCHED_ADJUST_PRIORITY
49808 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
49809 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
49810 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
49811 ix86_dependencies_evaluation_hook
49813 /* The size of the dispatch window is the total number of bytes of
49814 object code allowed in a window. */
49815 #define DISPATCH_WINDOW_SIZE 16
49817 /* Number of dispatch windows considered for scheduling. */
49818 #define MAX_DISPATCH_WINDOWS 3
49820 /* Maximum number of instructions in a window. */
49821 #define MAX_INSN 4
49823 /* Maximum number of immediate operands in a window. */
49824 #define MAX_IMM 4
49826 /* Maximum number of immediate bits allowed in a window. */
49827 #define MAX_IMM_SIZE 128
49829 /* Maximum number of 32 bit immediates allowed in a window. */
49830 #define MAX_IMM_32 4
49832 /* Maximum number of 64 bit immediates allowed in a window. */
49833 #define MAX_IMM_64 2
49835 /* Maximum total of loads or prefetches allowed in a window. */
49836 #define MAX_LOAD 2
49838 /* Maximum total of stores allowed in a window. */
49839 #define MAX_STORE 1
49841 #undef BIG
49842 #define BIG 100
49845 /* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
49846 enum dispatch_group {
49847 disp_no_group = 0,
49848 disp_load,
49849 disp_store,
49850 disp_load_store,
49851 disp_prefetch,
49852 disp_imm,
49853 disp_imm_32,
49854 disp_imm_64,
49855 disp_branch,
49856 disp_cmp,
49857 disp_jcc,
49858 disp_last
49861 /* Number of allowable groups in a dispatch window. It is an array
49862 indexed by dispatch_group enum. 100 is used as a big number,
49863    because the number of these kinds of operations does not have any
49864    effect on the dispatch window, but we need them for other reasons in
49865 the table. */
49866 static unsigned int num_allowable_groups[disp_last] = {
49867 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
49870 char group_name[disp_last + 1][16] = {
49871 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
49872 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
49873 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
49876 /* Instruction path. */
49877 enum insn_path {
49878 no_path = 0,
49879 path_single, /* Single micro op. */
49880 path_double, /* Double micro op. */
49881   path_multi,                  /* Instructions with more than 2 micro ops.  */
49882 last_path
49885 /* sched_insn_info defines a window to the instructions scheduled in
49886 the basic block. It contains a pointer to the insn_info table and
49887 the instruction scheduled.
49889 Windows are allocated for each basic block and are linked
49890 together. */
49891 typedef struct sched_insn_info_s {
49892 rtx insn;
49893 enum dispatch_group group;
49894 enum insn_path path;
49895 int byte_len;
49896 int imm_bytes;
49897 } sched_insn_info;
49899 /* Linked list of dispatch windows. This is a two way list of
49900 dispatch windows of a basic block. It contains information about
49901 the number of uops in the window and the total number of
49902 instructions and of bytes in the object code for this dispatch
49903 window. */
49904 typedef struct dispatch_windows_s {
49905 int num_insn; /* Number of insn in the window. */
49906 int num_uops; /* Number of uops in the window. */
49907 int window_size; /* Number of bytes in the window. */
49908   int window_num;            /* Window number, either 0 or 1.  */
49909 int num_imm; /* Number of immediates in an insn. */
49910 int num_imm_32; /* Number of 32 bit immediates in an insn. */
49911 int num_imm_64; /* Number of 64 bit immediates in an insn. */
49912 int imm_size; /* Total immediates in the window. */
49913 int num_loads; /* Total memory loads in the window. */
49914 int num_stores; /* Total memory stores in the window. */
49915 int violation; /* Violation exists in window. */
49916 sched_insn_info *window; /* Pointer to the window. */
49917 struct dispatch_windows_s *next;
49918 struct dispatch_windows_s *prev;
49919 } dispatch_windows;
49921 /* Immediate values used in an insn.  */
49922 typedef struct imm_info_s
49924 int imm;
49925 int imm32;
49926 int imm64;
49927 } imm_info;
49929 static dispatch_windows *dispatch_window_list;
49930 static dispatch_windows *dispatch_window_list1;
49932 /* Get dispatch group of insn. */
49934 static enum dispatch_group
49935 get_mem_group (rtx_insn *insn)
49937 enum attr_memory memory;
49939 if (INSN_CODE (insn) < 0)
49940 return disp_no_group;
49941 memory = get_attr_memory (insn);
49942 if (memory == MEMORY_STORE)
49943 return disp_store;
49945 if (memory == MEMORY_LOAD)
49946 return disp_load;
49948 if (memory == MEMORY_BOTH)
49949 return disp_load_store;
49951 return disp_no_group;
49954 /* Return true if insn is a compare instruction. */
49956 static bool
49957 is_cmp (rtx_insn *insn)
49959 enum attr_type type;
49961 type = get_attr_type (insn);
49962 return (type == TYPE_TEST
49963 || type == TYPE_ICMP
49964 || type == TYPE_FCMP
49965 || GET_CODE (PATTERN (insn)) == COMPARE);
49968 /* Return true if a dispatch violation was encountered.  */
49970 static bool
49971 dispatch_violation (void)
49973 if (dispatch_window_list->next)
49974 return dispatch_window_list->next->violation;
49975 return dispatch_window_list->violation;
49978 /* Return true if insn is a branch instruction. */
49980 static bool
49981 is_branch (rtx insn)
49983 return (CALL_P (insn) || JUMP_P (insn));
49986 /* Return true if insn is a prefetch instruction. */
49988 static bool
49989 is_prefetch (rtx insn)
49991 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
49994 /* This function initializes a dispatch window and the list container holding a
49995 pointer to the window. */
49997 static void
49998 init_window (int window_num)
50000 int i;
50001 dispatch_windows *new_list;
50003 if (window_num == 0)
50004 new_list = dispatch_window_list;
50005 else
50006 new_list = dispatch_window_list1;
50008 new_list->num_insn = 0;
50009 new_list->num_uops = 0;
50010 new_list->window_size = 0;
50011 new_list->next = NULL;
50012 new_list->prev = NULL;
50013 new_list->window_num = window_num;
50014 new_list->num_imm = 0;
50015 new_list->num_imm_32 = 0;
50016 new_list->num_imm_64 = 0;
50017 new_list->imm_size = 0;
50018 new_list->num_loads = 0;
50019 new_list->num_stores = 0;
50020 new_list->violation = false;
50022 for (i = 0; i < MAX_INSN; i++)
50024 new_list->window[i].insn = NULL;
50025 new_list->window[i].group = disp_no_group;
50026 new_list->window[i].path = no_path;
50027 new_list->window[i].byte_len = 0;
50028 new_list->window[i].imm_bytes = 0;
50030 return;
50033 /* This function allocates and initializes a dispatch window and the
50034 list container holding a pointer to the window. */
50036 static dispatch_windows *
50037 allocate_window (void)
50039 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50040 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50042 return new_list;
50045 /* This routine initializes the dispatch scheduling information. It
50046 initiates building dispatch scheduler tables and constructs the
50047 first dispatch window. */
50049 static void
50050 init_dispatch_sched (void)
50052 /* Allocate a dispatch list and a window. */
50053 dispatch_window_list = allocate_window ();
50054 dispatch_window_list1 = allocate_window ();
50055 init_window (0);
50056 init_window (1);
50059 /* This function returns true if a branch is detected. End of a basic block
50060 does not have to be a branch, but here we assume only branches end a
50061 window. */
50063 static bool
50064 is_end_basic_block (enum dispatch_group group)
50066 return group == disp_branch;
50069 /* This function is called when the end of a window processing is reached. */
50071 static void
50072 process_end_window (void)
50074 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50075 if (dispatch_window_list->next)
50077 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50078 gcc_assert (dispatch_window_list->window_size
50079 + dispatch_window_list1->window_size <= 48);
50080 init_window (1);
50082 init_window (0);
50085 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50086 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50087 for 48 bytes of instructions. Note that these windows are not dispatch
50088    windows whose size is DISPATCH_WINDOW_SIZE.  */
50090 static dispatch_windows *
50091 allocate_next_window (int window_num)
50093 if (window_num == 0)
50095 if (dispatch_window_list->next)
50096 init_window (1);
50097 init_window (0);
50098 return dispatch_window_list;
50101 dispatch_window_list->next = dispatch_window_list1;
50102 dispatch_window_list1->prev = dispatch_window_list;
50104 return dispatch_window_list1;
50107 /* Compute number of immediate operands of an instruction. */
50109 static void
50110 find_constant (rtx in_rtx, imm_info *imm_values)
50112 if (INSN_P (in_rtx))
50113 in_rtx = PATTERN (in_rtx);
50114 subrtx_iterator::array_type array;
50115 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50116 if (const_rtx x = *iter)
50117 switch (GET_CODE (x))
50119 case CONST:
50120 case SYMBOL_REF:
50121 case CONST_INT:
50122 (imm_values->imm)++;
50123 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50124 (imm_values->imm32)++;
50125 else
50126 (imm_values->imm64)++;
50127 break;
50129 case CONST_DOUBLE:
50130 (imm_values->imm)++;
50131 (imm_values->imm64)++;
50132 break;
50134 case CODE_LABEL:
50135 if (LABEL_KIND (x) == LABEL_NORMAL)
50137 (imm_values->imm)++;
50138 (imm_values->imm32)++;
50140 break;
50142 default:
50143 break;
50147 /* Return total size of immediate operands of an instruction along with number
50148 of corresponding immediate-operands. It initializes its parameters to zero
50149    before calling FIND_CONSTANT.
50150 INSN is the input instruction. IMM is the total of immediates.
50151 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50152 bit immediates. */
50154 static int
50155 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50157 imm_info imm_values = {0, 0, 0};
50159 find_constant (insn, &imm_values);
50160 *imm = imm_values.imm;
50161 *imm32 = imm_values.imm32;
50162 *imm64 = imm_values.imm64;
50163 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50166 /* This function indicates whether an instruction has an immediate
50167    operand.  */
50169 static bool
50170 has_immediate (rtx insn)
50172 int num_imm_operand;
50173 int num_imm32_operand;
50174 int num_imm64_operand;
50176 if (insn)
50177 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50178 &num_imm64_operand);
50179 return false;
50182 /* Return single or double path for instructions. */
50184 static enum insn_path
50185 get_insn_path (rtx_insn *insn)
50187 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50189 if ((int)path == 0)
50190 return path_single;
50192 if ((int)path == 1)
50193 return path_double;
50195 return path_multi;
50198 /* Return insn dispatch group. */
50200 static enum dispatch_group
50201 get_insn_group (rtx_insn *insn)
50203 enum dispatch_group group = get_mem_group (insn);
50204 if (group)
50205 return group;
50207 if (is_branch (insn))
50208 return disp_branch;
50210 if (is_cmp (insn))
50211 return disp_cmp;
50213 if (has_immediate (insn))
50214 return disp_imm;
50216 if (is_prefetch (insn))
50217 return disp_prefetch;
50219 return disp_no_group;
50222 /* Count number of GROUP restricted instructions in a dispatch
50223 window WINDOW_LIST. */
50225 static int
50226 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50228 enum dispatch_group group = get_insn_group (insn);
50229 int imm_size;
50230 int num_imm_operand;
50231 int num_imm32_operand;
50232 int num_imm64_operand;
50234 if (group == disp_no_group)
50235 return 0;
50237 if (group == disp_imm)
50239 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50240 &num_imm64_operand);
50241 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50242 || num_imm_operand + window_list->num_imm > MAX_IMM
50243 || (num_imm32_operand > 0
50244 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50245 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50246 || (num_imm64_operand > 0
50247 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50248 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50249 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50250 && num_imm64_operand > 0
50251 && ((window_list->num_imm_64 > 0
50252 && window_list->num_insn >= 2)
50253 || window_list->num_insn >= 3)))
50254 return BIG;
50256 return 1;
50259 if ((group == disp_load_store
50260 && (window_list->num_loads >= MAX_LOAD
50261 || window_list->num_stores >= MAX_STORE))
50262 || ((group == disp_load
50263 || group == disp_prefetch)
50264 && window_list->num_loads >= MAX_LOAD)
50265 || (group == disp_store
50266 && window_list->num_stores >= MAX_STORE))
50267 return BIG;
50269 return 1;
50272 /* This function returns true if insn satisfies dispatch rules on the
50273 last window scheduled. */
50275 static bool
50276 fits_dispatch_window (rtx_insn *insn)
50278 dispatch_windows *window_list = dispatch_window_list;
50279 dispatch_windows *window_list_next = dispatch_window_list->next;
50280 unsigned int num_restrict;
50281 enum dispatch_group group = get_insn_group (insn);
50282 enum insn_path path = get_insn_path (insn);
50283 int sum;
50285 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50286 instructions should be given the lowest priority in the
50287 scheduling process in Haifa scheduler to make sure they will be
50288 scheduled in the same dispatch window as the reference to them. */
50289 if (group == disp_jcc || group == disp_cmp)
50290 return false;
50292 /* Check nonrestricted. */
50293 if (group == disp_no_group || group == disp_branch)
50294 return true;
50296 /* Get last dispatch window. */
50297 if (window_list_next)
50298 window_list = window_list_next;
50300 if (window_list->window_num == 1)
50302 sum = window_list->prev->window_size + window_list->window_size;
50304 if (sum == 32
50305 || (min_insn_size (insn) + sum) >= 48)
50306 /* Window 1 is full. Go to the next window. */
50307 return true;
50310 num_restrict = count_num_restricted (insn, window_list);
50312 if (num_restrict > num_allowable_groups[group])
50313 return false;
50315 /* See if it fits in the first window. */
50316 if (window_list->window_num == 0)
50318 /* The first window should have only single- and double-path
50319 uops. */
50320 if (path == path_double
50321 && (window_list->num_uops + 2) > MAX_INSN)
50322 return false;
50323 else if (path != path_single)
50324 return false;
50326 return true;
50329 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50330 dispatch window WINDOW_LIST. */
50332 static void
50333 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50335 int byte_len = min_insn_size (insn);
50336 int num_insn = window_list->num_insn;
50337 int imm_size;
50338 sched_insn_info *window = window_list->window;
50339 enum dispatch_group group = get_insn_group (insn);
50340 enum insn_path path = get_insn_path (insn);
50341 int num_imm_operand;
50342 int num_imm32_operand;
50343 int num_imm64_operand;
50345 if (!window_list->violation && group != disp_cmp
50346 && !fits_dispatch_window (insn))
50347 window_list->violation = true;
50349 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50350 &num_imm64_operand);
50352 /* Initialize window with new instruction. */
50353 window[num_insn].insn = insn;
50354 window[num_insn].byte_len = byte_len;
50355 window[num_insn].group = group;
50356 window[num_insn].path = path;
50357 window[num_insn].imm_bytes = imm_size;
50359 window_list->window_size += byte_len;
50360 window_list->num_insn = num_insn + 1;
50361 window_list->num_uops = window_list->num_uops + num_uops;
50362 window_list->imm_size += imm_size;
50363 window_list->num_imm += num_imm_operand;
50364 window_list->num_imm_32 += num_imm32_operand;
50365 window_list->num_imm_64 += num_imm64_operand;
50367 if (group == disp_store)
50368 window_list->num_stores += 1;
50369 else if (group == disp_load
50370 || group == disp_prefetch)
50371 window_list->num_loads += 1;
50372 else if (group == disp_load_store)
50374 window_list->num_stores += 1;
50375 window_list->num_loads += 1;
50379 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50380 If the total instruction bytes or the number of instructions in
50381 the window exceed the allowed limits, a new window is allocated. */
50383 static void
50384 add_to_dispatch_window (rtx_insn *insn)
50386 int byte_len;
50387 dispatch_windows *window_list;
50388 dispatch_windows *next_list;
50389 dispatch_windows *window0_list;
50390 enum insn_path path;
50391 enum dispatch_group insn_group;
50392 bool insn_fits;
50393 int num_insn;
50394 int num_uops;
50395 int window_num;
50396 int insn_num_uops;
50397 int sum;
50399 if (INSN_CODE (insn) < 0)
50400 return;
50402 byte_len = min_insn_size (insn);
50403 window_list = dispatch_window_list;
50404 next_list = window_list->next;
50405 path = get_insn_path (insn);
50406 insn_group = get_insn_group (insn);
50408 /* Get the last dispatch window. */
50409 if (next_list)
50410 window_list = dispatch_window_list->next;
50412 if (path == path_single)
50413 insn_num_uops = 1;
50414 else if (path == path_double)
50415 insn_num_uops = 2;
50416 else
50417 insn_num_uops = (int) path;
50419 /* If the current window is full, get a new window.
50420 Window number zero is full if MAX_INSN uops are scheduled in it.
50421 Window number one is full if window zero's bytes plus window
50422 one's bytes reach 32, if adding the bytes of the new instruction
50423 brings the total to 48 or more, or if it already has MAX_INSN
50424 instructions in it. */
50425 num_insn = window_list->num_insn;
50426 num_uops = window_list->num_uops;
50427 window_num = window_list->window_num;
50428 insn_fits = fits_dispatch_window (insn);
50430 if (num_insn >= MAX_INSN
50431 || num_uops + insn_num_uops > MAX_INSN
50432 || !(insn_fits))
50434 window_num = ~window_num & 1;
50435 window_list = allocate_next_window (window_num);
50438 if (window_num == 0)
50440 add_insn_window (insn, window_list, insn_num_uops);
50441 if (window_list->num_insn >= MAX_INSN
50442 && insn_group == disp_branch)
50444 process_end_window ();
50445 return;
50448 else if (window_num == 1)
50450 window0_list = window_list->prev;
50451 sum = window0_list->window_size + window_list->window_size;
50452 if (sum == 32
50453 || (byte_len + sum) >= 48)
50455 process_end_window ();
50456 window_list = dispatch_window_list;
50459 add_insn_window (insn, window_list, insn_num_uops);
50461 else
50462 gcc_unreachable ();
50464 if (is_end_basic_block (insn_group))
50466 /* The end of the basic block has been reached; do the end-basic-block processing. */
50467 process_end_window ();
50468 return;
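/* Worked example (illustrative) of the window-full rules above: suppose
   window 0 holds 26 bytes and window 1 holds 4 bytes, so sum = 30.  A new
   3-byte instruction keeps byte_len + sum = 33 below 48 and sum != 32, so
   it is added to window 1.  If the new instruction were 18 bytes instead,
   byte_len + sum = 48 >= 48, so process_end_window () is called and the
   instruction starts a fresh window 0.  */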
50472 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50474 DEBUG_FUNCTION static void
50475 debug_dispatch_window_file (FILE *file, int window_num)
50477 dispatch_windows *list;
50478 int i;
50480 if (window_num == 0)
50481 list = dispatch_window_list;
50482 else
50483 list = dispatch_window_list1;
50485 fprintf (file, "Window #%d:\n", list->window_num);
50486 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50487 list->num_insn, list->num_uops, list->window_size);
50488 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50489 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50491 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50492 list->num_stores);
50493 fprintf (file, " insn info:\n");
50495 for (i = 0; i < MAX_INSN; i++)
50497 if (!list->window[i].insn)
50498 break;
50499 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50500 i, group_name[list->window[i].group],
50501 i, (void *)list->window[i].insn,
50502 i, list->window[i].path,
50503 i, list->window[i].byte_len,
50504 i, list->window[i].imm_bytes);
50508 /* Print to stdout a dispatch window. */
50510 DEBUG_FUNCTION void
50511 debug_dispatch_window (int window_num)
50513 debug_dispatch_window_file (stdout, window_num);
50516 /* Print INSN dispatch information to FILE. */
50518 DEBUG_FUNCTION static void
50519 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50521 int byte_len;
50522 enum insn_path path;
50523 enum dispatch_group group;
50524 int imm_size;
50525 int num_imm_operand;
50526 int num_imm32_operand;
50527 int num_imm64_operand;
50529 if (INSN_CODE (insn) < 0)
50530 return;
50532 byte_len = min_insn_size (insn);
50533 path = get_insn_path (insn);
50534 group = get_insn_group (insn);
50535 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50536 &num_imm64_operand);
50538 fprintf (file, " insn info:\n");
50539 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50540 group_name[group], path, byte_len);
50541 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50542 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50545 /* Print to STDOUT the status of the ready list with respect to
50546 dispatch windows. */
50548 DEBUG_FUNCTION void
50549 debug_ready_dispatch (void)
50551 int i;
50552 int no_ready = number_in_ready ();
50554 fprintf (stdout, "Number of ready: %d\n", no_ready);
50556 for (i = 0; i < no_ready; i++)
50557 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50560 /* This routine is the driver of the dispatch scheduler. */
50562 static void
50563 do_dispatch (rtx_insn *insn, int mode)
50565 if (mode == DISPATCH_INIT)
50566 init_dispatch_sched ();
50567 else if (mode == ADD_TO_DISPATCH_WINDOW)
50568 add_to_dispatch_window (insn);
50571 /* Return TRUE if dispatch scheduling is supported and the query ACTION holds for INSN. */
50573 static bool
50574 has_dispatch (rtx_insn *insn, int action)
50576 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
50577 && flag_dispatch_scheduler)
50578 switch (action)
50580 default:
50581 return false;
50583 case IS_DISPATCH_ON:
50584 return true;
50585 break;
50587 case IS_CMP:
50588 return is_cmp (insn);
50590 case DISPATCH_VIOLATION:
50591 return dispatch_violation ();
50593 case FITS_DISPATCH_WINDOW:
50594 return fits_dispatch_window (insn);
50597 return false;
50600 /* Implementation of the reassociation_width target hook, used by the
50601 reassoc phase to identify the parallelism level in a reassociated
50602 tree. The statement's tree code is passed in OPC and the type of
50603 the arguments in MODE.
50605 Currently parallel reassociation is enabled only for Atom
50606 processors, and we set the reassociation width to 2 because Atom
50607 may issue up to 2 instructions per cycle.
50609 The return value should be revisited if parallel reassociation is
50610 enabled for other processors. */
50612 static int
50613 ix86_reassociation_width (unsigned int, machine_mode mode)
50615 int res = 1;
50617 /* Vector part. */
50618 if (VECTOR_MODE_P (mode))
50620 if (TARGET_VECTOR_PARALLEL_EXECUTION)
50621 return 2;
50622 else
50623 return 1;
50626 /* Scalar part. */
50627 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
50628 res = 2;
50629 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
50630 res = 2;
50632 return res;
50635 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
50636 place emms and femms instructions. */
50638 static machine_mode
50639 ix86_preferred_simd_mode (machine_mode mode)
50641 if (!TARGET_SSE)
50642 return word_mode;
50644 switch (mode)
50646 case QImode:
50647 return TARGET_AVX512BW ? V64QImode :
50648 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
50649 case HImode:
50650 return TARGET_AVX512BW ? V32HImode :
50651 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
50652 case SImode:
50653 return TARGET_AVX512F ? V16SImode :
50654 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
50655 case DImode:
50656 return TARGET_AVX512F ? V8DImode :
50657 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
50659 case SFmode:
50660 if (TARGET_AVX512F)
50661 return V16SFmode;
50662 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50663 return V8SFmode;
50664 else
50665 return V4SFmode;
50667 case DFmode:
50668 if (!TARGET_VECTORIZE_DOUBLE)
50669 return word_mode;
50670 else if (TARGET_AVX512F)
50671 return V8DFmode;
50672 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
50673 return V4DFmode;
50674 else if (TARGET_SSE2)
50675 return V2DFmode;
50676 /* FALLTHRU */
50678 default:
50679 return word_mode;
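/* Illustrative summary of the mapping above: the widest enabled vector
   mode for the element type is chosen.  For SImode elements, TARGET_AVX512F
   gives V16SImode (16 x 32 = 512 bits), TARGET_AVX without
   TARGET_PREFER_AVX128 gives V8SImode (256 bits), and baseline SSE gives
   V4SImode (128 bits).  DFmode additionally requires
   TARGET_VECTORIZE_DOUBLE, otherwise word_mode is returned.  */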
50683 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
50684 vectors. If AVX512F is enabled then try vectorizing with 512bit,
50685 256bit and 128bit vectors. */
50687 static unsigned int
50688 ix86_autovectorize_vector_sizes (void)
50690 return TARGET_AVX512F ? 64 | 32 | 16 :
50691 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
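/* Illustrative note: the return value is a bit mask of vector sizes in
   bytes that the vectorizer may try.  With AVX512F it is 64 | 32 | 16
   (0x70), i.e. 512-, 256- and 128-bit vectors; with AVX and not
   TARGET_PREFER_AVX128 it is 32 | 16 (0x30); a return value of 0 means
   only the preferred SIMD mode above is tried.  */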
50696 /* Return the class of registers that could be used to spill a pseudo
50697 of MODE and class RCLASS instead of memory. Return NO_REGS if this
50698 is not possible or not profitable. */
50699 static reg_class_t
50700 ix86_spill_class (reg_class_t rclass, machine_mode mode)
50702 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
50703 && (mode == SImode || (TARGET_64BIT && mode == DImode))
50704 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
50705 return ALL_SSE_REGS;
50706 return NO_REGS;
50709 /* Implement targetm.vectorize.init_cost. */
50711 static void *
50712 ix86_init_cost (struct loop *)
50714 unsigned *cost = XNEWVEC (unsigned, 3);
50715 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
50716 return cost;
50719 /* Implement targetm.vectorize.add_stmt_cost. */
50721 static unsigned
50722 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
50723 struct _stmt_vec_info *stmt_info, int misalign,
50724 enum vect_cost_model_location where)
50726 unsigned *cost = (unsigned *) data;
50727 unsigned retval = 0;
50729 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
50730 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
50732 /* Statements in an inner loop relative to the loop being
50733 vectorized are weighted more heavily. The value here is
50734 arbitrary and could potentially be improved with analysis. */
50735 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
50736 count *= 50; /* FIXME. */
50738 retval = (unsigned) (count * stmt_cost);
50740 /* We need to multiply all vector statement costs by 1.7 (estimated cost)
50741 for Silvermont, as it has an out-of-order integer pipeline and can
50742 execute 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
50743 if (TARGET_SILVERMONT || TARGET_INTEL)
50744 if (stmt_info && stmt_info->stmt)
50746 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
50747 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
50748 retval = (retval * 17) / 10;
50751 cost[where] += retval;
50753 return retval;
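/* Worked example (illustrative): a statement with cost 4 and COUNT 2
   contributes 2 * 4 = 8 to the cost bucket selected by WHERE; when tuning
   for Silvermont and the statement produces an integer result, this
   becomes 8 * 17 / 10 = 13 using integer division.  Statements in an
   inner loop that contribute to the body cost have COUNT scaled by the
   arbitrary factor of 50 first.  */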
50756 /* Implement targetm.vectorize.finish_cost. */
50758 static void
50759 ix86_finish_cost (void *data, unsigned *prologue_cost,
50760 unsigned *body_cost, unsigned *epilogue_cost)
50762 unsigned *cost = (unsigned *) data;
50763 *prologue_cost = cost[vect_prologue];
50764 *body_cost = cost[vect_body];
50765 *epilogue_cost = cost[vect_epilogue];
50768 /* Implement targetm.vectorize.destroy_cost_data. */
50770 static void
50771 ix86_destroy_cost_data (void *data)
50773 free (data);
50776 /* Validate target specific memory model bits in VAL. */
50778 static unsigned HOST_WIDE_INT
50779 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
50781 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
50782 bool strong;
50784 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
50785 |MEMMODEL_MASK)
50786 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
50788 warning (OPT_Winvalid_memory_model,
50789 "Unknown architecture specific memory model");
50790 return MEMMODEL_SEQ_CST;
50792 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
50793 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
50795 warning (OPT_Winvalid_memory_model,
50796 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
50797 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
50799 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
50801 warning (OPT_Winvalid_memory_model,
50802 "HLE_RELEASE not used with RELEASE or stronger memory model");
50803 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
50805 return val;
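/* Illustrative user-level example of the bits validated above, along the
   lines of GCC's documented x86 HLE memory-model extension:

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ... critical section ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining an HLE bit with a weaker model than required (e.g. HLE_ACQUIRE
   with a relaxed model) triggers the warning above and falls back to
   MEMMODEL_SEQ_CST plus the HLE bit.  */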
50808 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
50809 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
50810 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
50811 or the number of vecsize_mangle variants that should be emitted. */
50813 static int
50814 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
50815 struct cgraph_simd_clone *clonei,
50816 tree base_type, int num)
50818 int ret = 1;
50820 if (clonei->simdlen
50821 && (clonei->simdlen < 2
50822 || clonei->simdlen > 16
50823 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
50825 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50826 "unsupported simdlen %d", clonei->simdlen);
50827 return 0;
50830 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
50831 if (TREE_CODE (ret_type) != VOID_TYPE)
50832 switch (TYPE_MODE (ret_type))
50834 case QImode:
50835 case HImode:
50836 case SImode:
50837 case DImode:
50838 case SFmode:
50839 case DFmode:
50840 /* case SCmode: */
50841 /* case DCmode: */
50842 break;
50843 default:
50844 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50845 "unsupported return type %qT for simd\n", ret_type);
50846 return 0;
50849 tree t;
50850 int i;
50852 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
50853 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
50854 switch (TYPE_MODE (TREE_TYPE (t)))
50856 case QImode:
50857 case HImode:
50858 case SImode:
50859 case DImode:
50860 case SFmode:
50861 case DFmode:
50862 /* case SCmode: */
50863 /* case DCmode: */
50864 break;
50865 default:
50866 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
50867 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
50868 return 0;
50871 if (clonei->cilk_elemental)
50873 /* Parse the processor clause here. If it is not present, default to 'b'. */
50874 clonei->vecsize_mangle = 'b';
50876 else if (!TREE_PUBLIC (node->decl))
50878 /* If the function isn't exported, we can pick up just one ISA
50879 for the clones. */
50880 if (TARGET_AVX2)
50881 clonei->vecsize_mangle = 'd';
50882 else if (TARGET_AVX)
50883 clonei->vecsize_mangle = 'c';
50884 else
50885 clonei->vecsize_mangle = 'b';
50886 ret = 1;
50888 else
50890 clonei->vecsize_mangle = "bcd"[num];
50891 ret = 3;
50893 switch (clonei->vecsize_mangle)
50895 case 'b':
50896 clonei->vecsize_int = 128;
50897 clonei->vecsize_float = 128;
50898 break;
50899 case 'c':
50900 clonei->vecsize_int = 128;
50901 clonei->vecsize_float = 256;
50902 break;
50903 case 'd':
50904 clonei->vecsize_int = 256;
50905 clonei->vecsize_float = 256;
50906 break;
50908 if (clonei->simdlen == 0)
50910 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
50911 clonei->simdlen = clonei->vecsize_int;
50912 else
50913 clonei->simdlen = clonei->vecsize_float;
50914 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
50915 if (clonei->simdlen > 16)
50916 clonei->simdlen = 16;
50918 return ret;
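/* Worked example (illustrative): for an exported (TREE_PUBLIC) function
   whose base type is int (SImode), three variants mangled 'b', 'c' and 'd'
   are emitted.  With simdlen unspecified, the 'b' (SSE2) variant gets
   simdlen = 128 / 32 = 4 and the 'd' (AVX2) variant gets 256 / 32 = 8.
   For a double base type, the 'c' (AVX) variant uses vecsize_float = 256,
   giving simdlen = 256 / 64 = 4.  */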
50921 /* Add target attribute to SIMD clone NODE if needed. */
50923 static void
50924 ix86_simd_clone_adjust (struct cgraph_node *node)
50926 const char *str = NULL;
50927 gcc_assert (node->decl == cfun->decl);
50928 switch (node->simdclone->vecsize_mangle)
50930 case 'b':
50931 if (!TARGET_SSE2)
50932 str = "sse2";
50933 break;
50934 case 'c':
50935 if (!TARGET_AVX)
50936 str = "avx";
50937 break;
50938 case 'd':
50939 if (!TARGET_AVX2)
50940 str = "avx2";
50941 break;
50942 default:
50943 gcc_unreachable ();
50945 if (str == NULL)
50946 return;
50947 push_cfun (NULL);
50948 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
50949 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
50950 gcc_assert (ok);
50951 pop_cfun ();
50952 ix86_previous_fndecl = NULL_TREE;
50953 ix86_set_current_function (node->decl);
50956 /* If SIMD clone NODE can't be used in a vectorized loop in the
50957 current function, return -1; otherwise return the badness of using it
50958 (0 if it is the most desirable from the vecsize_mangle point of view,
50959 1 if slightly less desirable, etc.). */
50961 static int
50962 ix86_simd_clone_usable (struct cgraph_node *node)
50964 switch (node->simdclone->vecsize_mangle)
50966 case 'b':
50967 if (!TARGET_SSE2)
50968 return -1;
50969 if (!TARGET_AVX)
50970 return 0;
50971 return TARGET_AVX2 ? 2 : 1;
50972 case 'c':
50973 if (!TARGET_AVX)
50974 return -1;
50975 return TARGET_AVX2 ? 1 : 0;
50976 break;
50977 case 'd':
50978 if (!TARGET_AVX2)
50979 return -1;
50980 return 0;
50981 default:
50982 gcc_unreachable ();
50986 /* This function adjusts the unroll factor based on
50987 the hardware capabilities. For example, bdver3 has
50988 a loop buffer which makes unrolling of smaller
50989 loops less important. This function decides the
50990 unroll factor using the number of memory references
50991 (the value 32 is used) as a heuristic. */
50993 static unsigned
50994 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
50996 basic_block *bbs;
50997 rtx_insn *insn;
50998 unsigned i;
50999 unsigned mem_count = 0;
51001 if (!TARGET_ADJUST_UNROLL)
51002 return nunroll;
51004 /* Count the number of memory references within the loop body.
51005 This value determines the unrolling factor for bdver3 and bdver4
51006 architectures. */
51007 subrtx_iterator::array_type array;
51008 bbs = get_loop_body (loop);
51009 for (i = 0; i < loop->num_nodes; i++)
51010 FOR_BB_INSNS (bbs[i], insn)
51011 if (NONDEBUG_INSN_P (insn))
51012 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51013 if (const_rtx x = *iter)
51014 if (MEM_P (x))
51016 machine_mode mode = GET_MODE (x);
51017 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51018 if (n_words > 4)
51019 mem_count += 2;
51020 else
51021 mem_count += 1;
51023 free (bbs);
51025 if (mem_count && mem_count <= 32)
51026 return 32 / mem_count;
51028 return nunroll;
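/* Worked example (illustrative): a loop body with 5 word-sized memory
   references and one reference wider than 4 words gives
   mem_count = 5 + 2 = 7, so the unroll factor becomes 32 / 7 = 4.
   With no memory references, or more than 32 of them, the requested
   NUNROLL is returned unchanged.  */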
51032 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51034 static bool
51035 ix86_float_exceptions_rounding_supported_p (void)
51037 /* For x87 floating point with standard excess precision handling,
51038 there is no adddf3 pattern (since x87 floating point only has
51039 XFmode operations) so the default hook implementation gets this
51040 wrong. */
51041 return TARGET_80387 || TARGET_SSE_MATH;
51044 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51046 static void
51047 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51049 if (!TARGET_80387 && !TARGET_SSE_MATH)
51050 return;
51051 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
51052 if (TARGET_80387)
51054 tree fenv_index_type = build_index_type (size_int (6));
51055 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51056 tree fenv_var = create_tmp_var (fenv_type, NULL);
51057 mark_addressable (fenv_var);
51058 tree fenv_ptr = build_pointer_type (fenv_type);
51059 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51060 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51061 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51062 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51063 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51064 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51065 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51066 tree hold_fnclex = build_call_expr (fnclex, 0);
51067 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51068 hold_fnclex);
51069 *clear = build_call_expr (fnclex, 0);
51070 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
51071 tree fnstsw_call = build_call_expr (fnstsw, 0);
51072 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51073 sw_var, fnstsw_call);
51074 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51075 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51076 exceptions_var, exceptions_x87);
51077 *update = build2 (COMPOUND_EXPR, integer_type_node,
51078 sw_mod, update_mod);
51079 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51080 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51082 if (TARGET_SSE_MATH)
51084 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
51085 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
51086 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51087 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51088 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51089 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51090 mxcsr_orig_var, stmxcsr_hold_call);
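/* 0x1f80 sets the six MXCSR exception-mask bits (bits 7-12), so no
   floating-point exception traps; the AND with 0xffffffc0 (~0x3f)
   clears the six exception-status flags (bits 0-5).  */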
51091 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51092 mxcsr_orig_var,
51093 build_int_cst (unsigned_type_node, 0x1f80));
51094 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51095 build_int_cst (unsigned_type_node, 0xffffffc0));
51096 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51097 mxcsr_mod_var, hold_mod_val);
51098 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51099 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51100 hold_assign_orig, hold_assign_mod);
51101 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51102 ldmxcsr_hold_call);
51103 if (*hold)
51104 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51105 else
51106 *hold = hold_all;
51107 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51108 if (*clear)
51109 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51110 ldmxcsr_clear_call);
51111 else
51112 *clear = ldmxcsr_clear_call;
51113 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51114 tree exceptions_sse = fold_convert (integer_type_node,
51115 stxmcsr_update_call);
51116 if (*update)
51118 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51119 exceptions_var, exceptions_sse);
51120 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51121 exceptions_var, exceptions_mod);
51122 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51123 exceptions_assign);
51125 else
51126 *update = build2 (MODIFY_EXPR, integer_type_node,
51127 exceptions_var, exceptions_sse);
51128 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51129 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51130 ldmxcsr_update_call);
51132 tree atomic_feraiseexcept
51133 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51134 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51135 1, exceptions_var);
51136 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51137 atomic_feraiseexcept_call);
51140 /* Return the mode to be used for bounds, or VOIDmode
51141 if bounds are not supported. */
51143 static enum machine_mode
51144 ix86_mpx_bound_mode ()
51146 /* Do not support pointer checker if MPX
51147 is not enabled. */
51148 if (!TARGET_MPX)
51150 if (flag_check_pointer_bounds)
51151 warning (0, "Pointer Checker requires MPX support on this target."
51152 " Use -mmpx options to enable MPX.");
51153 return VOIDmode;
51156 return BNDmode;
51159 /* Return constant used to statically initialize constant bounds.
51161 This function is used to create special bound values. For now
51162 only INIT bounds and NONE bounds are expected. More special
51163 values may be added later. */
51165 static tree
51166 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51168 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51169 : build_zero_cst (pointer_sized_int_node);
51170 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51171 : build_minus_one_cst (pointer_sized_int_node);
51173 /* This function is supposed to be used to create INIT and
51174 NONE bounds only. */
51175 gcc_assert ((lb == 0 && ub == -1)
51176 || (lb == -1 && ub == 0));
51178 return build_complex (NULL, low, high);
51181 /* Generate a list of statements STMTS to initialize pointer bounds
51182 variable VAR with bounds LB and UB. Return the number of generated
51183 statements. */
51185 static int
51186 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51188 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51189 tree lhs, modify, var_p;
51191 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51192 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51194 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51195 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51196 append_to_statement_list (modify, stmts);
51198 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51199 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51200 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51201 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51202 append_to_statement_list (modify, stmts);
51204 return 2;
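/* Illustrative note: the upper bound is stored in complemented form (the
   BIT_NOT_EXPR above), which matches ix86_make_bounds_constant: the INIT
   bounds [0, -1] are built as the pair {0, ~-1 = 0} and the NONE bounds
   [-1, 0] as {-1, ~0 = -1}.  */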
51207 /* Initialize the GCC target structure. */
51208 #undef TARGET_RETURN_IN_MEMORY
51209 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51211 #undef TARGET_LEGITIMIZE_ADDRESS
51212 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51214 #undef TARGET_ATTRIBUTE_TABLE
51215 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51216 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51217 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51218 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51219 # undef TARGET_MERGE_DECL_ATTRIBUTES
51220 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51221 #endif
51223 #undef TARGET_COMP_TYPE_ATTRIBUTES
51224 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51226 #undef TARGET_INIT_BUILTINS
51227 #define TARGET_INIT_BUILTINS ix86_init_builtins
51228 #undef TARGET_BUILTIN_DECL
51229 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51230 #undef TARGET_EXPAND_BUILTIN
51231 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51233 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51234 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51235 ix86_builtin_vectorized_function
51237 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51238 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51240 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51241 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51243 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51244 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51246 #undef TARGET_BUILTIN_RECIPROCAL
51247 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51249 #undef TARGET_ASM_FUNCTION_EPILOGUE
51250 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51252 #undef TARGET_ENCODE_SECTION_INFO
51253 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51254 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51255 #else
51256 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51257 #endif
51259 #undef TARGET_ASM_OPEN_PAREN
51260 #define TARGET_ASM_OPEN_PAREN ""
51261 #undef TARGET_ASM_CLOSE_PAREN
51262 #define TARGET_ASM_CLOSE_PAREN ""
51264 #undef TARGET_ASM_BYTE_OP
51265 #define TARGET_ASM_BYTE_OP ASM_BYTE
51267 #undef TARGET_ASM_ALIGNED_HI_OP
51268 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51269 #undef TARGET_ASM_ALIGNED_SI_OP
51270 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51271 #ifdef ASM_QUAD
51272 #undef TARGET_ASM_ALIGNED_DI_OP
51273 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51274 #endif
51276 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51277 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51279 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51280 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51282 #undef TARGET_ASM_UNALIGNED_HI_OP
51283 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51284 #undef TARGET_ASM_UNALIGNED_SI_OP
51285 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51286 #undef TARGET_ASM_UNALIGNED_DI_OP
51287 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51289 #undef TARGET_PRINT_OPERAND
51290 #define TARGET_PRINT_OPERAND ix86_print_operand
51291 #undef TARGET_PRINT_OPERAND_ADDRESS
51292 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51293 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51294 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51295 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51296 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51298 #undef TARGET_SCHED_INIT_GLOBAL
51299 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51300 #undef TARGET_SCHED_ADJUST_COST
51301 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51302 #undef TARGET_SCHED_ISSUE_RATE
51303 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51304 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51305 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51306 ia32_multipass_dfa_lookahead
51307 #undef TARGET_SCHED_MACRO_FUSION_P
51308 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51309 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51310 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51312 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51313 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51315 #undef TARGET_MEMMODEL_CHECK
51316 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51318 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51319 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51321 #ifdef HAVE_AS_TLS
51322 #undef TARGET_HAVE_TLS
51323 #define TARGET_HAVE_TLS true
51324 #endif
51325 #undef TARGET_CANNOT_FORCE_CONST_MEM
51326 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51327 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51328 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51330 #undef TARGET_DELEGITIMIZE_ADDRESS
51331 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51333 #undef TARGET_MS_BITFIELD_LAYOUT_P
51334 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51336 #if TARGET_MACHO
51337 #undef TARGET_BINDS_LOCAL_P
51338 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51339 #endif
51340 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51341 #undef TARGET_BINDS_LOCAL_P
51342 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51343 #endif
51345 #undef TARGET_ASM_OUTPUT_MI_THUNK
51346 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51347 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51348 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51350 #undef TARGET_ASM_FILE_START
51351 #define TARGET_ASM_FILE_START x86_file_start
51353 #undef TARGET_OPTION_OVERRIDE
51354 #define TARGET_OPTION_OVERRIDE ix86_option_override
51356 #undef TARGET_REGISTER_MOVE_COST
51357 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51358 #undef TARGET_MEMORY_MOVE_COST
51359 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51360 #undef TARGET_RTX_COSTS
51361 #define TARGET_RTX_COSTS ix86_rtx_costs
51362 #undef TARGET_ADDRESS_COST
51363 #define TARGET_ADDRESS_COST ix86_address_cost
51365 #undef TARGET_FIXED_CONDITION_CODE_REGS
51366 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51367 #undef TARGET_CC_MODES_COMPATIBLE
51368 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51370 #undef TARGET_MACHINE_DEPENDENT_REORG
51371 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51373 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51374 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51376 #undef TARGET_BUILD_BUILTIN_VA_LIST
51377 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51379 #undef TARGET_FOLD_BUILTIN
51380 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51382 #undef TARGET_COMPARE_VERSION_PRIORITY
51383 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51385 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51386 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51387 ix86_generate_version_dispatcher_body
51389 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51390 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51391 ix86_get_function_versions_dispatcher
51393 #undef TARGET_ENUM_VA_LIST_P
51394 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51396 #undef TARGET_FN_ABI_VA_LIST
51397 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51399 #undef TARGET_CANONICAL_VA_LIST_TYPE
51400 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51402 #undef TARGET_EXPAND_BUILTIN_VA_START
51403 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51405 #undef TARGET_MD_ASM_CLOBBERS
51406 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51408 #undef TARGET_PROMOTE_PROTOTYPES
51409 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51410 #undef TARGET_SETUP_INCOMING_VARARGS
51411 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51412 #undef TARGET_MUST_PASS_IN_STACK
51413 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51414 #undef TARGET_FUNCTION_ARG_ADVANCE
51415 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51416 #undef TARGET_FUNCTION_ARG
51417 #define TARGET_FUNCTION_ARG ix86_function_arg
51418 #undef TARGET_INIT_PIC_REG
51419 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51420 #undef TARGET_USE_PSEUDO_PIC_REG
51421 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51422 #undef TARGET_FUNCTION_ARG_BOUNDARY
51423 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51424 #undef TARGET_PASS_BY_REFERENCE
51425 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51426 #undef TARGET_INTERNAL_ARG_POINTER
51427 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
51428 #undef TARGET_UPDATE_STACK_BOUNDARY
51429 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51430 #undef TARGET_GET_DRAP_RTX
51431 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51432 #undef TARGET_STRICT_ARGUMENT_NAMING
51433 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51434 #undef TARGET_STATIC_CHAIN
51435 #define TARGET_STATIC_CHAIN ix86_static_chain
51436 #undef TARGET_TRAMPOLINE_INIT
51437 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51438 #undef TARGET_RETURN_POPS_ARGS
51439 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
51441 #undef TARGET_LEGITIMATE_COMBINED_INSN
51442 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51444 #undef TARGET_ASAN_SHADOW_OFFSET
51445 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51447 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51448 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51450 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51451 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51454 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51456 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51457 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
51458 ix86_libgcc_floating_mode_supported_p
51460 #undef TARGET_C_MODE_FOR_SUFFIX
51461 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
51463 #ifdef HAVE_AS_TLS
51464 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
51465 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
51466 #endif
51468 #ifdef SUBTARGET_INSERT_ATTRIBUTES
51469 #undef TARGET_INSERT_ATTRIBUTES
51470 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
51471 #endif
51473 #undef TARGET_MANGLE_TYPE
51474 #define TARGET_MANGLE_TYPE ix86_mangle_type
51476 #if !TARGET_MACHO
51477 #undef TARGET_STACK_PROTECT_FAIL
51478 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
51479 #endif
51481 #undef TARGET_FUNCTION_VALUE
51482 #define TARGET_FUNCTION_VALUE ix86_function_value
51484 #undef TARGET_FUNCTION_VALUE_REGNO_P
51485 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
51487 #undef TARGET_PROMOTE_FUNCTION_MODE
51488 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
51490 #undef TARGET_MEMBER_TYPE_FORCES_BLK
51491 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
51493 #undef TARGET_INSTANTIATE_DECLS
51494 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
51496 #undef TARGET_SECONDARY_RELOAD
51497 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
51499 #undef TARGET_CLASS_MAX_NREGS
51500 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
51502 #undef TARGET_PREFERRED_RELOAD_CLASS
51503 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
51504 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
51505 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
51506 #undef TARGET_CLASS_LIKELY_SPILLED_P
51507 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
51509 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
51510 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
51511 ix86_builtin_vectorization_cost
51512 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
51513 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
51514 ix86_vectorize_vec_perm_const_ok
51515 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
51516 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
51517 ix86_preferred_simd_mode
51518 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
51519 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
51520 ix86_autovectorize_vector_sizes
51521 #undef TARGET_VECTORIZE_INIT_COST
51522 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
51523 #undef TARGET_VECTORIZE_ADD_STMT_COST
51524 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
51525 #undef TARGET_VECTORIZE_FINISH_COST
51526 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
51527 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
51528 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
51530 #undef TARGET_SET_CURRENT_FUNCTION
51531 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
51533 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
51534 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
51536 #undef TARGET_OPTION_SAVE
51537 #define TARGET_OPTION_SAVE ix86_function_specific_save
51539 #undef TARGET_OPTION_RESTORE
51540 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
51542 #undef TARGET_OPTION_PRINT
51543 #define TARGET_OPTION_PRINT ix86_function_specific_print
51545 #undef TARGET_OPTION_FUNCTION_VERSIONS
51546 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
51548 #undef TARGET_CAN_INLINE_P
51549 #define TARGET_CAN_INLINE_P ix86_can_inline_p
51551 #undef TARGET_EXPAND_TO_RTL_HOOK
51552 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
51554 #undef TARGET_LEGITIMATE_ADDRESS_P
51555 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
51557 #undef TARGET_LRA_P
51558 #define TARGET_LRA_P hook_bool_void_true
51560 #undef TARGET_REGISTER_PRIORITY
51561 #define TARGET_REGISTER_PRIORITY ix86_register_priority
51563 #undef TARGET_REGISTER_USAGE_LEVELING_P
51564 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
51566 #undef TARGET_LEGITIMATE_CONSTANT_P
51567 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
51569 #undef TARGET_FRAME_POINTER_REQUIRED
51570 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
51572 #undef TARGET_CAN_ELIMINATE
51573 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
51575 #undef TARGET_EXTRA_LIVE_ON_ENTRY
51576 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
51578 #undef TARGET_ASM_CODE_END
51579 #define TARGET_ASM_CODE_END ix86_code_end
51581 #undef TARGET_CONDITIONAL_REGISTER_USAGE
51582 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
51584 #if TARGET_MACHO
51585 #undef TARGET_INIT_LIBFUNCS
51586 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
51587 #endif
51589 #undef TARGET_LOOP_UNROLL_ADJUST
51590 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
51592 #undef TARGET_SPILL_CLASS
51593 #define TARGET_SPILL_CLASS ix86_spill_class
51595 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
51596 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
51597 ix86_simd_clone_compute_vecsize_and_simdlen
51599 #undef TARGET_SIMD_CLONE_ADJUST
51600 #define TARGET_SIMD_CLONE_ADJUST \
51601 ix86_simd_clone_adjust
51603 #undef TARGET_SIMD_CLONE_USABLE
51604 #define TARGET_SIMD_CLONE_USABLE \
51605 ix86_simd_clone_usable
51607 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
51608 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
51609 ix86_float_exceptions_rounding_supported_p
51611 #undef TARGET_MODE_EMIT
51612 #define TARGET_MODE_EMIT ix86_emit_mode_set
51614 #undef TARGET_MODE_NEEDED
51615 #define TARGET_MODE_NEEDED ix86_mode_needed
51617 #undef TARGET_MODE_AFTER
51618 #define TARGET_MODE_AFTER ix86_mode_after
51620 #undef TARGET_MODE_ENTRY
51621 #define TARGET_MODE_ENTRY ix86_mode_entry
51623 #undef TARGET_MODE_EXIT
51624 #define TARGET_MODE_EXIT ix86_mode_exit
51626 #undef TARGET_MODE_PRIORITY
51627 #define TARGET_MODE_PRIORITY ix86_mode_priority
51629 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
51630 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
51632 #undef TARGET_LOAD_BOUNDS_FOR_ARG
51633 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
51635 #undef TARGET_STORE_BOUNDS_FOR_ARG
51636 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
51638 #undef TARGET_LOAD_RETURNED_BOUNDS
51639 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
51641 #undef TARGET_STORE_RETURNED_BOUNDS
51642 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
51644 #undef TARGET_CHKP_BOUND_MODE
51645 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
51647 #undef TARGET_BUILTIN_CHKP_FUNCTION
51648 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
51650 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
51651 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
51653 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
51654 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
51656 #undef TARGET_CHKP_INITIALIZE_BOUNDS
51657 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
51659 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
51660 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
51662 struct gcc_target targetm = TARGET_INITIALIZER;
51664 #include "gt-i386.h"